# Get chat comments
### 1. Get the first comment
### 2. Each comment holds a reference to the next comment

In [None]:
import json
import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
from retry import retry
import os
import sys

class ContinuationURLNotFound(Exception):
    """Raised when no chat-replay continuation token can be located."""

class LiveChatReplayDisabled(Exception):
    """Raised when the video page reports that chat replay is unavailable."""

class RestrictedFromYoutube(Exception):
    """Raised when the fetched page contains YouTube's rate-limit notice."""

# get first comment
def get_ytInitialData(target_url, session):
    """Fetch a watch page and return its embedded ``ytInitialData`` object.

    The page is downloaded through ``session`` and every ``<script>`` tag is
    scanned for one of the two assignment forms
    ``var ytInitialData = {...}`` or ``window["ytInitialData"] = {...}``.

    Args:
        target_url: URL of the video page to download.
        session: object exposing ``get(url, headers=...)`` whose result has a
            ``text`` attribute (a ``requests`` session in this file).

    Returns:
        The decoded JSON object, or ``None`` when no script contains the
        assignment and the page is not the rate-limit notice.

    Raises:
        RestrictedFromYoutube: the page carries the "large volume of
            requests" interruption message.
        json.JSONDecodeError: the text following the assignment is not
            valid JSON.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'}
    html = session.get(target_url, headers=headers)
    soup = BeautifulSoup(html.text, 'html.parser')

    markers = ('var ytInitialData =', 'window["ytInitialData"] =')
    decoder = json.JSONDecoder()
    for script in soup.find_all('script'):
        script_text = str(script)
        for marker in markers:
            marker_pos = script_text.find(marker)
            if marker_pos < 0:
                continue
            json_start = script_text.find('{', marker_pos + len(marker))
            if json_start < 0:
                continue
            # raw_decode stops at the end of the JSON object, so whatever
            # trails it (";", "</script>", further statements) is ignored
            # instead of being trimmed with fixed-width slices.
            data, _ = decoder.raw_decode(script_text[json_start:])
            return data

    if 'Sorry for the interruption. We have been receiving a large volume of requests from your network.' in str(soup):
        print("restricted from Youtube (Rate limit)")
        raise RestrictedFromYoutube

    return None

def check_livechat_replay_disable(ytInitialData):
    """Tell whether the page data indicates that chat replay is disabled.

    Returns ``True`` when the page has no ``conversationBar`` at all, or when
    its ``conversationBarRenderer`` carries the "Chat replays are not
    available for this video." availability message. Returns ``False``
    otherwise. The availability text (possibly ``None``) is printed whenever
    a ``conversationBarRenderer`` is present.
    """
    watch_results = ytInitialData['contents'].get('twoColumnWatchNextResults', {})
    bar = watch_results.get('conversationBar', {})
    if not bar:
        # No chat panel at all: treat replay as disabled.
        return True

    renderer = bar.get('conversationBarRenderer', {})
    if not renderer:
        return False

    message = renderer.get('availabilityMessage', {}).get('messageRenderer', {})
    first_run = message.get('text', {}).get('runs', [{}])[0]
    notice = first_run.get('text')
    print(notice)
    return notice == 'Chat replays are not available for this video.'

@retry(ContinuationURLNotFound, tries=3, delay=1)
def get_initial_continuation(target_url):
    """Return the first chat-replay continuation token of a video page.

    The page's ``ytInitialData`` is fetched, checked for a "replay disabled"
    notice, and then searched for a continuation token. Tokens listed in the
    chat view selector are preferred in this order: "Top chat replay",
    its Japanese title, "Live chat replay", its Japanese title. When none of
    those is present the first entry of ``liveChatRenderer.continuations``
    is used.

    Args:
        target_url: URL of the video page.

    Returns:
        The continuation token, or ``None`` when YouTube answered with its
        rate-limit page.

    Raises:
        ContinuationURLNotFound: page data or token could not be found
            (the ``retry`` decorator is configured with this exception,
            ``tries=3`` and ``delay=1``).
        LiveChatReplayDisabled: the page reports chat replay as unavailable.
    """
    # The session is only needed for the single page download; the context
    # manager closes it (and its pooled connections) right afterwards.
    with requests.session() as session:
        try:
            ytInitialData = get_ytInitialData(target_url, session)
        except RestrictedFromYoutube:
            return None

    if not ytInitialData:
        print("Cannot get ytInitialData")
        raise ContinuationURLNotFound

    if check_livechat_replay_disable(ytInitialData):
        print("LiveChat Replay is disable")
        raise LiveChatReplayDisabled

    # Map each view-selector title to its continuation token.
    continue_dict = {}
    try:
        subMenuItems = ytInitialData['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems']
        for item in subMenuItems:
            continue_dict[item['title']] = item['continuation']['reloadContinuationData']['continuation']
    except KeyError:
        print("Cannot find continuation")

    # Highest-priority title first; the first non-empty token wins.
    preferred_titles = (
        'Top chat replay',
        '上位のチャットのリプレイ',
        'Live chat replay',
        'チャットのリプレイ',
    )
    continue_url = None
    for title in preferred_titles:
        continue_url = continue_dict.get(title)
        if continue_url:
            break

    if not continue_url:
        # Fall back to the renderer's own first continuation entry.
        continue_url = ytInitialData["contents"]["twoColumnWatchNextResults"].get("conversationBar", {}).get("liveChatRenderer", {}).get("continuations", [{}])[0].get("reloadContinuationData", {}).get("continuation")

    if not continue_url:
        raise ContinuationURLNotFound

    return continue_url

def check_initial_continuation(target_url):
    """Resolve the first continuation token, reporting failures by printing.

    Returns whatever ``get_initial_continuation`` returns on success. Any
    exception it raises is printed (with a dedicated message for disabled
    replays and missing continuation URLs) and ``None`` is returned instead.
    """
    try:
        return get_initial_continuation(target_url)
    except LiveChatReplayDisabled:
        print(" The video is disabled Livechat replay, create blank list")
    except ContinuationURLNotFound:
        print(" can not find continuation url")
    except Exception as e:
        print(e)
    return None


# Placeholder channel metadata; neither value is read again in the visible code.
channel_id = "チャンネルID"
channel_title = "チャンネル名"

# Ask for the video URL and resolve its first chat-replay continuation token.
# A failed lookup is normalised to an empty string.
target_url = input("Eenter url\n")
continuation = check_initial_continuation(target_url) or ""
if continuation == "":
    print("not continuation")

print(continuation)
   


In [None]:
import os
import sys
import json
import requests



# Browser-like request headers sent with each live_chat_replay request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.100 Safari/537.36'
    ),
}

def fetch_json(max_retries=5, retry_delay=1):
    """Download one chat-replay page and return its embedded JSON payload.

    The page for the module-level ``continuation`` token is requested with
    the module-level ``headers``, and the first line containing
    ``{"responseContext`` is decoded from that position onward.

    Args:
        max_retries: how many additional downloads to attempt when a page
            holds no payload or the payload fails to decode.
        retry_delay: seconds to wait before each additional attempt.

    Returns:
        The decoded JSON object, or ``None`` when every attempt failed.
    """
    import time

    marker = '{"responseContext'
    decoder = json.JSONDecoder()
    url = "https://www.youtube.com/live_chat_replay?continuation=" + continuation

    # A bounded loop replaces the original unbounded self-recursion, which
    # could hammer the server until RecursionError and dropped the retry's
    # result on the JSONDecodeError path.
    for attempt in range(max_retries + 1):
        if attempt:
            time.sleep(retry_delay)
        print("fetch_json()", file=sys.stderr)
        res = requests.get(url, headers=headers)

        found_marker = False
        for line in res.text.splitlines():
            pos = line.find(marker)
            if pos < 0:  # a match at column 0 is valid, only -1 means absent
                continue
            found_marker = True
            try:
                # raw_decode stops at the end of the JSON object, so trailing
                # markup after it does not have to be trimmed by hand.
                payload, _ = decoder.raw_decode(line[pos:])
            except json.JSONDecodeError:
                print(sys.exc_info(), file=sys.stderr)
                print("JSONDecodeError", file=sys.stderr)
                continue
            return payload

        if not found_marker:
            print("json_text is None", file=sys.stderr)

    return None


# parse json and write csv
def parse_json():
    """Append the text messages of the current chat page to the CSV file.

    Reads the module-level ``jsn`` (fetching it with ``fetch_json`` when it
    is ``None``) and appends one ``timestamp,text`` row per
    ``liveChatTextMessageRenderer`` to the module-level ``chat_replay_file``.
    Messages whose timestamp starts with ``-`` (posted before the stream
    started) are skipped. Timestamps are padded to ``HH:MM:SS`` and commas
    in the message are replaced with ``_`` so the column count stays fixed.
    Emoji runs are written as their first shortcut, or as the emoji id when
    no shortcut exists. Diagnostics go to stderr.
    """
    global jsn
    print("parse_json()", file=sys.stderr)

    if jsn is None:
        print("jsn is None", file=sys.stderr)
        jsn = fetch_json()
        if jsn is None:
            # Nothing to parse; leave the decision to the caller.
            return

    live_chat = (jsn.get("continuationContents") or {}).get("liveChatContinuation") or {}
    actions = live_chat.get("actions")
    if actions is None:
        print("actions is None", file=sys.stderr)
        return

    rows = []
    for action in actions:
        replay_actions = (action.get("replayChatItemAction") or {}).get("actions") or [{}]
        addChatItemAction = replay_actions[0].get("addChatItemAction")
        if addChatItemAction is None:
            print("not addChatItemAction", file=sys.stderr)
            continue

        renderer = (addChatItemAction.get("item") or {}).get("liveChatTextMessageRenderer")
        if renderer is None:
            print("not liveChatTextMessageRenderer", file=sys.stderr)
            continue

        timestamp = renderer["timestampText"]["simpleText"]

        # Exclude comments posted before the stream started.
        if timestamp.startswith("-"):
            continue

        # Format the time as 00:00:00.
        if timestamp[1:2] == ":":
            timestamp = "0" + timestamp
        if len(timestamp) < 8:
            timestamp = "00:" + timestamp

        parts = []
        for run in (renderer.get("message") or {}).get("runs") or []:
            run_text = run.get("text")
            if run_text is not None:
                parts.append(run_text)
                continue
            emoji = run.get("emoji") or {}
            shortcuts = emoji.get("shortcuts")
            parts.append(shortcuts[0] if shortcuts else emoji.get("emojiId", ""))
        text = "".join(parts)

        # Replace "," so the number of CSV columns is not disturbed.
        rows.append(f"{timestamp},{text.replace(',', '_')}\n")

    if rows:
        # One open per page instead of one per message; UTF-8 keeps emoji and
        # non-ASCII chat text writable regardless of the platform default.
        with open(chat_replay_file, mode="a", encoding="utf-8") as f:
            f.writelines(rows)


def fetch_chat_replay():
    """Download the whole chat replay and write it to a CSV file.

    Starting from the module-level ``continuation`` token, each page is
    fetched with ``fetch_json`` and written with ``parse_json``; the token
    of the next page is then read from the page itself. The loop ends when
    a page carries no further replay continuation, or when no page could be
    obtained at all.

    The output file name is read from standard input (``.csv`` is appended)
    and the file is (re)created with a ``timestamp,text`` header row.

    Raises:
        SystemExit: with status 0 when there is no continuation token to
            start from.
    """
    global continuation, jsn, chat_replay_file

    if not continuation:
        print("chat replay is not exists")
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to exist.
        sys.exit(0)

    print("Enter file name")
    chat_replay_file = input() + ".csv"
    with open(chat_replay_file, mode="w", encoding="utf-8") as f:
        f.write("timestamp,text\n")

    jsn = None
    while True:
        if not continuation:
            print("not continuation. Process done")
            break

        jsn = fetch_json()
        if jsn is None:
            print("jsn=", jsn, file=sys.stderr)

        # parse_json refetches the page itself when jsn is still None.
        parse_json()
        if jsn is None:
            # Neither fetch produced a page: stop rather than request the
            # same continuation forever.
            print("jsn=", jsn, file=sys.stderr)
            break

        live_chat = (jsn.get("continuationContents") or {}).get("liveChatContinuation") or {}
        continuations = live_chat.get("continuations") or [{}]
        liveChatReplayContinuationData = continuations[0].get("liveChatReplayContinuationData")
        if liveChatReplayContinuationData:
            continuation = liveChatReplayContinuationData.get("continuation")
        else:
            continuation = None

# Run the download: prompts for an output file name and writes the chat replay CSV.
fetch_chat_replay()

