In [4]:
from googleapiclient.discovery import build
import googleapiclient.errors
from tqdm import tqdm
import pandas as pd
import traceback
import copy

def get_comment_threads(youtube, video_id, nextPageToken):
    results = youtube.commentThreads().list(
        part='snippet, replies',
        maxResults=100,
        videoId=video_id,
        textFormat="plainText",
        pageToken = nextPageToken
    ).execute()
    return results


class YouTubeComments():
    def __init__(self, video_id, youtube):
        self._null_data ={"author_id":[], "author_url":[], "author_name":[], \
                          "text":[], "reply_count":[], \
                          "top_level":[], "index":[], \
                          "publishedAt":[], "updateAt"   :[], \
                          "likeCount":[]}
        self._data = None
        self._video_id = video_id
        self._youtube = youtube
        self.next_page_token = ""
        self.token_reply = ""
        self.error = 0
        self._data = copy.copy(self._null_data)

    def _append_row(self, item, res, j, reply_count, top_level):
        author = item["snippet"]["authorDisplayName"]
        publishedAt = item["snippet"]["publishedAt"]
        updatedAt = item["snippet"]["updatedAt"]
        likeCount = item["snippet"]["likeCount"]
        text = item["snippet"]["textDisplay"]
        if "authorChannelId" in item["snippet"]:
            aurl = item["snippet"]["authorChannelUrl"]
            uid = item["snippet"]["authorChannelId"]["value"]
        else:
            aurl = ""
            uid = None
        res["author_url"].append(aurl)
        res["author_id"].append(uid)
        res["author_name"].append(author)
        res["text"].append(text)
        res["reply_count"].append(reply_count)
        res["top_level"].append(top_level)
        res["index"].append(j)
        res["publishedAt"].append(publishedAt)
        res["updateAt"].append(updatedAt)
        res["likeCount"].append(likeCount)

    def _add_data(self, match, j, res, youtube):
        for item in tqdm( match['items']):
            comment = item["snippet"]["topLevelComment"]

            reply_count = item['snippet']['totalReplyCount']
            self._append_row(comment, res, j, reply_count, 0)
            # if reply is there
            if reply_count>0:
                i = 1
                replies_list = youtube.comments().list(part='snippet',\
                                           maxResults=100, \
                                           parentId=item['id']).execute()
                for reply in replies_list['items']:
                    self._append_row(reply, res, j, reply_count, i)
                    i += 1

                while "nextPageToken" in  item['replies']['comments']:
                    self._token_reply =  item['replies']['nextPageToken']
                # get next set of 100 replies
                    replies_list = youtube.comments().list(part = 'snippet', \
                                               maxResults = 100, \
                                               parentId = item['id'], \
                                               pageToken = self.token_reply).execute()
                    for reply in replies_list['items']:
                    # add reply to list
                       self._append_row(reply, res, j, reply_count, i)
                       i += 1
            j += 1

        return j

    def _load_data(self):
        self._data = copy.copy(self._null_data)
        match = get_comment_threads(self._youtube, self._video_id, '')
        n = 0
        while match:
            n = self._add_data(match, n, self._data, self._youtube)
            if 'nextPageToken' in match:
                self.next_page_token = match["nextPageToken"]
                match = get_comment_threads(self._youtube, self._video_id, \
                                                          self.next_page_token)
            else:
                break


    def download_comments(self):
        try:
            self._load_data()
        except googleapiclient.errors.HttpError as err:
            self._error = err.resp.status
            print(f"HTTP ERROR STATUS: {self._error}")
            traceback.print_exc()
        return


    def get_df(self):
        return pd.DataFrame(self._data)

    def error(self):
        return self._error



if __name__ == "__main__":

    youtube = build('youtube','v3',
                     developerKey='AIzaSyDbvg6Z7LHeWAYe3eRkQGAMlZmgl-IKnic')
    video_id = "wDkztLMNK9k"
    file_name = 'vdud_tinkov.csv'

    you_tube_comm = YouTubeComments(youtube=youtube, video_id=video_id)
    you_tube_comm.download_comments()
    you_tube_comm.get_df().to_csv(file_name)

100%|██████████| 100/100 [00:00<00:00, 159.83it/s]
100%|██████████| 100/100 [00:01<00:00, 97.79it/s]
100%|██████████| 100/100 [00:00<00:00, 150.58it/s]
100%|██████████| 100/100 [00:01<00:00, 90.08it/s]
100%|██████████| 100/100 [00:01<00:00, 59.95it/s]
100%|██████████| 100/100 [00:01<00:00, 64.93it/s]
100%|██████████| 100/100 [00:01<00:00, 73.47it/s]
100%|██████████| 100/100 [00:01<00:00, 70.65it/s]
100%|██████████| 100/100 [00:01<00:00, 68.59it/s]
100%|██████████| 100/100 [00:00<00:00, 109.46it/s]
100%|██████████| 100/100 [00:01<00:00, 86.68it/s]
100%|██████████| 100/100 [00:01<00:00, 74.99it/s]
100%|██████████| 100/100 [00:01<00:00, 72.80it/s]
100%|██████████| 100/100 [00:01<00:00, 79.69it/s]
100%|██████████| 100/100 [00:01<00:00, 67.86it/s]
100%|██████████| 100/100 [00:02<00:00, 49.12it/s]
100%|██████████| 100/100 [00:01<00:00, 53.47it/s]
100%|██████████| 100/100 [00:02<00:00, 42.41it/s]
100%|██████████| 100/100 [00:01<00:00, 56.86it/s]
100%|██████████| 100/100 [00:01<00:00, 64.28it/

In [7]:
test = pd.read_csv('vdud_tinkov.csv')

In [9]:
test.shape

(94360, 11)

In [8]:
test.head()

Unnamed: 0.1,Unnamed: 0,author_id,author_url,author_name,text,reply_count,top_level,index,publishedAt,updateAt,likeCount
0,0,UCvbkpQBb9RLNNOSSOnRhYVw,http://www.youtube.com/@Crus071,@Crus071,Жду комментарий про сводки смерти женщин и дет...,0,0.0,0.0,2024-04-30T11:43:54Z,2024-04-30T11:43:54Z,0.0
1,1,UC6I2zqimaf-h-3XQAL2B12w,http://www.youtube.com/@Andrew2zz,@Andrew2zz,Что удивляет. Тиньков говорит про глубинный на...,0,0.0,1.0,2024-04-30T11:37:08Z,2024-04-30T11:37:08Z,0.0
2,2,UCERo-8xosq-z4xNSF0RwCQg,http://www.youtube.com/@Schastliviy778,@Schastliviy778,"13:47 Дудь, разве не ты задавал один и тот же ...",0,0.0,2.0,2024-04-30T10:55:54Z,2024-04-30T10:55:54Z,0.0
3,3,UCGOxFmhgtPTSIZ6J-JmEkbA,http://www.youtube.com/@Artem-fl5mr,@Artem-fl5mr,Охуенный чувак,0,0.0,3.0,2024-04-30T06:25:34Z,2024-04-30T06:25:34Z,0.0
4,4,UCd9GbvOZQJFdTODaYmQ03Tg,http://www.youtube.com/@user-hd1em7fo1s,@user-hd1em7fo1s,Опять нытьё....,0,0.0,4.0,2024-04-30T06:05:14Z,2024-04-30T06:05:14Z,0.0
