In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
key = os.getenv('GOOGLE_API_KEY')

In [27]:
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

@dataclass
class Comment:
    comment_id: str
    author: str
    text: str
    like_count: int
    published_at: datetime
    parent_id: Optional[str]

In [31]:
import requests
from typing import List

def get_top_level_comments(video_id) -> List[Comment]:
    url = 'https://www.googleapis.com/youtube/v3/commentThreads'
    order = 'relevance'
    params = {
        'key': key,
        'part': 'snippet',
        'videoId': video_id,
        'maxResults': 100,
        'order': order,
    }
    
    comments = []
    response = requests.get(url, params=params)
    data = response.json()
    for item in data['items']:
        top_comment = item['snippet']['topLevelComment']['snippet']
        snippet_id = item['id']
        author = top_comment['authorDisplayName']
        text = top_comment['textDisplay']
        like_count = top_comment['likeCount']
        published_at = top_comment['publishedAt']

        comments.append(Comment(snippet_id, author, text, like_count, published_at, None))
    return comments

In [33]:
video_id = 'NDsO1LT_0lw'
response = get_top_level_comments(video_id)
response[:5]

[Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg', author='@MrBeast', text='BEAST GAMES FINALE DROPS FEBRUARY 13TH! GO WATCH THE REST NOW! <a href="https://unfur.ly/BeastGames">https://unfur.ly/BeastGames</a>', like_count=58163, published_at='2025-02-08T16:59:31Z', parent_id=None),
 Comment(comment_id='Ugy9mNx9AEZtvO_rplh4AaABAg', author='@YouTube', text='تجربة أسطورية 🤯 مرحبًا بك في مصر! 🇪🇬', like_count=66804, published_at='2025-02-10T16:52:13Z', parent_id=None),
 Comment(comment_id='Ugx98WuRmVg8lEshD9l4AaABAg', author='@marwanrehan', text='As an Egyptian, this is the first time I see and hear about the things shown in this video! Honestly, this is one of the best videos you&#39;ve ever made. ❤❤', like_count=180409, published_at='2025-02-08T17:47:35Z', parent_id=None),
 Comment(comment_id='UgxVm6v87tAKMqFMTUh4AaABAg', author='@oShven', text='Doing what most cant and documenting it for the whole world to see. I LOVE IT <a href="UCkszU2WH9gy1mb0dV-11UJg/ePgfY-K2Kp6Mr8oP1oqAwAc"></a>', li

In [36]:
def get_replies(comment_id: str) -> List[Comment]:
    url = 'https://www.googleapis.com/youtube/v3/comments'
    params = {
        'key': key,
        'part': 'snippet',
        'parentId': comment_id,
        'maxResults': 100
    }

    replies: List[Comment] = []

    while 1:
        response = requests.get(url, params=params)
        data = response.json()

        for item in data.get('items', []):
            snippet = item['snippet']
            replies.append(Comment(
                comment_id=item['id'],
                author=snippet.get('authorDisplayName', ''),
                text=snippet.get('textDisplay', ''),
                like_count=snippet.get('likeCount', 0),
                published_at=datetime.strptime(snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"),
                parent_id=snippet.get('parentId')
            ))

        if 'nextPageToken' in data:
            params['pageToken'] = data['nextPageToken']
        else:
            break

    replies.sort(key=lambda c: c.like_count, reverse=True)
    return replies

In [38]:
response = get_replies('UgzgrqctaRkXF0Ioydx4AaABAg')
response[:5]

[Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg.AEHxAWu37_NAEHxHWb93UL', author='@SirBhogerPlus', text='Already plan to!', like_count=606, published_at=datetime.datetime(2025, 2, 8, 17, 0, 28), parent_id='UgzgrqctaRkXF0Ioydx4AaABAg'),
 Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg.AEHxAWu37_NAEHxU9Ka3uH', author='@ayhamyt6143', text='LOL', like_count=352, published_at=datetime.datetime(2025, 2, 8, 17, 2, 12), parent_id='UgzgrqctaRkXF0Ioydx4AaABAg'),
 Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg.AEHxAWu37_NAEHxUIndd21', author='@Son_Harold', text='Yeah same<br>I love beast games ❤', like_count=276, published_at=datetime.datetime(2025, 2, 8, 17, 2, 13), parent_id='UgzgrqctaRkXF0Ioydx4AaABAg'),
 Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg.AEHxAWu37_NAEHxUi64FkV', author='@Revanftbl', text='Ok', like_count=143, published_at=datetime.datetime(2025, 2, 8, 17, 2, 16), parent_id='UgzgrqctaRkXF0Ioydx4AaABAg'),
 Comment(comment_id='UgzgrqctaRkXF0Ioydx4AaABAg.AEHxAWu37_NAEHxVEY6krb', author

In [39]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
text = "test"
score = sia.polarity_scores(text)

print(score)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/ltera/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
