In [1]:
#!pip install google-api-python-client

In [3]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
import pandas as pd

# Load variables from a .env file (if one is found) into the process
# environment, then read the YouTube Data API key from it.
load_dotenv()
API_KEY = os.getenv("YOUTUBE_API_KEY")
if not API_KEY:
    # Fail fast with an actionable message: without a key the client would
    # otherwise be built with developerKey=None and the problem would only
    # surface later as an API error on the first request.
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; define it in your .env file or environment."
    )

# YouTube Data API v3 client shared by the cells below.
# static_discovery=True makes the library use its bundled discovery document
# rather than downloading one when the client is built.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=API_KEY,
    static_discovery=True
)

# Column order of the DataFrame returned by get_video_comments; also used to
# give an empty result a well-defined schema.
_COMMENT_COLUMNS = ['video_id', 'author', 'comment', 'like_count', 'published_at', 'type']


def _comment_row(video_id, snippet, comment_type):
    """Flatten one comment ``snippet`` dict from the API into a row dict."""
    return {
        'video_id': video_id,
        'author': snippet.get('authorDisplayName'),
        'comment': snippet.get('textDisplay'),
        'like_count': snippet.get('likeCount'),
        'published_at': snippet.get('publishedAt'),
        'type': comment_type,
    }


def get_video_comments(video_id, max_comments=1000, youtube_client=None):
    """Fetch top-level comments and their bundled replies for one video.

    Pages through ``commentThreads().list`` (100 threads per request) until
    ``max_comments`` rows have been collected or there are no more pages.

    Args:
        video_id: YouTube video ID whose comment threads are requested.
        max_comments: Upper bound on the number of rows returned, counting
            both parents and replies. Values <= 0 return an empty frame
            without calling the API.
        youtube_client: Optional API client exposing ``commentThreads()``.
            Defaults to the module-level ``youtube`` client.

    Returns:
        pandas.DataFrame with columns ``video_id``, ``author``, ``comment``,
        ``like_count``, ``published_at`` and ``type`` (``'parent'`` or
        ``'reply'``), in API order with each parent followed by its replies.
        The columns are present even when no comments were found.

    Note:
        Only the replies embedded in each thread resource are collected.
        NOTE(review): the API may embed just a subset of a thread's replies;
        fetching all of them would need ``comments().list`` -- confirm against
        the YouTube Data API documentation if completeness matters.
        Errors raised by ``execute()`` (e.g. comments disabled) propagate to
        the caller.
    """
    # Resolve the client lazily so an injected client never touches the global.
    client = youtube if youtube_client is None else youtube_client
    comments = []
    next_page_token = None

    while len(comments) < max_comments:
        response = client.commentThreads().list(
            part='snippet,replies',
            videoId=video_id,
            pageToken=next_page_token,
            maxResults=100,
            textFormat="plainText"
        ).execute()

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append(_comment_row(video_id, top_level, 'parent'))

            # Add replies if any
            for reply in item.get('replies', {}).get('comments', []):
                comments.append(_comment_row(video_id, reply['snippet'], 'reply'))

            if len(comments) >= max_comments:
                break

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    # A thread's replies can push the list past the cap; trim so the
    # max_comments contract is exact.
    return pd.DataFrame(comments[:max_comments], columns=_COMMENT_COLUMNS)


In [5]:
# Videos whose comment threads are collected.
video_ids = ['hFIMNthZ6ow', 'NYS5HSUVdz8', '8p-pFSN17n0']

# Collect one frame per video and concatenate once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on
# every iteration and starts from an empty, column-less frame.
per_video_frames = []
for vid in video_ids:
    df = get_video_comments(vid, max_comments=1000)
    print(f"{vid}: {len(df)} comments (including replies)")
    per_video_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_comments_df = (
    pd.concat(per_video_frames, ignore_index=True)
    if per_video_frames
    else pd.DataFrame()
)

all_comments_df.head()

hFIMNthZ6ow: 405 comments (including replies)
NYS5HSUVdz8: 1000 comments (including replies)
8p-pFSN17n0: 9 comments (including replies)


Unnamed: 0,video_id,author,comment,like_count,published_at,type
0,hFIMNthZ6ow,@월드클래스K,"Hello, can we use your video for our broadcast...",0,2025-05-31T12:39:52Z,parent
1,hFIMNthZ6ow,@OjolPalembang,MU sudah tim biasa saja..,0,2025-05-31T06:31:40Z,parent
2,hFIMNthZ6ow,@herlanbangofficial3753,Hal yg paling qw tunggu laga antara Asean all ...,0,2025-05-31T06:17:40Z,parent
3,hFIMNthZ6ow,@KancilPutra-yu2jv,"Semenjak legend owen ,neved roony ,dan pan per...",0,2025-05-31T01:41:38Z,parent
4,hFIMNthZ6ow,@medroastt9888,luaar biasa emyu cuma kalah 1-0 😜,0,2025-05-31T00:42:47Z,parent


In [None]:
#all_comments_df.to_csv('../data/raw/youtube_comments.csv', index=False)