In [2]:
import os

import googleapiclient.discovery
import pandas as pd

# Service name and version handed to `googleapiclient.discovery.build`.
API_SERVICE_NAME = "youtube"
API_VERSION = "v3"
# The API key is taken from the YOUTUBE_API_KEY environment variable so it never
# has to be typed into (and saved with) the notebook. When the variable is unset
# the value is the empty string, matching the previous hard-coded default.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")

def initialize_youtube_client(api_key):
    """Build a client for the API named by API_SERVICE_NAME / API_VERSION.

    Args:
        api_key: Developer key passed to `googleapiclient.discovery.build`
            as `developerKey`. Must be a non-blank string.

    Returns:
        Whatever `googleapiclient.discovery.build` returns for the service.

    Raises:
        ValueError: If `api_key` is not a string or is empty/whitespace.
    """
    # Fail fast: a blank key would otherwise only surface later as an HTTP
    # error on the first request, far from the actual misconfiguration.
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError(
            "A non-empty YouTube Data API key is required to build the client."
        )
    return googleapiclient.discovery.build(API_SERVICE_NAME, API_VERSION, developerKey=api_key)

def get_video_comments(youtube_client, video_id, target_comment_count=5000):
    """Gather comment rows for one video, page by page.

    Pages are requested through `fetch_comment_page` and flattened with
    `extract_comments` until either `target_comment_count` rows have been
    accumulated or a response carries no `nextPageToken`.

    Args:
        youtube_client: Client object forwarded to `fetch_comment_page`.
        video_id: Identifier of the video whose comment threads are listed.
        target_comment_count: Maximum number of rows to return.

    Returns:
        A list of at most `target_comment_count` rows, in the order the pages
        delivered them.
    """
    harvested = []
    cursor = None
    exhausted = False

    while not exhausted and len(harvested) < target_comment_count:
        payload = fetch_comment_page(youtube_client, video_id, cursor)
        harvested += extract_comments(payload)
        cursor = payload.get('nextPageToken')
        # A missing/empty token means the last page has been consumed.
        exhausted = not cursor

    # The final page may overshoot the target, so trim to the requested size.
    return harvested[:target_comment_count]

def fetch_comment_page(youtube_client, video_id, page_token=None, max_results=100):
    """Request a single page of comment threads for a video.

    Args:
        youtube_client: Object exposing `commentThreads().list(...)` whose
            result has an `execute()` method.
        video_id: Sent as the `videoId` request parameter.
        page_token: Sent as `pageToken`; `None` asks for the first page.
        max_results: Sent as `maxResults`. Defaults to 100, the value that was
            previously hard-coded (the YouTube Data API documents 1-100 as the
            accepted range for this endpoint).

    Returns:
        The value returned by the request's `execute()` call.
    """
    request = youtube_client.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=max_results,
        pageToken=page_token,
    )
    return request.execute()

def extract_comments(api_response):
    """Flatten a comment-threads response into plain rows.

    Args:
        api_response: Mapping that may contain an `items` list; each item is
            indexed as `item['snippet']['topLevelComment']['snippet']`.

    Returns:
        One list per item, holding the author display name, published
        timestamp, updated timestamp, like count and display text, in that
        order. An absent `items` key yields an empty list.
    """
    wanted_fields = (
        'authorDisplayName',
        'publishedAt',
        'updatedAt',
        'likeCount',
        'textDisplay',
    )

    rows = []
    for thread in api_response.get('items', []):
        # All five fields live on the top-level comment's own snippet.
        top_snippet = thread['snippet']['topLevelComment']['snippet']
        rows.append([top_snippet[field] for field in wanted_fields])
    return rows

if __name__ == "__main__":
    # Which video to scrape and how many comment rows to collect at most.
    video_id = "4Tm6Z1y3h94"
    max_comments_to_fetch = 5000

    youtube = initialize_youtube_client(DEVELOPER_KEY)
    comments_data = get_video_comments(
        youtube_client=youtube,
        video_id=video_id,
        target_comment_count=max_comments_to_fetch,
    )

    # Column labels follow the field order produced by extract_comments.
    column_names = ['author', 'published_at', 'updated_at', 'like_count', 'text']
    comments_df = pd.DataFrame(comments_data, columns=column_names)

    print(comments_df.head(10))


                          author          published_at            updated_at  \
0               @PursuitofWonder  2024-10-01T15:01:30Z  2024-10-01T15:01:30Z   
1                     @Naratifan  2024-10-13T23:46:30Z  2024-10-13T23:46:30Z   
2  @parvanehalipouralmajavan8234  2024-10-13T23:43:30Z  2024-10-13T23:43:30Z   
3                   @AmritBankar  2024-10-13T22:10:35Z  2024-10-13T22:10:35Z   
4                @temetnosce6192  2024-10-12T23:34:36Z  2024-10-12T23:36:05Z   
5                @RichardS-qh8mi  2024-10-12T11:14:48Z  2024-10-12T11:14:48Z   
6                  @belowfactual  2024-10-11T08:15:02Z  2024-10-11T08:15:02Z   
7               @RoyKristian-l8i  2024-10-09T08:59:22Z  2024-10-09T08:59:22Z   
8                  @Skyler-mw7hw  2024-10-09T08:22:24Z  2024-10-09T08:22:24Z   
9                 @dawnfield4713  2024-10-08T21:37:32Z  2024-10-08T21:37:32Z   

   like_count                                               text  
0           8  Thank you for watching. I hope this v

In [4]:
# Preview the first rows of the collected comments as the cell's rich output.
comments_df.head()

Unnamed: 0,author,published_at,updated_at,like_count,text
0,@PursuitofWonder,2024-10-01T15:01:30Z,2024-10-01T15:01:30Z,8,Thank you for watching. I hope this video help...
1,@Naratifan,2024-10-13T23:46:30Z,2024-10-13T23:46:30Z,0,"Good video, thank you !"
2,@parvanehalipouralmajavan8234,2024-10-13T23:43:30Z,2024-10-13T23:43:30Z,0,💗🙏
3,@AmritBankar,2024-10-13T22:10:35Z,2024-10-13T22:10:35Z,0,I needed to hear this today
4,@temetnosce6192,2024-10-12T23:34:36Z,2024-10-12T23:36:05Z,0,If an atom is the scale... I am a giant!-)


In [5]:
# (rows, columns) of the collected comments frame.
comments_df.shape

(5000, 5)

In [6]:
# Persist the collected comments as UTF-8 CSV, leaving out the DataFrame index.
comments_df.to_csv(
    path_or_buf='comments.csv',
    index=False,
    encoding='utf-8',
)