# In [1]:
import os
import googleapiclient.discovery
import pandas as pd
import re
import warnings
warnings.filterwarnings('ignore')

# In [3]:
def fetch_top_videos(query, api_key, max_results):
    """Return up to *max_results* video IDs matching *query*.

    Runs a YouTube Data API ``search.list`` request restricted to
    ``type='video'`` and collects ``item['id']['videoId']`` from each
    result. The API documents a per-page cap of 50 results, so when
    *max_results* exceeds that the function follows ``nextPageToken``
    (via ``list_next``) until enough IDs are gathered or the results
    run out.

    Args:
        query: Free-text search string passed as ``q``.
        api_key: API key passed to the client as ``developerKey``.
        max_results: Maximum number of video IDs to return.

    Returns:
        A list of video ID strings, at most *max_results* long. Empty
        when *max_results* is 0 or the search yields nothing.

    Raises:
        ValueError: If *max_results* is negative.
        googleapiclient.errors.HttpError: If the API rejects a request.
    """
    if max_results < 0:
        raise ValueError("max_results must be non-negative")
    if max_results == 0:
        return []

    # NOTE(review): this flag only relaxes oauthlib's HTTPS check for OAuth
    # flows; nothing visible here performs OAuth (an API key is used), so it
    # looks unnecessary. Kept to preserve the existing side effect - confirm
    # and remove.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key)

    page_size_cap = 50  # documented upper bound for search.list maxResults
    search = youtube.search()
    request = search.list(
        q=query,
        type='video',
        part='id',
        maxResults=min(max_results, page_size_cap),
    )

    video_ids = []
    while request is not None and len(video_ids) < max_results:
        response = request.execute()
        items = response.get('items', [])
        if not items:
            # An empty page means there is nothing more worth paying quota for.
            break
        video_ids.extend(item['id']['videoId'] for item in items)
        # list_next returns None once the response carries no nextPageToken.
        request = search.list_next(request, response)

    # The last page may overshoot the requested total.
    return video_ids[:max_results]

def fetch_video_details(video_id, api_key):
    """Return the first ``videos.list`` result for *video_id*.

    Requests the ``snippet``, ``statistics`` and ``recordingDetails``
    parts for the given ID and returns the first entry of the response's
    ``items`` list as-is (a dict). Comments are not fetched here.

    Args:
        video_id: ID of the video to look up.
        api_key: API key passed to the client as ``developerKey``.

    Raises:
        IndexError: If the response's ``items`` list is empty.
        KeyError: If the response has no ``items`` key.
    """
    # Set on every call, exactly as before; it is a process-wide side effect.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    client = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key)

    lookup = client.videos().list(
        id=video_id,
        part='snippet,statistics,recordingDetails',
    )
    matches = lookup.execute()['items']
    return matches[0]

def fetch_video_comments_with_authors(video_id, api_key):
    """Collect comment texts and author names for one video.

    Fetches a single page of top-level comment threads for *video_id*
    and, for each thread that reports replies, a single page of its
    replies. No page tokens are followed, so long comment sections are
    truncated to whatever the first page of each request contains.
    Both top-level comments and replies are requested as plain text.

    Args:
        video_id: ID of the video whose comments are fetched.
        api_key: API key passed to the client as ``developerKey``.

    Returns:
        A ``(comments, comment_authors)`` tuple of two parallel lists:
        ``comments[i]`` is the ``textDisplay`` of a comment and
        ``comment_authors[i]`` its ``authorDisplayName``. Each top-level
        comment is followed immediately by its replies. Both lists are
        empty when the API reports that comments are disabled.

    Raises:
        googleapiclient.errors.HttpError: For any API failure other than
            disabled comments.
    """
    # Imported explicitly rather than relying on googleapiclient.discovery
    # having pulled the errors submodule in as a side effect.
    from googleapiclient.errors import HttpError

    # NOTE(review): only affects oauthlib OAuth flows; appears unnecessary
    # with API-key access. Kept to preserve the existing side effect.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key)

    comments = []
    comment_authors = []  # parallel to `comments`

    top_level_request = youtube.commentThreads().list(
        part='snippet',
        videoId=video_id,
        textFormat='plainText',
        # The API documents 1-100 as the valid range; the previous value of
        # 1000 was out of range.
        maxResults=100,
    )

    try:
        top_level_response = top_level_request.execute()
    except HttpError as e:
        # Disabled comments are an expected condition, not a failure.
        if "commentsDisabled" in str(e):
            return [], []
        # Anything else is a real error; surface it instead of falling
        # through to code that would hit an unbound response variable.
        raise

    for item in top_level_response.get('items', []):
        thread_snippet = item['snippet']
        top_comment = thread_snippet['topLevelComment']['snippet']

        comments.append(top_comment['textDisplay'])
        comment_authors.append(top_comment['authorDisplayName'])

        # Skip the extra request when the thread reports zero replies. If
        # the count is missing, fall back to asking the API as before.
        if thread_snippet.get('totalReplyCount', 1) == 0:
            continue

        reply_response = youtube.comments().list(
            part='snippet',
            parentId=item['id'],
            # Match the top-level request so replies are not returned as
            # HTML while their parents are plain text.
            textFormat='plainText',
        ).execute()

        for reply_item in reply_response.get('items', []):
            reply = reply_item['snippet']
            comments.append(reply['textDisplay'])
            comment_authors.append(reply['authorDisplayName'])

    return comments, comment_authors

def _get_api_key():
    """Return the YouTube Data API key without embedding it in source.

    Reads the ``YOUTUBE_API_KEY`` environment variable and falls back to
    a hidden interactive prompt when it is unset or blank.

    Raises:
        SystemExit: If no key is supplied either way.
    """
    api_key = os.environ.get("YOUTUBE_API_KEY", "").strip()
    if not api_key:
        import getpass  # only needed on the interactive fallback path
        api_key = getpass.getpass("Enter your YouTube Data API key: ").strip()
    if not api_key:
        raise SystemExit("A YouTube Data API key is required.")
    return api_key


def main():
    """Search YouTube, collect comments, and write them to a CSV file.

    Prompts for how many top search results to process for a fixed
    query, fetches each video's details and comments, and writes one
    CSV row per comment (video-level fields repeated on every row) to
    ``youtube_video_data_with_comments.csv``. Videos that yield no
    comments therefore contribute no rows. A video whose API requests
    time out, fail, or return no details is reported and skipped so the
    rest of the run is still saved.
    """
    from googleapiclient.errors import HttpError

    query = "financial independence retire early"
    # SECURITY: the key used to be hard-coded here. Any key that has been
    # committed to source should be treated as leaked and rotated.
    api_key = _get_api_key()
    max_results = int(input("Enter the number of top videos to fetch: "))

    video_ids = fetch_top_videos(query, api_key, max_results)

    # Explicit column order keeps the CSV header stable even with zero rows.
    columns = [
        'Video ID',
        'Title',
        'Description',
        'View Count',
        'Like Count',
        'Comment Count',
        'Upload Date',
        'Channel Name',
        'Comment',
        'comment_author',
    ]

    comment_data = []
    for video_id in video_ids:
        try:
            video_details = fetch_video_details(video_id, api_key)
            comments, comment_authors = fetch_video_comments_with_authors(
                video_id, api_key)
        except TimeoutError:
            print(f"API request for video ID {video_id} timed out. Skipping this video.")
            continue
        except (HttpError, IndexError) as e:
            # HttpError: the API rejected a request. IndexError: the details
            # lookup returned no item for this ID.
            print(f"API request for video ID {video_id} failed ({e}). Skipping this video.")
            continue

        snippet = video_details['snippet']
        statistics = video_details.get('statistics', {})

        video_fields = {
            'Video ID': video_id,
            'Title': snippet['title'],
            'Description': snippet['description'],
            # Counts can be absent from the response (e.g. hidden likes);
            # record them as missing rather than crashing the whole run.
            'View Count': statistics.get('viewCount'),
            'Like Count': statistics.get('likeCount'),
            'Comment Count': statistics.get('commentCount', 0),
            'Upload Date': snippet['publishedAt'],
            'Channel Name': snippet['channelTitle'],
        }

        # One output row per comment, each carrying the video-level fields.
        comment_data.extend(
            {**video_fields, 'Comment': comment, 'comment_author': author}
            for comment, author in zip(comments, comment_authors)
        )

    df = pd.DataFrame(comment_data, columns=columns)

    df.to_csv('youtube_video_data_with_comments.csv', index=False)
    print("Video data saved to youtube_video_data_with_comments.csv")

# Run the scraper only when this module is the entry point (script or
# notebook cell), not when it is imported.
if __name__ == "__main__":
    main()


# Captured output from a run of the cell above:
# Enter the number of top videos to fetch: 30
# API request for video ID dkTbBmJnp9U timed out. Skipping this video.
# Video data saved to youtube_video_data_with_comments.csv
