In [10]:
from googleapiclient.discovery import build
import pandas as pd
import getpass
import re

In [11]:
# Ask for the API key at run time instead of storing it in the notebook:
# a key saved in a cell is leaked with the file, and an empty key makes
# every API request unauthenticated. getpass reads the value without echoing it.
api_key = getpass.getpass("Enter your YouTube Data API key: ").strip()
if not api_key:
    # Fail here, with a clear message, rather than on the first API request.
    raise ValueError("A YouTube Data API key is required.")

# Build the YouTube client
youtube = build('youtube', 'v3', developerKey=api_key)

## Extract video and playlist IDs from YouTube URLs

In [15]:
import re

def extract_youtube_ids(input_string):
    """Extract YouTube video and playlist IDs from free-form text.

    Recognised URL shapes (the scheme and the ``www.`` prefix are optional):
      * ``youtube.com/watch?v=<id>`` - ``v`` may follow other query parameters
      * ``youtube.com/playlist?list=<id>`` - ``list`` may follow other
        query parameters
      * ``youtu.be/<id>``

    Args:
        input_string: Text containing zero or more YouTube URLs, for example
            a comma-separated list typed by the user.

    Returns:
        list[str]: The IDs in order of appearance; an empty list when
        nothing matches. Video and playlist IDs are not distinguished.
    """
    pattern = (
        r'(?:https?://)?(?:www\.)?'
        r'(?:youtube\.com/(?:'
        # Lazily skip any query parameters that precede v= / list=.
        # Whitespace, commas and '#' are excluded so the skip can never run
        # into the next URL of a comma-separated list.
        r'watch\?(?:[^\s,#]*?&)??v='
        r'|playlist\?(?:[^\s,#]*?&)??list='
        # The dot is escaped: an unescaped '.' would accept any character
        # between "youtu" and "be".
        r')|youtu\.be/)'
        r'([a-zA-Z0-9_-]+)'
    )

    # findall returns only the single capture group, i.e. the bare IDs.
    return re.findall(pattern, input_string)

# Function to get user input for YouTube videos
def get_youtube_videos():
    """Prompt for comma-separated video URLs and return the IDs found in them."""
    raw_urls = input("Enter YouTube video URLs separated by commas: ")
    return extract_youtube_ids(raw_urls)

# Function to get user input for YouTube playlists
def get_youtube_playlists():
    """Prompt for comma-separated playlist URLs and return the IDs found in them."""
    raw_urls = input("Enter YouTube playlist URLs separated by commas: ")
    return extract_youtube_ids(raw_urls)

# Main script: ask which kind of URL will be entered, then collect the IDs.

# Start from empty lists so both names exist whichever branch runs.
# Without this, choosing option 2 (or an invalid option) leaves `video_ids`
# undefined and the comment-download loop further down raises NameError.
video_ids = []
playlist_ids = []

print("Choose an option:")
print("1. Enter YouTube video URLs")
print("2. Enter YouTube playlist URLs")
# Strip surrounding whitespace so an answer such as "1 " is still accepted.
choice = input("Enter your choice (1 or 2): ").strip()

if choice == '1':
    video_ids = get_youtube_videos()
    print("Extracted video IDs:", video_ids)
elif choice == '2':
    playlist_ids = get_youtube_playlists()
    print("Extracted playlist IDs:", playlist_ids)
else:
    print("Invalid choice. Please enter either 1 or 2.")

Choose an option:
1. Enter YouTube video URLs
2. Enter YouTube playlist URLs
Extracted video IDs: ['cTFMQMiCkfE', 'NOze-1ygsrQ']


## For YouTube videos, extract all comments (including replies) from the comment section

In [16]:
# Function to get replies for a specific comment
def get_replies(youtube, parent_id, video_ids):
    """Fetch every reply under one top-level comment, following pagination.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_id: ID of the top-level comment whose replies are requested.
        video_ids: Value copied unchanged into each record's ``VideoID``
            field. Despite the plural name, the caller in this notebook
            passes a single video ID.

    Returns:
        list[dict]: One record per reply with the keys ``Timestamp``,
        ``Username``, ``VideoID``, ``Comment`` and ``Date``.
    """
    collected = []
    page_token = None
    more_pages = True

    while more_pages:
        response = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=page_token,
        ).execute()

        for entry in response['items']:
            snippet = entry['snippet']
            record = {
                'Timestamp': snippet['publishedAt'],
                'Username': snippet['authorDisplayName'],
                'VideoID': video_ids,
                'Comment': snippet['textDisplay'],
            }
            # Use the edit time when present, otherwise the publish time.
            record['Date'] = snippet.get('updatedAt', record['Timestamp'])
            collected.append(record)

        # A missing or empty token means the last page has been read.
        page_token = response.get('nextPageToken')
        more_pages = bool(page_token)

    return collected


# Function to get all comments (including replies) for a single video
def get_comments_for_video(youtube, video_ids):
    """Download all comment threads of one video as a flat list of records.

    Each top-level comment becomes one record. When a thread reports a
    positive reply count, its replies are fetched with ``get_replies`` and
    placed directly after the parent record.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_ids: Passed as ``videoId`` to the API and stored in every
            record's ``VideoID`` field. Despite the plural name, the caller
            in this notebook passes a single video ID.

    Returns:
        list[dict]: Records with the keys ``Timestamp``, ``Username``,
        ``VideoID``, ``Comment`` and ``Date``.
    """
    records = []
    page_token = None
    more_pages = True

    while more_pages:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_ids,
            pageToken=page_token,
            textFormat="plainText",
            maxResults=100,
        ).execute()

        for thread in response['items']:
            thread_info = thread['snippet']
            top_level = thread_info['topLevelComment']
            snippet = top_level['snippet']

            record = {
                'Timestamp': snippet['publishedAt'],
                'Username': snippet['authorDisplayName'],
                'VideoID': video_ids,
                'Comment': snippet['textDisplay'],
            }
            # Use the edit time when present, otherwise the publish time.
            record['Date'] = snippet.get('updatedAt', record['Timestamp'])
            records.append(record)

            # Replies need a separate request, made only when the thread has some.
            if thread_info['totalReplyCount'] > 0:
                records += get_replies(youtube, top_level['id'], video_ids)

        # A missing or empty token means the last page has been read.
        page_token = response.get('nextPageToken')
        more_pages = bool(page_token)

    return records

In [17]:
# List to hold all comments from all videos (one record dict per comment or reply)
all_comments = []


# `video_ids` comes from the URL-input cell above (populated when option 1 is chosen).
for video_id in video_ids:
    # Each call performs the API requests for one video and returns its records.
    video_comments = get_comments_for_video(youtube, video_id)
    all_comments.extend(video_comments)

# Create DataFrame: one row per record; the record keys
# (Timestamp, Username, VideoID, Comment, Date) become the columns.
comments_df = pd.DataFrame(all_comments)

In [18]:
# Number of rows collected across all videos (top-level comments plus replies)
len(comments_df)

10168

In [21]:
# Preview at most the first 8000 rows of the collected comments
comments_df.head(8000)

Unnamed: 0,Timestamp,Username,VideoID,Comment,Date
0,2024-04-06T16:57:07Z,@anthonyblacker8471,cTFMQMiCkfE,It's really the only way to proceed Mat. How e...,2024-04-06T16:57:07Z
1,2024-04-06T16:53:26Z,@lollypoofy,cTFMQMiCkfE,they laugh the whole video hahaha,2024-04-06T16:53:26Z
2,2024-04-06T16:44:02Z,@BatkoMahno,cTFMQMiCkfE,Maybe you want to build a V12 Drift wide body ...,2024-04-06T16:44:02Z
3,2024-04-06T16:31:54Z,@mohammadahmed8171,cTFMQMiCkfE,Those 2 guys laugh too much,2024-04-06T16:31:54Z
4,2024-04-06T16:31:02Z,@svent2938,cTFMQMiCkfE,Suuuuuu 🙆🙅👫👫👫,2024-04-06T16:31:02Z
...,...,...,...,...,...
7995,2024-01-02T00:15:49Z,@NovaMoonx,NOze-1ygsrQ,DEFINITELY WORTH IT!!,2024-01-02T00:15:49Z
7996,2024-01-02T00:15:46Z,@CJMediaOfficial,NOze-1ygsrQ,This is insanely impressive for such a short t...,2024-01-02T00:15:46Z
7997,2024-01-02T00:15:39Z,@grahamstone816,NOze-1ygsrQ,"Imo by far your best video, good job hard work...",2024-01-02T00:15:39Z
7998,2024-01-02T00:14:29Z,@eclipseaus,NOze-1ygsrQ,"The best 40 minutes I've ever spent so far, Ma...",2024-01-02T00:14:29Z
