In [128]:
import pandas as pd
import numpy as np
import pickle

from googleapiclient.discovery import build
from dotenv import load_dotenv
import os

In [45]:
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# The API key is read from the environment variable literally named 'api_key'.
api_key = os.getenv('api_key')
if not api_key:
    # os.getenv returns None when the variable is missing; stop here with a
    # clear message instead of building a client with developerKey=None.
    raise RuntimeError(
        "Environment variable 'api_key' is not set; "
        "define it in a .env file or in the environment before running this cell."
    )

# YouTube Data API v3 client used by every helper below.
youtube = build('youtube', 'v3', developerKey=api_key)

In [75]:
def get_video_ids(youtube, playlist_id):
    """Collect the video IDs of every item in a playlist, following pagination.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``
            (the object created with ``build('youtube', 'v3', ...)``).
        playlist_id: ID of the playlist to enumerate.

    Returns:
        list: Video ID strings, in the order the response pages list them.
        Empty when the playlist yields no items.

    Raises:
        Whatever ``request.execute()`` raises; errors are not caught here.
    """
    video_ids = []
    next_page_token = None

    while True:
        params = {
            'part': 'contentDetails',
            'playlistId': playlist_id,
            'maxResults': 50,  # page size requested per call
        }
        # Only send pageToken when continuing, so the first request is
        # identical to a plain un-paginated call.
        if next_page_token is not None:
            params['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**params).execute()

        # A page without an 'items' key is treated as empty rather than
        # raising KeyError.
        video_ids.extend(
            item['contentDetails']['videoId']
            for item in response.get('items', [])
        )

        # Absence of nextPageToken marks the last page.
        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            return video_ids

In [71]:
def get_playlists(youtube, channel_ids):
    """Return the "uploads" playlist ID for each channel the API reports.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_ids: Iterable of channel ID strings. A single string is
            treated as one channel ID rather than being split into characters.

    Returns:
        list: ``contentDetails.relatedPlaylists.uploads`` values, in the order
        the responses list them. Channels missing from a response contribute
        nothing, so the result may be shorter than ``channel_ids``.

    Raises:
        Whatever ``request.execute()`` raises; errors are not caught here.
    """
    if isinstance(channel_ids, str):
        channel_ids = [channel_ids]
    else:
        channel_ids = list(channel_ids)

    # IDs are sent in groups so one oversized list is never put into a single
    # request. NOTE(review): 50 mirrors the page size used elsewhere in this
    # notebook; confirm the per-request id limit against the API reference.
    ids_per_request = 50

    upload_playlists = []
    # An empty input yields no iterations, hence no API call at all.
    for start in range(0, len(channel_ids), ids_per_request):
        batch = channel_ids[start:start + ids_per_request]
        response = youtube.channels().list(
            part='contentDetails',
            id=','.join(batch),
        ).execute()

        # A response without an 'items' key is treated as "no channels found".
        for item in response.get('items', []):
            upload_playlists.append(
                item['contentDetails']['relatedPlaylists']['uploads']
            )

    return upload_playlists

In [125]:
def get_comments_in_videos(youtube, video_ids):
    """Fetch top-level comment text for each video and return it as one array.

    One ``commentThreads().list`` request is made per video and only that
    single response is read (no page token is followed), so at most one page
    of threads per video is collected. Replies are requested through ``part``
    but only each thread's top-level ``textOriginal`` is extracted.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_ids: Iterable of video ID strings.

    Returns:
        numpy.ndarray: 1-D array of comment strings in request order; an empty
        array when no comments were collected.

    Raises:
        Whatever ``request.execute()`` raises; errors are not caught here.
    """
    # Accumulate in a list and convert once: np.append copies the whole array
    # on every call, which makes growing it inside the loop quadratic.
    collected = []

    for video_id in video_ids:
        response = youtube.commentThreads().list(
            part='snippet,replies',
            videoId=video_id
        ).execute()

        # A response without an 'items' key is treated as "no comments".
        collected.extend(
            thread['snippet']['topLevelComment']['snippet']['textOriginal']
            for thread in response.get('items', [])
        )

    return np.array(collected)

In [67]:
# Channel ID stored under the name mr_beast.
# NOTE(review): presumably the MrBeast YouTube channel; confirm the ID.
mr_beast = 'UCX6OQ3DkcsbYNE6H8uQQuVA'
# Channels to process; this list is passed to get_playlists in a later cell.
channel_ids = [mr_beast]

In [79]:
# Look up the uploads playlist ID for every channel in channel_ids,
# then display the resulting list as the cell output.
temp = get_playlists(youtube, channel_ids)
temp

['UUX6OQ3DkcsbYNE6H8uQQuVA']

In [81]:
# Collect the video IDs from the first playlist returned by get_playlists.
test = get_video_ids(youtube, temp[0])

In [88]:
# Number of video IDs collected from the playlist.
len(test)

723

In [126]:
# Fetch comments for only the first four video IDs in test.
test2 = get_comments_in_videos(youtube, test[0:4])

In [137]:
# Empty DataFrame with two columns, 'comment' and 'is_spam'; no rows are added
# in the cells shown here.
# NOTE(review): presumably 'is_spam' is a label to be filled in later; confirm.
youtube_comments = pd.DataFrame(columns = ['comment', 'is_spam'])
youtube_comments

Unnamed: 0,comment,is_spam


In [138]:
# Write the frame to youtube_comments.csv without the index column.
# NOTE(review): in the cells shown here the frame has no rows, so this writes
# only the header line, and re-running the cell replaces any existing
# youtube_comments.csv; confirm that is intended before re-running.
youtube_comments.to_csv('youtube_comments.csv', index = False)