In [1]:
from googleapiclient.discovery import build
from dateutil import parser
import pandas as pd
from IPython.display import JSON

# Data viz packages
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [2]:
api_key = 'AIzaSyBbJJ_Zaq9EtVWtjngYHkP4whsQbZytUAs'
channel_ids = [
    'UCRBMw93r6YsrWS3B1rsPoyw',
    'UC5PstSsGrRwj2o6asQpC4Rg',
    # add channels
]

In [3]:
import googleapiclient.discovery

api_service_name = "youtube"
api_version = "v3"

# Build the YouTube API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

In [4]:
def get_channel_stats(youtube, channel_ids):
    
    """
    Get channel stats
    
    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs
    
    Returns:
    ------
    dataframe with all channel stats for each channel ID
    
    """
    
    all_data = []
    
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }
        
        all_data.append(data)
        
    return pd.DataFrame(all_data)

In [5]:
def get_video_ids(youtube, playlist_id):
    
    video_ids = []
    
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 50
    )
    response = request.execute()
    
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    while next_page_token is not None:
        request = youtube.playlistItems().list(
                    part='contentDetails',
                    playlistId = playlist_id,
                    maxResults = 50,
                    pageToken = next_page_token)
        response = request.execute()

        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

        next_page_token = response.get('nextPageToken')
        
    return video_ids

In [6]:
def get_video_details(youtube, video_ids):

    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

In [7]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [8]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Snakehips,394000,216459646,97,UURBMw93r6YsrWS3B1rsPoyw
1,FLAGRANT,1680000,408436445,489,UU5PstSsGrRwj2o6asQpC4Rg


In [11]:
playlist_id = "UU5PstSsGrRwj2o6asQpC4Rg"
# Get video IDs
video_ids = get_video_ids(youtube, playlist_id)

In [12]:
len(video_ids)

489

In [13]:
# Get video details
video_df = get_video_details(youtube, video_ids)
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,SzE9SK7m9sA,FLAGRANT,Algerian Boxer Gender Mystery & Dan Bilzerian ...,What’s good people we back from vacay and disc...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-08-21T15:16:44Z,327141,5579,,1220,PT1H22M45S,hd,false
1,BTLGAs5Ymkk,FLAGRANT,"Young Thug Trial Breakdown, Special Olympics D...","yerrr, we had Brian Simpson on to discuss why ...","[andrew schulz, andrew schultz, comedy, comedi...",2024-08-14T15:04:09Z,347137,5359,,876,PT1H57M35S,hd,false
2,Rj3s1iS5O1c,FLAGRANT,Peter Dinklage on House Of The Dragon vs Game ...,Yerrr we got Peter Dinklage aka Tyrion Lannist...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-08-07T14:23:14Z,1958497,38258,,3091,PT2H10M38S,hd,false
3,bRzPmTs_ywA,FLAGRANT,"Lil Yachty on Drake Beef, MrBeast Controversy,...",YERRR we got Lil Yachty on the pod to show us ...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-07-31T14:22:01Z,811634,15010,,2012,PT1H53M33S,hd,false
4,PXI7zP3Jn7A,FLAGRANT,Trump Survives & Kamala is Dems Hawk Tuah,YERRR We're back babyyyy! Came back with some ...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-07-24T15:01:33Z,1076879,23519,,3638,PT2H11M38S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
484,rX4Imv9I8Gw,FLAGRANT,Andrew Schulz - IS THERE ONE ATTRACTIVE FEMINIST?,IS THERE ONE ATTRACTIVE FEMINIST?\n\nThe Flagr...,"[andrew schulz, andrew schultz, comedy, comedi...",2019-07-23T11:00:12Z,53740,2001,,163,PT5M46S,hd,false
485,TGFmb6H1sM8,FLAGRANT,XXXTENTACION JOKE | Andrew Schulz & Akaash Singh,XXXTENTACION JOKE\n\nThe Flagrant 2 podcast is...,"[andrew schulz, andrew schultz, comedy, comedi...",2019-07-23T11:00:04Z,19701,598,,124,PT14M34S,hd,false
486,-PQu6rhmRMg,FLAGRANT,Flagrant 2: How To Not Pay Taxes | Full Episod...,"This week Andrew, Akaash, and Kaz discuss: Wes...","[andrew schulz, andrew schultz, comedy, comedi...",2019-07-17T17:00:00Z,43212,907,,86,PT2H15M55S,hd,false
487,VD5RpAQos34,FLAGRANT,White Girls Dance Better Than Black Girls,#Flagrant2 #AndrewSchulz #AkaashSingh #RealLif...,"[andrew schultz, comedy, comedian, stand up, b...",2019-07-16T22:00:40Z,109672,2179,,159,PT7M56S,hd,false


In [14]:
# Check for NULL values
video_df.isnull().any()

video_id          False
channelTitle      False
title             False
description       False
tags               True
publishedAt       False
viewCount         False
likeCount         False
favouriteCount     True
commentCount      False
duration          False
definition        False
caption           False
dtype: bool

In [15]:
# Check data types
video_df.dtypes

video_id          object
channelTitle      object
title             object
description       object
tags              object
publishedAt       object
viewCount         object
likeCount         object
favouriteCount    object
commentCount      object
duration          object
definition        object
caption           object
dtype: object

In [16]:
# Convert count columns to numeric
numeric_cols = ['viewCount', 'likeCount', 'favouriteCount', 'commentCount']
video_df[numeric_cols] = video_df[numeric_cols].apply(pd.to_numeric, errors = 'coerce', axis = 1)

In [17]:
# Publish day in the week
video_df['publishedAt'] = video_df['publishedAt'].apply(lambda x: parser.parse(x)) 
video_df['pushblishDayName'] = video_df['publishedAt'].apply(lambda x: x.strftime("%A")) 

In [18]:
# Add tag count
video_df['tagCount'] = video_df['tags'].apply(lambda x: 0 if x is None else len(x))

In [19]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption,pushblishDayName,tagCount
0,SzE9SK7m9sA,FLAGRANT,Algerian Boxer Gender Mystery & Dan Bilzerian ...,What’s good people we back from vacay and disc...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-08-21 15:16:44+00:00,327141.0,5579.0,,1220.0,PT1H22M45S,hd,false,Wednesday,21
1,BTLGAs5Ymkk,FLAGRANT,"Young Thug Trial Breakdown, Special Olympics D...","yerrr, we had Brian Simpson on to discuss why ...","[andrew schulz, andrew schultz, comedy, comedi...",2024-08-14 15:04:09+00:00,347137.0,5359.0,,876.0,PT1H57M35S,hd,false,Wednesday,21
2,Rj3s1iS5O1c,FLAGRANT,Peter Dinklage on House Of The Dragon vs Game ...,Yerrr we got Peter Dinklage aka Tyrion Lannist...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-08-07 14:23:14+00:00,1958497.0,38258.0,,3091.0,PT2H10M38S,hd,false,Wednesday,21
3,bRzPmTs_ywA,FLAGRANT,"Lil Yachty on Drake Beef, MrBeast Controversy,...",YERRR we got Lil Yachty on the pod to show us ...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-07-31 14:22:01+00:00,811634.0,15010.0,,2012.0,PT1H53M33S,hd,false,Wednesday,21
4,PXI7zP3Jn7A,FLAGRANT,Trump Survives & Kamala is Dems Hawk Tuah,YERRR We're back babyyyy! Came back with some ...,"[andrew schulz, andrew schultz, comedy, comedi...",2024-07-24 15:01:33+00:00,1076879.0,23519.0,,3638.0,PT2H11M38S,hd,false,Wednesday,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
484,rX4Imv9I8Gw,FLAGRANT,Andrew Schulz - IS THERE ONE ATTRACTIVE FEMINIST?,IS THERE ONE ATTRACTIVE FEMINIST?\n\nThe Flagr...,"[andrew schulz, andrew schultz, comedy, comedi...",2019-07-23 11:00:12+00:00,53740.0,2001.0,,163.0,PT5M46S,hd,false,Tuesday,21
485,TGFmb6H1sM8,FLAGRANT,XXXTENTACION JOKE | Andrew Schulz & Akaash Singh,XXXTENTACION JOKE\n\nThe Flagrant 2 podcast is...,"[andrew schulz, andrew schultz, comedy, comedi...",2019-07-23 11:00:04+00:00,19701.0,598.0,,124.0,PT14M34S,hd,false,Tuesday,24
486,-PQu6rhmRMg,FLAGRANT,Flagrant 2: How To Not Pay Taxes | Full Episod...,"This week Andrew, Akaash, and Kaz discuss: Wes...","[andrew schulz, andrew schultz, comedy, comedi...",2019-07-17 17:00:00+00:00,43212.0,907.0,,86.0,PT2H15M55S,hd,false,Wednesday,28
487,VD5RpAQos34,FLAGRANT,White Girls Dance Better Than Black Girls,#Flagrant2 #AndrewSchulz #AkaashSingh #RealLif...,"[andrew schultz, comedy, comedian, stand up, b...",2019-07-16 22:00:40+00:00,109672.0,2179.0,,159.0,PT7M56S,hd,false,Tuesday,19


In [20]:
# Save the DataFrame to a CSV file in the same folder
video_df.to_csv('flagrant.csv', index=False)

print("DataFrame saved as video_data.csv")

DataFrame saved as video_data.csv
