### Youtube channel statistis Data Analysis using Youtube API

In [1]:
from googleapiclient.discovery import build
from dateutil import parser
import pandas as pd
import numpy as np
import datetime
from IPython.display import JSON

In [2]:
api_key = Your API Key

#### Get Channel id of Kurzgesagt Youtube channel

In [3]:
channel_ids = ['UCsXVk37bltHxD1rDPwtNM8Q']

In [4]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

#### Get all the relevant channel statistics

In [5]:
def get_channel_stats(youtube, channel_ids):
    all_data = []
    
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    #loop through items
    for item in response ['items']:
        data ={'channelName': item['snippet']['title'],
               'subscribers': item['statistics']['subscriberCount'],
               'views': item['statistics']['viewCount'],
               'totalViews': item['statistics']['videoCount'],
               'playlistId': item['contentDetails']['relatedPlaylists']['uploads'],
               'channelOriginCountry': item['snippet']['country']
        }

        all_data.append(data)

    return(pd.DataFrame(all_data))

In [6]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [10]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalViews,playlistId,channelOriginCountry
0,Kurzgesagt – In a Nutshell,21700000,2577844953,206,UUsXVk37bltHxD1rDPwtNM8Q,DE


#### Write the Channel Stats data to CSV file

In [11]:
channel_stats.to_csv(r'D:\MSBI\Portfolio Projects\YouTube API Project\KurzgesagtChannelStats.csv', index = False)

#### Get the list of Video IDs of the given PlaylistID

In [7]:
playlist_id = "UUsXVk37bltHxD1rDPwtNM8Q"
def get_video_ids(youtube, playlist_id):
    
    video_ids = []
    
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 50
    )
    response = request.execute()
    
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')   #Youtube API has limit of 50 results, so we utilizing the NextPageToken to get all the videoIDs
    while next_page_token is not None:
        request = youtube.playlistItems().list(
                    part='contentDetails',
                    playlistId = playlist_id,
                    maxResults = 50,
                    pageToken = next_page_token)
        response = request.execute()

        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

        next_page_token = response.get('nextPageToken')
        
    return video_ids

In [8]:
# Get Video IDs
video_ids = get_video_ids(youtube, playlist_id)

In [9]:
len(video_ids)

206

#### Get all the Statistical details of all the videos with given IDs

In [12]:
def get_video_details(youtube, video_ids):

    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

In [13]:
# Get video details
video_df = get_video_details(youtube, video_ids)
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,6AWYgNArZZw,Kurzgesagt – In a Nutshell,Let’s Shrink Earth to Half Its Size #kurzgesag...,What happens when we decide to shrink Earth to...,"[inanutshell, kurzgesagt, kurzgesagt_inanutshe...",2024-01-11T15:00:09Z,175401,17275,,239,PT58S,hd,false
1,Chh2lgGe2YE,Kurzgesagt – In a Nutshell,You Swallowed a Tapeworm - What Now? #kurzgesa...,We swallowed a tapeworm so you don't have to. ...,"[Kurzgesagt, kurzgesagt – in a nutshell, in a ...",2024-01-04T15:00:16Z,381605,34034,,515,PT1M,hd,false
2,qP65siuTLFc,Kurzgesagt – In a Nutshell,Cooking a Chicken in a Particle Accelerator #k...,Who needs an oven when you can just use a part...,"[kurzgesagt, kurzgesagt_inanutshell, kurzgesag...",2023-12-28T15:00:39Z,1213032,100117,,1155,PT1M,hd,false
3,Z_1Q0XB4X0Y,Kurzgesagt – In a Nutshell,The SMALLEST to the LARGEST Thing in The Unive...,This video was sponsored by GiveWell. \nFirst-...,,2023-12-17T15:00:53Z,3945415,180088,,6145,PT12M31S,hd,true
4,nGggU-Cxhv0,Kurzgesagt – In a Nutshell,Your Tattoo is INSIDE Your Immune System. Lite...,Go to https://brilliant.org/nutshell/ to dive ...,,2023-12-12T15:00:01Z,8828025,300173,,10424,PT9M9S,hd,true
...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,F3QpgXBtDeo,Kurzgesagt – In a Nutshell,How The Stock Exchange Works (For Dummies),Why are there stocks at all?\n\nEveryday in th...,"[Stock market, wallstreet, stock exchange, mar...",2013-11-28T17:03:32Z,8308424,130576,,7213,PT3M34S,hd,true
202,UuGrBhK2c7U,Kurzgesagt – In a Nutshell,The Gulf Stream Explained,Learn about the role of the sea in global warm...,"[global conveyor belt, climate change, global ...",2013-10-11T19:11:39Z,5954981,64956,,1988,PT5M4S,hd,true
203,Uti2niW2BRA,Kurzgesagt – In a Nutshell,Fracking explained: opportunity or danger,Fracking explained in five minutes.\n\nFrackin...,"[Fracking, controversy, oil, gas, nature, wate...",2013-09-03T09:12:24Z,7264918,101542,,8075,PT5M3S,hd,true
204,KsF_hdjWJjo,Kurzgesagt – In a Nutshell,The Solar System -- our home in space,An Infographic trip through the wonders of the...,"[Solar system, planets, earth, mars, moon, jup...",2013-08-22T13:24:56Z,6245328,83707,,6180,PT7M21S,hd,true


#### Get the first 10 comments in text from all videos of th given IDs

In [14]:
def get_comments_in_videos(youtube, video_ids):
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)     

In [16]:
comments_df = get_comments_in_videos(youtube, video_ids)
comments_df

Unnamed: 0,video_id,comments
0,6AWYgNArZZw,"[That alolan Exeggutor at the end is gold 😂, n..."
1,Chh2lgGe2YE,[Always wash out your mouth before and after u...
2,qP65siuTLFc,[“So we can’t recommend…cooking a chicken with...
3,Z_1Q0XB4X0Y,[This video was sponsored by GiveWell. \nFirst...
4,nGggU-Cxhv0,[Go to https://brilliant.org/nutshell/ to dive...
...,...,...
201,F3QpgXBtDeo,[Head over to our shop to get exclusive kurzge...
202,UuGrBhK2c7U,[Head over to our shop to get exclusive kurzge...
203,Uti2niW2BRA,[Head over to our shop to get exclusive kurzge...
204,KsF_hdjWJjo,[You want to learn more about space? Check out...


#### Write Comments data to CSV file

In [17]:
comments_df.to_csv(r'D:\MSBI\Portfolio Projects\YouTube API Project\KurzgesagtComments.csv', index = False)

In [18]:
comments_df['comments'][0]

['That alolan Exeggutor at the end is gold 😂',
 "no, if you are cutting half of the mass of earth, there would be less gravity.\nBut if you compress or shrink Earth, then gravity will increase as all the matter in earth is closer together, and their attractive forces pull on you more.\n\nAlso, if you are just cutting the mass of earth in half, it likely won't spin faster as the mass wasn't pulled in, just cut in half",
 'I love the alolan exegutor in the background at the end',
 'What if it was smaller, but still dense enough to have 1g.',
 'Hi',
 'Wonder how this would look if the earth was just compressed to half its size. Same mass as current earth',
 'About rotation I agree, about gravity I disagree',
 'What about 2x size 🧐',
 "I can never understand the witches' role in the story. Do they still watch over Earthbread like we would with a bowl of goldfish?",
 "How about 'moon'?"]

### Data Pre-Processing

In [19]:
video_df.isnull().any()

video_id          False
channelTitle      False
title             False
description       False
tags               True
publishedAt       False
viewCount         False
likeCount         False
favouriteCount     True
commentCount      False
duration          False
definition        False
caption           False
dtype: bool

In [20]:
video_df.dtypes

video_id          object
channelTitle      object
title             object
description       object
tags              object
publishedAt       object
viewCount         object
likeCount         object
favouriteCount    object
commentCount      object
duration          object
definition        object
caption           object
dtype: object

In [21]:
# Convert count columns to numeric
numeric_cols = ['viewCount', 'likeCount', 'favouriteCount', 'commentCount']
video_df[numeric_cols] = video_df[numeric_cols].apply(pd.to_numeric, errors = 'coerce', axis = 1)

In [22]:
# Publish day in the week
video_df['publishedAt'] = video_df['publishedAt'].apply(lambda x: parser.parse(x)) 
video_df['publishDayName'] = video_df['publishedAt'].apply(lambda x: x.strftime("%A")) 

In [23]:
# convert duration to seconds
import isodate
video_df['durationSecs'] = video_df['duration'].apply(lambda x: isodate.parse_duration(x))
video_df['durationSecs'] = video_df['durationSecs']/np.timedelta64(1, 's')

In [24]:
video_df[['durationSecs', 'duration']] 

Unnamed: 0,durationSecs,duration
0,58.0,PT58S
1,60.0,PT1M
2,60.0,PT1M
3,751.0,PT12M31S
4,549.0,PT9M9S
...,...,...
201,214.0,PT3M34S
202,304.0,PT5M4S
203,303.0,PT5M3S
204,441.0,PT7M21S


In [25]:
# Add tag count
video_df['tagCount'] = video_df['tags'].apply(lambda x: 0 if x is None else len(x))

In [26]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption,publishDayName,durationSecs,tagCount
0,6AWYgNArZZw,Kurzgesagt – In a Nutshell,Let’s Shrink Earth to Half Its Size #kurzgesag...,What happens when we decide to shrink Earth to...,"[inanutshell, kurzgesagt, kurzgesagt_inanutshe...",2024-01-11 15:00:09+00:00,175401.0,17275.0,,239.0,PT58S,hd,false,Thursday,58.0,15
1,Chh2lgGe2YE,Kurzgesagt – In a Nutshell,You Swallowed a Tapeworm - What Now? #kurzgesa...,We swallowed a tapeworm so you don't have to. ...,"[Kurzgesagt, kurzgesagt – in a nutshell, in a ...",2024-01-04 15:00:16+00:00,381605.0,34034.0,,515.0,PT1M,hd,false,Thursday,60.0,16
2,qP65siuTLFc,Kurzgesagt – In a Nutshell,Cooking a Chicken in a Particle Accelerator #k...,Who needs an oven when you can just use a part...,"[kurzgesagt, kurzgesagt_inanutshell, kurzgesag...",2023-12-28 15:00:39+00:00,1213032.0,100117.0,,1155.0,PT1M,hd,false,Thursday,60.0,15
3,Z_1Q0XB4X0Y,Kurzgesagt – In a Nutshell,The SMALLEST to the LARGEST Thing in The Unive...,This video was sponsored by GiveWell. \nFirst-...,,2023-12-17 15:00:53+00:00,3945415.0,180088.0,,6145.0,PT12M31S,hd,true,Sunday,751.0,0
4,nGggU-Cxhv0,Kurzgesagt – In a Nutshell,Your Tattoo is INSIDE Your Immune System. Lite...,Go to https://brilliant.org/nutshell/ to dive ...,,2023-12-12 15:00:01+00:00,8828025.0,300173.0,,10424.0,PT9M9S,hd,true,Tuesday,549.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,F3QpgXBtDeo,Kurzgesagt – In a Nutshell,How The Stock Exchange Works (For Dummies),Why are there stocks at all?\n\nEveryday in th...,"[Stock market, wallstreet, stock exchange, mar...",2013-11-28 17:03:32+00:00,8308424.0,130576.0,,7213.0,PT3M34S,hd,true,Thursday,214.0,21
202,UuGrBhK2c7U,Kurzgesagt – In a Nutshell,The Gulf Stream Explained,Learn about the role of the sea in global warm...,"[global conveyor belt, climate change, global ...",2013-10-11 19:11:39+00:00,5954981.0,64956.0,,1988.0,PT5M4S,hd,true,Friday,304.0,19
203,Uti2niW2BRA,Kurzgesagt – In a Nutshell,Fracking explained: opportunity or danger,Fracking explained in five minutes.\n\nFrackin...,"[Fracking, controversy, oil, gas, nature, wate...",2013-09-03 09:12:24+00:00,7264918.0,101542.0,,8075.0,PT5M3S,hd,true,Tuesday,303.0,28
204,KsF_hdjWJjo,Kurzgesagt – In a Nutshell,The Solar System -- our home in space,An Infographic trip through the wonders of the...,"[Solar system, planets, earth, mars, moon, jup...",2013-08-22 13:24:56+00:00,6245328.0,83707.0,,6180.0,PT7M21S,hd,true,Thursday,441.0,28


#### Calculated Columns for varoius metrics of Comments, Likes and TitleLength

In [27]:
cols = ['viewCount', 'likeCount', 'favouriteCount', 'commentCount']
video_df[cols] = video_df[cols].apply(pd.to_numeric, errors='coerce', axis=1)

In [28]:
# Comments and likes per 1000 view ratio
video_df['likeRatio'] = video_df['likeCount'] / video_df['viewCount'] * 1000
video_df['commentRatio'] = video_df['commentCount'] / video_df['viewCount'] * 1000

In [29]:
# Title character length
video_df['titleLength'] = video_df['title'].apply(lambda x: len(x))

In [30]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption,publishDayName,durationSecs,tagCount,likeRatio,commentRatio,titleLength
0,6AWYgNArZZw,Kurzgesagt – In a Nutshell,Let’s Shrink Earth to Half Its Size #kurzgesag...,What happens when we decide to shrink Earth to...,"[inanutshell, kurzgesagt, kurzgesagt_inanutshe...",2024-01-11 15:00:09+00:00,175401.0,17275.0,,239.0,PT58S,hd,false,Thursday,58.0,15,98.488606,1.362592,55
1,Chh2lgGe2YE,Kurzgesagt – In a Nutshell,You Swallowed a Tapeworm - What Now? #kurzgesa...,We swallowed a tapeworm so you don't have to. ...,"[Kurzgesagt, kurzgesagt – in a nutshell, in a ...",2024-01-04 15:00:16+00:00,381605.0,34034.0,,515.0,PT1M,hd,false,Thursday,60.0,16,89.186462,1.349563,56
2,qP65siuTLFc,Kurzgesagt – In a Nutshell,Cooking a Chicken in a Particle Accelerator #k...,Who needs an oven when you can just use a part...,"[kurzgesagt, kurzgesagt_inanutshell, kurzgesag...",2023-12-28 15:00:39+00:00,1213032.0,100117.0,,1155.0,PT1M,hd,false,Thursday,60.0,15,82.534509,0.952160,63
3,Z_1Q0XB4X0Y,Kurzgesagt – In a Nutshell,The SMALLEST to the LARGEST Thing in The Unive...,This video was sponsored by GiveWell. \nFirst-...,,2023-12-17 15:00:53+00:00,3945415.0,180088.0,,6145.0,PT12M31S,hd,true,Sunday,751.0,0,45.644881,1.557504,80
4,nGggU-Cxhv0,Kurzgesagt – In a Nutshell,Your Tattoo is INSIDE Your Immune System. Lite...,Go to https://brilliant.org/nutshell/ to dive ...,,2023-12-12 15:00:01+00:00,8828025.0,300173.0,,10424.0,PT9M9S,hd,true,Tuesday,549.0,0,34.002283,1.180785,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,F3QpgXBtDeo,Kurzgesagt – In a Nutshell,How The Stock Exchange Works (For Dummies),Why are there stocks at all?\n\nEveryday in th...,"[Stock market, wallstreet, stock exchange, mar...",2013-11-28 17:03:32+00:00,8308424.0,130576.0,,7213.0,PT3M34S,hd,true,Thursday,214.0,21,15.716097,0.868155,42
202,UuGrBhK2c7U,Kurzgesagt – In a Nutshell,The Gulf Stream Explained,Learn about the role of the sea in global warm...,"[global conveyor belt, climate change, global ...",2013-10-11 19:11:39+00:00,5954981.0,64956.0,,1988.0,PT5M4S,hd,true,Friday,304.0,19,10.907843,0.333838,25
203,Uti2niW2BRA,Kurzgesagt – In a Nutshell,Fracking explained: opportunity or danger,Fracking explained in five minutes.\n\nFrackin...,"[Fracking, controversy, oil, gas, nature, wate...",2013-09-03 09:12:24+00:00,7264918.0,101542.0,,8075.0,PT5M3S,hd,true,Tuesday,303.0,28,13.977033,1.111506,41
204,KsF_hdjWJjo,Kurzgesagt – In a Nutshell,The Solar System -- our home in space,An Infographic trip through the wonders of the...,"[Solar system, planets, earth, mars, moon, jup...",2013-08-22 13:24:56+00:00,6245328.0,83707.0,,6180.0,PT7M21S,hd,true,Thursday,441.0,28,13.403139,0.989540,37


#### Write the final cleaned and pre-processed DataFrame into CSV file

In [31]:
video_df.to_csv(r'D:\MSBI\Portfolio Projects\YouTube API Project\KurzgesagtDataSet.csv', index = False)