## <b> YouTube API Project</b>

In [1]:
import os

import pandas as pd
from googleapiclient.discovery import build
from IPython.display import JSON

In [2]:
# Read the API key from the YOUTUBE_API_KEY environment variable so the real
# key never has to be saved inside the notebook. The original placeholder is
# kept as a fallback for readers who prefer to paste their key here.
apiKey = os.environ.get('YOUTUBE_API_KEY') or 'YOUR API KEY'
# YouTube channel IDs to analyse.
channelIds = ["UCpqXJOEqGS-TCnazcHCo0rA", # theRadBrad
             ]

In [3]:
# Which Google API, and which version of it, the client should talk to.
api_service_name, api_version = "youtube", "v3"

# Shared client object; every request in this notebook is issued through it.
youtube = build(api_service_name, api_version, developerKey=apiKey)

In [4]:
def getChannelStats(youtube, channelIds):
    """Fetch headline statistics for one or more channels.

    Parameters
    ----------
    youtube : API client
        Object exposing ``channels().list(...).execute()``.
    channelIds : list of str or str
        Channel ID(s), forwarded unchanged as the ``id`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per channel in the response, with the columns
        ``channelName``, ``playlistId`` (the uploads playlist), ``views``,
        ``subscribers`` and ``totalVideos``. Statistic values are stored
        exactly as they appear in the response; a statistic missing from
        the response is stored as ``None``. The columns are present even
        when no channel is returned.
    """
    columns = ['channelName', 'playlistId', 'views', 'subscribers', 'totalVideos']
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channelIds
    )
    response = request.execute()

    data = []
    # Tolerate a response without 'items' (e.g. no channel matched) instead of
    # raising KeyError.
    for channel in response.get('items', []):
        # Individual statistics are not guaranteed to be present (presumably
        # a channel can hide its subscriber count -- confirm against the API
        # reference), so look them up defensively.
        stats = channel.get('statistics', {})
        data.append({
            'channelName': channel['snippet']['title'],
            'playlistId': channel['contentDetails']['relatedPlaylists']['uploads'],
            'views': stats.get('viewCount'),
            'subscribers': stats.get('subscriberCount'),
            'totalVideos': stats.get('videoCount'),
        })
    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(data, columns=columns)

In [5]:
# Fetch the summary statistics for every channel listed in channelIds.
channelStats = getChannelStats(youtube, channelIds)

In [6]:
# Show the channel summary table (one row per channel).
channelStats

Unnamed: 0,channelName,playlistId,views,subscribers,totalVideos
0,theRadBrad,UUpqXJOEqGS-TCnazcHCo0rA,5911850483,13200000,7935


In [7]:
# Uploads playlist of theRadBrad. This is the same value shown in the
# playlistId column of channelStats; it is hard-coded here, so it must be
# updated by hand if channelIds changes.
playlistIds = 'UUpqXJOEqGS-TCnazcHCo0rA'

In [8]:
def getVideoIds(youtube, playlistIds):
    """Collect the ID of every video in a playlist, following pagination.

    Parameters
    ----------
    youtube : API client
        Object exposing ``playlistItems().list(...).execute()``.
    playlistIds : str
        ID of the playlist to walk (despite the plural name it is passed
        straight through as the single ``playlistId`` request parameter).

    Returns
    -------
    list of str
        Video IDs in the order the pages return them.
    """
    videoIds = []
    next_page_token = None
    while True:
        params = {
            'part': "snippet,contentDetails",
            'maxResults': 50,
            'playlistId': playlistIds,
        }
        # The first request carries no pageToken; later ones resume from the
        # token handed back by the previous page.
        if next_page_token is not None:
            params['pageToken'] = next_page_token
        response = youtube.playlistItems().list(**params).execute()

        for item in response.get('items', []):
            videoIds.append(item['contentDetails']['videoId'])

        # Advance the token once per page, outside the item loop. Updating it
        # inside the loop left it unchanged whenever a page had no items,
        # which made the function request the same page forever.
        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            return videoIds

In [9]:
# Walk the uploads playlist and collect the ID of every video it lists.
videoIds = getVideoIds(youtube, playlistIds)

In [10]:
# Sanity check on how many IDs were collected. NOTE(review): the count shown
# below is lower than totalVideos in channelStats -- presumably some videos
# are not exposed through the uploads playlist; confirm.
len(videoIds)

7883

In [11]:
def getVideoDetails(youtube, videoIds):
    """Fetch metadata and statistics for the given videos.

    The IDs are requested in batches of 50 per call.

    Parameters
    ----------
    youtube : API client
        Object exposing ``videos().list(...).execute()``.
    videoIds : list of str
        Video IDs to look up.

    Returns
    -------
    pandas.DataFrame
        One row per returned video with ``videoId`` followed by the fields
        listed in ``details`` below. A field that is missing from the
        response is stored as ``None``. An empty ``videoIds`` produces an
        empty frame without issuing any request.
    """
    # Response section -> fields copied from that section into the output row.
    details = {'snippet': ['channelTitle', 'title', 'publishedAt', 'tags', 'description'],
               'contentDetails': ['duration', 'caption', 'definition'],
               'statistics': ['viewCount', 'likeCount', 'commentCount']
              }
    batch_size = 50
    allDetails = []
    for start in range(0, len(videoIds), batch_size):
        request = youtube.videos().list(
            part="snippet, contentDetails, statistics",
            id=','.join(videoIds[start:start + batch_size])
        )
        response = request.execute()

        for video in response.get('items', []):
            videoInfo = {'videoId': video['id']}
            for section, fields in details.items():
                for field in fields:
                    # Best-effort lookup: a missing section or field becomes
                    # None. Only lookup failures are caught so unrelated
                    # errors (and KeyboardInterrupt) are not swallowed.
                    try:
                        videoInfo[field] = video[section][field]
                    except (KeyError, TypeError):
                        videoInfo[field] = None
            allDetails.append(videoInfo)
    return pd.DataFrame(allDetails)

In [12]:
# Fetch the per-video metadata for every collected ID; the bare name on the
# last line renders the resulting frame as the cell output.
videoDetails = getVideoDetails(youtube, videoIds)
videoDetails

Unnamed: 0,videoId,channelTitle,title,publishedAt,tags,description,duration,caption,definition,viewCount,likeCount,commentCount
0,yor2MQwuOWw,theRadBrad,THE CALLISTO PROTOCOL PS5 Walkthrough Gameplay...,2022-12-17T19:00:30Z,"[The Callisto Protocol, Callisto Protocol, The...",PS5 The Callisto Protocol Walkthrough Gameplay...,PT26M18S,false,hd,56734,4028,398
1,_94CM0rTQag,theRadBrad,THE CALLISTO PROTOCOL PS5 Walkthrough Gameplay...,2022-12-16T19:00:23Z,"[The Callisto Protocol, Callisto Protocol, The...",PS5 The Callisto Protocol Walkthrough Gameplay...,PT25M3S,false,hd,73312,4984,512
2,MW_k0XuwUmA,theRadBrad,GOTHAM KNIGHTS Walkthrough Gameplay Part 17 - ...,2022-12-15T23:00:19Z,"[Gotham Knights, Gotham Knights Gameplay, Goth...",PS5 Gotham Knights Walkthrough Gameplay Part 1...,PT33M9S,false,hd,41716,1930,300
3,L4bYpQcxZXU,theRadBrad,THE CALLISTO PROTOCOL PS5 Walkthrough Gameplay...,2022-12-15T19:00:06Z,"[The Callisto Protocol, Callisto Protocol, The...",PS5 The Callisto Protocol Walkthrough Gameplay...,PT25M19S,false,hd,82032,5059,476
4,EU4Turoatnk,theRadBrad,THE CALLISTO PROTOCOL PS5 Walkthrough Gameplay...,2022-12-14T19:00:06Z,"[The Callisto Protocol, Callisto Protocol, The...",PS5 The Callisto Protocol Walkthrough Gameplay...,PT24M2S,false,hd,93036,6002,454
...,...,...,...,...,...,...,...,...,...,...,...,...
7878,cjeVjNPDuJk,theRadBrad,Damaj's Nuke on Salvage (Commentary by theRadB...,2010-04-19T00:46:55Z,"[Call of Duty, Modern Warfare 2, Call of Duty ...",Check out my channel at: http://www.youtube.c...,PT7M49S,false,hd,16094,547,51
7879,JkIcx6gjb3g,theRadBrad,theRadBrad's Tar-21 TDM on Rust (MW2 Gameplay/...,2010-04-15T23:06:44Z,"[Modern Warfare 2, Call of Duty, Call of Duty ...",Check out my channel at: http://www.youtube.co...,PT3M50S,false,hd,20488,693,58
7880,KP4SXSJQXNY,theRadBrad,Modern Warfare 2: theRadBrad's Tar-21 Flawless...,2010-04-11T09:56:00Z,"[Modern Warfare 2, Call of Duty, Call of Duty ...",Check out my channel at: http://www.youtube.co...,PT5M30S,false,hd,41214,891,138
7881,Ye9lTlCx7As,theRadBrad,Modern Warfare 2: theRadBrad's Random Clips,2010-04-08T01:54:00Z,"[theRadBrad, RadBrad, Modern Warfare 2, MW2, G...",Check out my channel at: http://www.youtube.co...,PT2M13S,false,hd,29122,889,127


## Data Pre-processing

In [13]:
# Inspect dtypes and non-null counts before cleaning; at this point every
# column is still of dtype object.
videoDetails.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7883 entries, 0 to 7882
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   videoId       7883 non-null   object
 1   channelTitle  7883 non-null   object
 2   title         7883 non-null   object
 3   publishedAt   7883 non-null   object
 4   tags          7881 non-null   object
 5   description   7883 non-null   object
 6   duration      7883 non-null   object
 7   caption       7883 non-null   object
 8   definition    7883 non-null   object
 9   viewCount     7883 non-null   object
 10  likeCount     7880 non-null   object
 11  commentCount  7883 non-null   object
dtypes: object(12)
memory usage: 739.2+ KB


In [14]:
from dateutil import parser
# Turn the ISO-8601 timestamp strings returned by the API into timezone-aware
# datetimes.
videoDetails['publishedAt'] = videoDetails['publishedAt'].map(parser.parse)
# Weekday name on which each video was published.
videoDetails['publishedDay'] = videoDetails['publishedAt'].map(
    lambda published: published.strftime('%A')
)

In [15]:
numCols = ['viewCount','likeCount','commentCount']
# Convert the count columns from strings to numbers, one column at a time.
# The previous axis=1 form called pd.to_numeric once per row, which is slow
# and makes the resulting dtype depend on the data. Unparseable or missing
# values become NaN, and float64 is requested explicitly so every count
# column has the same dtype whether or not it contains gaps.
videoDetails[numCols] = (
    videoDetails[numCols]
    .apply(pd.to_numeric, errors='coerce')
    .astype('float64')
)
videoDetails.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7883 entries, 0 to 7882
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype                  
---  ------        --------------  -----                  
 0   videoId       7883 non-null   object                 
 1   channelTitle  7883 non-null   object                 
 2   title         7883 non-null   object                 
 3   publishedAt   7883 non-null   datetime64[ns, tzutc()]
 4   tags          7881 non-null   object                 
 5   description   7883 non-null   object                 
 6   duration      7883 non-null   object                 
 7   caption       7883 non-null   object                 
 8   definition    7883 non-null   object                 
 9   viewCount     7883 non-null   float64                
 10  likeCount     7880 non-null   float64                
 11  commentCount  7883 non-null   float64                
 12  publishedDay  7883 non-null   object                 
dtypes: 

In [16]:
import isodate
# Parse the ISO-8601 duration strings (e.g. 'PT26M18S') into timedeltas.
videoDetails['duration'] = videoDetails['duration'].apply(isodate.parse_duration)
# Express each duration as a float number of seconds. total_seconds() is used
# instead of astype('timedelta64[s]') because the astype form only yields
# floats on older pandas releases; newer ones return a timedelta dtype.
videoDetails['duration'] = pd.to_timedelta(videoDetails['duration']).dt.total_seconds()

In [17]:
# Check the converted durations (now numeric seconds).
videoDetails[['duration']]

Unnamed: 0,duration
0,1578.0
1,1503.0
2,1989.0
3,1519.0
4,1442.0
...,...
7878,469.0
7879,230.0
7880,330.0
7881,133.0


In [18]:
# Count missing values per column to see which fields have gaps.
videoDetails.isna().sum()

videoId         0
channelTitle    0
title           0
publishedAt     0
tags            2
description     0
duration        0
caption         0
definition      0
viewCount       0
likeCount       3
commentCount    0
publishedDay    0
dtype: int64

In [19]:
# Number of tags attached to each video; a video with no tags (None) counts
# as zero.
videoDetails['tagCount'] = videoDetails['tags'].map(
    lambda tags: len(tags) if tags is not None else 0
)

In [20]:
# Persist the cleaned dataset to the working directory. With to_csv's default
# settings the row index is written as an unnamed first column, so read the
# file back with index_col=0 (or pass index=False here) to avoid an extra
# "Unnamed: 0" column.
videoDetails.to_csv('YouTubeAPIprojectDataset.csv')