## Data Scraping from YouTube with Data API

In [2]:
import json
import os

import pandas as pd
import requests

### Objective: 
#### For a given "YouTube channel ID", we need to return the fields listed below for each video in it.
#### Our final data should have 
        1. video_id
        2. channel_id
        3. published_date 
        4. video_title 
        5. video_description 
        6. likes 
        7. dislikes (not collected in the code below, which has no dislikes column)
        8. views
        9. comment_count

In [3]:
# Empty frame that receives the scraped results, one row per video.
# The column order here is the order the rows are assembled in further down.
data_columns = [
    'video_id',
    'channel_id',
    'published_date',
    'video_title',
    'video_description',
    'likes',
    'views',
    'comment_count',
]
data_df = pd.DataFrame(columns=data_columns)
data_df.head()

Unnamed: 0,video_id,channel_id,published_date,video_title,video_description,likes,views,comment_count


### To access this API
#### 1. We need an API key (free of cost)
(Go to: https://console.developers.google.com/apis/)


In [4]:
# The key is read from the YOUTUBE_API_KEY environment variable so that the
# credential is never stored in the notebook source or its history.
# People who are aiming to reproduce the results should obtain their own API key
# and export it before starting Jupyter, e.g. `export YOUTUBE_API_KEY=...`.
api_key = os.environ.get('YOUTUBE_API_KEY', '')
if not api_key:
    # Warn rather than raise so the notebook can still be opened and read;
    # every API request below will be rejected until a key is provided.
    print("YOUTUBE_API_KEY is not set - the API requests below will fail until it is.")

#### 2. Call API (as per our needs)

(Go to: https://developers.google.com/youtube/v3)

In [5]:
channel_Id = 'UCbqcG1rdt9LMwOJN4PyGTKg' #Id of the KaraFun YouTube Channel
playlist_Id = 'PLBXUDwJU8lxFbS0xfXsrFh-s-FbQwUw2N' #Id of the KaraFun YouTube Top 50 Songs Playlist

# Request the first page of the playlist (the URL asks for 50 items via maxResults=50).
url = f"https://www.googleapis.com/youtube/v3/playlistItems?part=snippet%2CcontentDetails&maxResults=50&playlistId={playlist_Id}&key={api_key}"
# A timeout keeps a stalled connection from hanging the cell indefinitely.
response_data = requests.get(url, timeout=30)
json_data = json.loads(response_data.text)

video_Ids = []
if 'items' not in json_data:
    # Report a failed call instead of silently leaving video_Ids empty.
    # Only the status code and the API's message are shown - never the URL,
    # because the URL embeds the API key.
    api_error = json_data.get('error')
    api_message = api_error.get('message') if isinstance(api_error, dict) else api_error
    print(f"playlistItems request returned no items (HTTP {response_data.status_code}): {api_message}")

for item in json_data.get('items', []):
    # Skip an entry without a video id rather than abandoning the rest of the page.
    video_Id = item.get('contentDetails', {}).get('videoId')
    if video_Id is not None:
        video_Ids.append(video_Id)

In [6]:
# Continue from the page currently held in json_data: use the continuation
# token the API returned with it instead of a hard-coded token, and keep
# requesting pages until the API stops returning one. A playlist that fits on
# a single page (or a failed first request) therefore triggers no request here.
nextPageToken = json_data.get('nextPageToken')
while nextPageToken:
    url = f"https://www.googleapis.com/youtube/v3/playlistItems?part=snippet%2CcontentDetails&maxResults=50&playlistId={playlist_Id}&key={api_key}&pageToken={nextPageToken}"
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    response_data = requests.get(url, timeout=30)
    json_data = json.loads(response_data.text)

    if 'items' not in json_data:
        # Report a failed call instead of silently ignoring it. Only the status
        # code and the API's message are shown - never the URL, which embeds the key.
        api_error = json_data.get('error')
        api_message = api_error.get('message') if isinstance(api_error, dict) else api_error
        print(f"playlistItems request returned no items (HTTP {response_data.status_code}): {api_message}")

    for item in json_data.get('items', []):
        # Skip an entry without a video id rather than abandoning the rest of the page.
        video_Id = item.get('contentDetails', {}).get('videoId')
        if video_Id is not None:
            video_Ids.append(video_Id)

    # An error response or the last page carries no token, which ends the loop.
    nextPageToken = json_data.get('nextPageToken')

In [7]:
len(video_Ids)
# The Top 50 Playlist was shown to have 100 videos with 2 being unavailable, so this makes sense so far

100

In [8]:
# Put the data into the df
for i,video_Id in enumerate(video_Ids):
  try:
    url = f"https://www.googleapis.com/youtube/v3/videos?part=statistics,snippet&key={api_key}&id={video_Id}"
    response_data = requests.get(url)
    data = json.loads(response_data.text)
    channel_id = data['items'][0]['snippet']['channelId']      
    published_date = data['items'][0]['snippet']['publishedAt']    
    video_title =  data['items'][0]['snippet']['title']     
    video_description = data['items'][0]['snippet']['description']
    likes =  data["items"][0]["statistics"]["likeCount"]
    views = data["items"][0]["statistics"]["viewCount"]
    comment_count = data["items"][0]["statistics"]['commentCount']
    row = [video_Id,channel_id,published_date,
           video_title,video_description,
           likes,views,comment_count]
    data_df.loc[i]=row
  except:
    pass # avoid the aforementioned two invalid videos

In [9]:
# Show the populated frame (one row per video whose details were fetched).
data_df

Unnamed: 0,video_id,channel_id,published_date,video_title,video_description,likes,views,comment_count
0,F2vbPzCGj6c,UCbqcG1rdt9LMwOJN4PyGTKg,2012-04-03T07:48:45Z,A Thousand Years - Christina Perri | Karaoke V...,Download MP3: https://www.karaoke-version.com/...,92468,25295671,4250
1,BvW4efU0HTQ,UCbqcG1rdt9LMwOJN4PyGTKg,2013-05-14T11:22:16Z,Karaoke Colgando en tus manos (duo) - Carlos B...,Descargar MP3: https://www.version-karaoke.es/...,44774,25660436,792
2,81_2Xb0XB_E,UCbqcG1rdt9LMwOJN4PyGTKg,2011-11-07T15:42:27Z,Zombie - The Cranberries | Karaoke Version | K...,Download MP3: https://www.karaoke-version.com/...,96514,27130172,1617
3,0qE3egNettY,UCbqcG1rdt9LMwOJN4PyGTKg,2013-06-13T11:26:27Z,Just Give Me a Reason - Pink feat. Nate Ruess ...,Download MP3: https://www.karaoke-version.com/...,41231,18239667,740
4,IQEzFe9-bsg,UCbqcG1rdt9LMwOJN4PyGTKg,2012-05-11T11:12:06Z,My Heart Will Go On - Céline Dion (Titanic) | ...,Download MP3: https://www.karaoke-version.com/...,45402,15201664,1705
...,...,...,...,...,...,...,...,...
95,HTXiC8g3F40,UCbqcG1rdt9LMwOJN4PyGTKg,2011-11-07T15:14:15Z,Total Eclipse of the Heart - Bonnie Tyler | Ka...,Download MP3: https://www.karaoke-version.com/...,10221,6014148,284
96,0ukH4rBQE48,UCbqcG1rdt9LMwOJN4PyGTKg,2013-08-06T11:37:06Z,Let Her Go - Passenger | Karaoke Version | Kar...,Download MP3: https://www.karaoke-version.com/...,20780,5771976,479
97,ouRPDe_AS68,UCbqcG1rdt9LMwOJN4PyGTKg,2012-09-06T08:41:14Z,Patience - Guns N' Roses | Karaoke Version | K...,Download MP3: https://www.karaoke-version.com/...,16644,5928351,484
98,c8PAcfhRoes,UCbqcG1rdt9LMwOJN4PyGTKg,2012-12-04T10:39:14Z,Don't Cry - Guns N' Roses | Karaoke Version | ...,Download MP3: https://www.karaoke-version.com/...,15997,5543728,333


In [10]:
# Deduplicate on video_id rather than on the whole row: each occurrence of a
# video was fetched with its own request, so live statistics such as the view
# count can differ between two rows of the same video, and a whole-row
# comparison would then keep both. The first occurrence is kept.
data_df2 = data_df.drop_duplicates(subset='video_id')
data_df2
# This explains why there were 100 videos in a top 50 playlist. Many duplicates.

Unnamed: 0,video_id,channel_id,published_date,video_title,video_description,likes,views,comment_count
0,F2vbPzCGj6c,UCbqcG1rdt9LMwOJN4PyGTKg,2012-04-03T07:48:45Z,A Thousand Years - Christina Perri | Karaoke V...,Download MP3: https://www.karaoke-version.com/...,92468,25295671,4250
1,BvW4efU0HTQ,UCbqcG1rdt9LMwOJN4PyGTKg,2013-05-14T11:22:16Z,Karaoke Colgando en tus manos (duo) - Carlos B...,Descargar MP3: https://www.version-karaoke.es/...,44774,25660436,792
2,81_2Xb0XB_E,UCbqcG1rdt9LMwOJN4PyGTKg,2011-11-07T15:42:27Z,Zombie - The Cranberries | Karaoke Version | K...,Download MP3: https://www.karaoke-version.com/...,96514,27130172,1617
3,0qE3egNettY,UCbqcG1rdt9LMwOJN4PyGTKg,2013-06-13T11:26:27Z,Just Give Me a Reason - Pink feat. Nate Ruess ...,Download MP3: https://www.karaoke-version.com/...,41231,18239667,740
4,IQEzFe9-bsg,UCbqcG1rdt9LMwOJN4PyGTKg,2012-05-11T11:12:06Z,My Heart Will Go On - Céline Dion (Titanic) | ...,Download MP3: https://www.karaoke-version.com/...,45402,15201664,1705
...,...,...,...,...,...,...,...,...
88,sZk7e2yETCQ,UCbqcG1rdt9LMwOJN4PyGTKg,2014-10-28T09:58:02Z,Sway - Bic Runga | Karaoke Version | KaraFun,Download MP3: https://www.karaoke-version.com/...,13083,7604782,94
89,3yCecD_23tA,UCbqcG1rdt9LMwOJN4PyGTKg,2011-12-07T14:37:19Z,Torn - Natalie Imbruglia | Karaoke Version | K...,Download MP3: https://www.karaoke-version.com/...,11683,6793538,381
90,_YJ_SHDienw,UCbqcG1rdt9LMwOJN4PyGTKg,2014-11-21T10:04:16Z,Jealous Guy - John Lennon | Karaoke Version | ...,Download MP3: https://www.karaoke-version.com/...,15000,7621320,172
93,vnsZ6VrprHw,UCbqcG1rdt9LMwOJN4PyGTKg,2016-06-16T08:50:55Z,Complicated - Avril Lavigne | Karaoke Version ...,Download MP3: https://www.karaoke-version.com/...,27072,7675332,221


In [15]:
# reset_index() renumbers the rows and keeps the previous row labels as a
# regular column named 'index' (it appears as the first column of the CSV text
# produced further down).
# NOTE(review): the cell reassigns data_df2 from itself, so running it more
# than once resets the index again - confirm that re-runs are not expected.
data_df2 = data_df2.reset_index()

In [17]:
# No path is given, so to_csv returns the CSV text instead of writing a file;
# index=False leaves the row labels out of that text.
data_df2.to_csv(index=False)

'index,video_id,channel_id,published_date,video_title,video_description,likes,views,comment_count\r\n0,F2vbPzCGj6c,UCbqcG1rdt9LMwOJN4PyGTKg,2012-04-03T07:48:45Z,A Thousand Years - Christina Perri | Karaoke Version | KaraFun,"Download MP3: https://www.karaoke-version.com/mp3-backingtrack/christina-perri/a-thousand-years.html\n\r\nSing Online: https://www.karafun.com/karaoke/christina-perri/a-thousand-years/\r\n\n\r\n* This version contains a low volume vocal guide to help you learn the song. The karaoke version without the vocal guide is available on www.karafun.com. This recording is a cover of A Thousand Years as made famous by Christina Perri - This version is not the original version, and is not performed by Christina Perri. This instrumental/playback version contains a vocal guide, the lyrics and backing vocals.\r\n\n\r\nAll the assets on KaraFun channels are used by permission under licensing agreement with rights holders (music composition, sound re-recording).\r\n\r\n#karafun #k

In [18]:
# Export data to csv
# First copy: next to the notebook (path relative to the working directory).
data_df2.to_csv('karayt.csv')

# Second copy: the current user's Downloads folder, resolved from the home
# directory instead of a path that only exists on one particular machine.
downloads_dir = os.path.join(os.path.expanduser('~'), 'Downloads')
if os.path.isdir(downloads_dir):
    data_df2.to_csv(os.path.join(downloads_dir, 'karayt.csv'))
else:
    print(f"No Downloads folder at {downloads_dir}; only ./karayt.csv was written.")
# NOTE(review): both files include the row labels as an unnamed first column,
# unlike the index=False preview - confirm which form is wanted.