# Pull All YouTube Video IDs From My Channel

Code authored by: Shaw Talebi <br>

Blog link: https://medium.com/towards-data-science/how-to-build-data-pipelines-for-machine-learning-b97bbef050a5 <br>
Video link: https://youtu.be/OnIQrDiTtRM

### imports

In [1]:
import requests
import json
import polars as pl
from my_sk import my_key

### functions

In [2]:
def getVideoRecords(response: requests.models.Response) -> list:
    """
        Build one flat record per YouTube video found in a GET request response.

        The response text is parsed as JSON and its 'items' entries are scanned.
        Entries whose id kind is not "youtube#video" are skipped. Every remaining
        entry becomes a dict with the keys 'video_id', 'datetime' and 'title'.
    """

    payload = json.loads(response.text)

    return [
        {
            'video_id': entry['id']['videoId'],
            'datetime': entry['snippet']['publishedAt'],
            'title': entry['snippet']['title'],
        }
        for entry in payload['items']
        # keep videos only; any other result kind is dropped
        if entry['id']['kind'] == "youtube#video"
    ]

### extract video IDs (+ datetime, title)

In [3]:
# accumulator for the video records gathered from every results page
video_record_list = []

# no page token yet (stays None until a response supplies the next one)
page_token = None

# YouTube Data API v3 search endpoint
url = 'https://www.googleapis.com/youtube/v3/search'

# ID of the channel whose videos are pulled
channel_id = 'UCa9gErQ9AE5jT2DZLjXBIdA'

In [4]:
%%time
# extract video data across multiple search result pages
# NOTE: 0 is the "no more pages" sentinel. It is kept (rather than breaking out of
# the loop) so that re-running this cell after it has finished does not fetch and
# append the same records again.
while page_token != 0:
    # define parameters for API call
    # (requests leaves out parameters whose value is None, so the first call is
    # sent without a pageToken)
    params = {
        "key": my_key,
        'channelId': channel_id,
        'part': ["snippet","id"],
        'order': "date",
        'maxResults': 50,
        'pageToken': page_token,
    }
    # make get request; the timeout keeps a stalled connection from hanging the cell
    response = requests.get(url, params=params, timeout=30)
    # fail loudly on an HTTP error status instead of trying to parse an error body
    response.raise_for_status()

    # append video records to list
    video_record_list += getVideoRecords(response)

    # grab next page token; when the response has none this was the last page,
    # so fall back to the sentinel that ends the while loop
    page_token = json.loads(response.text).get('nextPageToken', 0)

CPU times: user 62.1 ms, sys: 8.74 ms, total: 70.9 ms
Wall time: 712 ms


### write data to file

In [5]:
# write data to file
import os

# make sure the output folder exists so the writes below do not fail because it is missing
os.makedirs('data', exist_ok=True)

# build the DataFrame once and reuse it for both output formats
df_videos = pl.DataFrame(video_record_list)
df_videos.write_parquet('data/video-ids.parquet')
df_videos.write_csv('data/video-ids.csv')