# Pull All Video IDs From YouTube

#### Imports 

In [None]:
import requests
import json
import polars as pl
import os 
 
from dotenv import load_dotenv


In [21]:
# load_dotenv() comes from python-dotenv; it is expected to populate the
# process environment from a local .env file before the lookup below.
load_dotenv()

# API key used to authenticate the YouTube Data API requests
my_key = os.getenv('my_key')

# Fail fast: os.getenv returns None when the variable is not set, and that
# would otherwise only show up later as a failed API call.
if not my_key:
    raise RuntimeError(
        "Environment variable 'my_key' is not set; "
        "define it in the environment or in a .env file."
    )


#### Extract video IDs (+ datetime, title)


In [22]:
def getVideoRecords(response: "requests.models.Response") -> list:
    """
        Extract YouTube video records from a search GET request response.

        The response body (``response.text``) is parsed as JSON. Every entry
        of its ``items`` list whose ``id.kind`` is ``'youtube#video'`` is
        turned into one record; all other entries are skipped.

        Args:
            response: response object whose ``text`` attribute holds the
                JSON body returned by the API.

        Returns:
            A list of dicts with the keys ``video_id``, ``datetime`` and
            ``title``, in the order the items appear in the response.

        Raises:
            KeyError: if the payload has no ``items`` field, or a video item
                lacks one of the expected fields.
    """
    payload = json.loads(response.text)

    # If the payload has no 'items' (e.g. the API returned an error object
    # instead of results), report what actually came back rather than a bare
    # KeyError('items').
    if 'items' not in payload:
        raise KeyError(
            f"no 'items' in response payload: {response.text[:200]!r}"
        )

    return [
        {
            'video_id': raw_item['id']['videoId'],
            'datetime': raw_item['snippet']['publishedAt'],
            'title': raw_item['snippet']['title'],
        }
        for raw_item in payload['items']
        # only video results carry an id.videoId; skip every other kind
        if raw_item['id']['kind'] == 'youtube#video'
    ]
    

In [23]:
# endpoint that the search requests are sent to
url = 'https://www.googleapis.com/youtube/v3/search'

# ID of the channel whose videos are looked up
channel_id = 'UCa9gErQ9AE5jT2DZLjXBIdA'

# accumulator for the video records gathered from every result page
video_record_list = list()

# no page token yet, so the first request is sent without one
page_token = None

In [24]:
%%time
# extract video data across multiple search result pages
# page_token is None for the first request and is set to 0 once a response
# carries no 'nextPageToken'; 0 is the sentinel that ends the loop (and keeps
# a re-run of this cell from fetching again until page_token is reset).
while page_token != 0:
    # define parameters for API call
    params = {
        "key": my_key,
        'channelId': channel_id,
        'part': ["snippet","id"],
        'order': "date",
        'maxResults':50,
        'pageToken': page_token
    }

    # the timeout keeps a stalled connection from hanging the cell indefinitely
    response = requests.get(url, params=params, timeout=30)

    # stop with a clear HTTP error instead of failing later while parsing
    # a body that is not a result page
    response.raise_for_status()

    # extract video data from response
    video_record_list += getVideoRecords(response)

    # a missing 'nextPageToken' means this was the last page; only that case
    # ends the loop, any other error now propagates instead of being swallowed
    page_token = json.loads(response.text).get('nextPageToken', 0)

CPU times: user 28.5 ms, sys: 8.64 ms, total: 37.1 ms
Wall time: 1.1 s


#### write data to file



In [28]:
# make sure the output folder exists before writing into it
os.makedirs('data', exist_ok=True)

# build the frame once and write it out in both formats
video_df = pl.DataFrame(video_record_list)
video_df.write_parquet('data/video_ids.parquet')
video_df.write_csv('data/video_ids.csv')

# read the parquet file back and show the first rows as a quick check
df = pl.read_parquet('data/video_ids.parquet')
print(df.head())

shape: (5, 3)
┌─────────────┬──────────────────────┬─────────────────────────────────┐
│ video_id    ┆ datetime             ┆ title                           │
│ ---         ┆ ---                  ┆ ---                             │
│ str         ┆ str                  ┆ str                             │
╞═════════════╪══════════════════════╪═════════════════════════════════╡
│ bZr2vhoXSy8 ┆ 2025-02-08T18:10:05Z ┆ I Trained FLUX.1 on My Face (P… │
│ QvxuR8uLPFs ┆ 2025-02-03T18:00:00Z ┆ How to Build Customer Segments… │
│ W4s6b2ZM6kI ┆ 2025-01-31T22:38:22Z ┆ Fine-tuning Multimodal Embeddi… │
│ hOLBrIjRAj4 ┆ 2025-01-22T21:25:16Z ┆ Fine-Tuning Text Embeddings Fo… │
│ V1BR2tb_e8g ┆ 2025-01-13T21:10:47Z ┆ My AI Development Setup (From … │
└─────────────┴──────────────────────┴─────────────────────────────────┘
