In [4]:
import os # pyright: ignore[reportUndefinedVariable]
import requests
import pandas as pd

# Read the YouTube Data API key from the environment so it is never hardcoded
# in the notebook. A variable that is set but blank is rejected as well:
# it would otherwise slip past the check and be sent along as the key.
API_KEY = os.environ.get('YOUTUBE_API_KEY')
if API_KEY is None or not API_KEY.strip():
    raise RuntimeError('Set YOUTUBE_API_KEY in the environment before running this notebook')
# Channel ID passed to the API as `channelId`.
CHANNEL_ID = "UC4SVo0Ue36XCfOyb5Lh1viQ"



In [5]:
# Endpoint and query parameters for the YouTube Data API v3 `search.list` call.
url = "https://www.googleapis.com/youtube/v3/search"
params = {
    "key": API_KEY,
    "channelId": CHANNEL_ID,
    "part": "snippet,id",
    # Restrict results to videos. Without this, search.list defaults to
    # "video,channel,playlist", and the non-video results (which carry no
    # `videoId`) take up slots in every page of a quota-expensive request.
    "type": "video",
    "order": "date",      # per the API reference: reverse chronological by creation date
    "maxResults": 50,     # per the API reference: 50 is the largest accepted page size
    # NOTE(review): `hl` does not appear among the documented search.list
    # parameters -- confirm it has any effect on this request.
    "hl": "en",
    "regionCode": "US",
}

def extract_video_fields(item):
    """Return ``(video_id, title, published_at)`` for one search result item.

    Args:
        item: A mapping with an ``"id"`` mapping holding ``"videoId"`` and a
            ``"snippet"`` mapping holding ``"title"`` and ``"publishedAt"``.

    Returns:
        A 3-tuple of the values stored under those keys, in that order.

    Raises:
        KeyError: If any of the keys above is missing.
    """
    # Look up the id first, then the snippet fields, so a malformed item
    # fails on the same key it always has.
    identifier = item["id"]["videoId"]
    snippet = item["snippet"]
    return identifier, snippet["title"], snippet["publishedAt"]

# Walk every page of search results and collect one
# [video_id, title, published] row per video result.
videos = []
next_page_token = None

while True:
    # Build this page's query from a copy so the shared `params` dict never
    # ends up holding a stale `pageToken` after the loop finishes.
    page_params = dict(params)
    if next_page_token:
        page_params['pageToken'] = next_page_token

    try:
        resp = requests.get(url, params=page_params, timeout=10)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Only transport, HTTP-status and JSON-decoding failures are treated
        # as a failed request; anything else is a bug and should surface.
        # Best effort: stop paging and keep whatever was collected so far.
        print('Request failed:', e)
        break

    for item in data.get("items", []):
        # Results without a `videoId` (e.g. channels or playlists) are skipped.
        if "videoId" in item.get("id", {}):
            videos.append(list(extract_video_fields(item)))

    # The API omits `nextPageToken` on the last page.
    next_page_token = data.get("nextPageToken")
    if not next_page_token:
        break

In [6]:
# Tabulate the collected rows, preview the first five, and write the full
# table to CSV without the index column.
column_names = ["video_id", "title", "published_date"]
output_path = "../data/raw/youtube_metadata.csv"

df = pd.DataFrame(data=videos, columns=column_names)
print(df.head(n=5))
df.to_csv(output_path, index=False)


      video_id                                              title  \
0  fPopFeqw5sk             Learn NumPy data types in 8 minutes! 💱   
1  xf71dRBRP6o                Build a music player with Python! 🎶   
2  k0D3MQwLn7A            Learn Python iterators in 7 minutes! ➡️   
3  ZBlxaXMN_hU  Learn Python generator expressions in 9 minute...   
4  c9vhHUGdav0                      Learn Matplotlib in 1 hour! 📊   

         published_date  
0  2025-11-21T15:40:45Z  
1  2025-11-20T16:51:00Z  
2  2025-11-10T14:21:48Z  
3  2025-11-07T15:08:16Z  
4  2025-10-09T14:01:03Z  
