In [36]:
import os
import sys

# Resolve the parent of the current working directory to an absolute path and
# add it to sys.path (only if it is not already there) so that the `src`
# package imported below can be found.
# NOTE(review): assumes the kernel's working directory sits one level below
# the project root — confirm for how this notebook is launched.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

from src.api_client import get_youtube_client
import pandas as pd


In [37]:
# Create the client object that the fetch helpers below receive as `youtube`.
# NOTE(review): how get_youtube_client() obtains credentials is not visible in
# this notebook — see src/api_client.
youtube = get_youtube_client()

In [38]:
def fetch_playlist_videos(youtube, playlist_id):
    """Collect the video IDs of every item in a playlist.

    Requests the playlist's items 50 at a time (``contentDetails`` part only)
    and keeps following ``list_next`` until it hands back a falsy request,
    i.e. there is no further page.

    Args:
        youtube: Client object exposing ``playlistItems().list(...)`` and
            ``playlistItems().list_next(request, response)``.
        playlist_id: ID of the playlist to walk.

    Returns:
        list: ``contentDetails.videoId`` of each item, in the order the pages
        returned them.
    """
    collected = []

    page_request = youtube.playlistItems().list(
        part="contentDetails",
        playlistId=playlist_id,
        maxResults=50,
    )

    while page_request:
        page = page_request.execute()
        collected.extend(
            entry["contentDetails"]["videoId"] for entry in page["items"]
        )
        # list_next builds the request for the following page from the
        # current request/response pair.
        page_request = youtube.playlistItems().list_next(page_request, page)

    return collected


def fetch_video_statistics(youtube, video_ids, batch_size=50):
    """Fetch title, publish time, counters and duration for a list of videos.

    The IDs are sent to ``youtube.videos().list`` in comma-joined chunks of
    ``batch_size`` and one record is produced per item in each response.

    Args:
        youtube: Client object exposing ``videos().list(part=..., id=...)``
            whose result has an ``execute()`` method.
        video_ids: Sequence of video ID strings (must support ``len`` and
            slicing).
        batch_size: Number of IDs sent per request. Defaults to 50, the chunk
            size this function has always used.
            NOTE(review): the API is understood to cap the ``id`` list at 50
            entries — confirm before passing a larger value.

    Returns:
        pandas.DataFrame: Columns ``video_id``, ``title``, ``published_at``,
        ``views``, ``likes``, ``comments``, ``duration`` — always present and
        in this order, even when ``video_ids`` is empty or no items come back.
        Counters missing from an item's ``statistics`` are recorded as 0;
        ``duration`` is the unparsed string from ``contentDetails``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Fixed schema so an empty result still has the expected columns instead
    # of a column-less frame that breaks downstream column access.
    columns = [
        "video_id",
        "title",
        "published_at",
        "views",
        "likes",
        "comments",
        "duration",
    ]
    records = []

    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="snippet,statistics,contentDetails",
            id=",".join(batch),
        ).execute()

        for item in response["items"]:
            snippet = item["snippet"]
            stats = item["statistics"]
            records.append({
                "video_id": item["id"],
                "title": snippet["title"],
                "published_at": snippet["publishedAt"],
                # A counter can be absent from the statistics payload;
                # fall back to 0 rather than failing.
                "views": int(stats.get("viewCount", 0)),
                "likes": int(stats.get("likeCount", 0)),
                "comments": int(stats.get("commentCount", 0)),
                "duration": item["contentDetails"]["duration"],
            })

    return pd.DataFrame(records, columns=columns)

In [39]:
# Playlist to pull; replace with any other playlist ID.
PLAYLIST_ID = "PLwem0A53mZRr9QzyydE_6Mtk2uTEcc1UX"

# Step 1: walk the playlist and collect the ID of every video in it.
video_ids = fetch_playlist_videos(youtube, PLAYLIST_ID)
# Step 2: look up per-video metadata and counters, one row per returned video.
df_raw = fetch_video_statistics(youtube, video_ids)
# Bare last expression so the notebook renders a preview of the first rows.
df_raw.head()

Unnamed: 0,video_id,title,published_at,views,likes,comments,duration
0,l5PTG1m9vEE,She Used To Be A Man,2020-04-20T12:00:21Z,5205053,81166,7098,PT1M5S
1,7wQ93t7q8ss,Women VS. Men,2020-04-17T12:00:04Z,1797149,45202,3550,PT1M14S
2,qsMiiLHVDe8,Why I Wake Up Early,2020-04-15T12:00:23Z,3184634,64681,5124,PT1M11S
3,mn2UAPYD5PM,How Armenia Teaches Kids,2020-04-14T12:00:24Z,604307,13521,1062,PT1M10S
4,ZuLC4j_ohdw,The Hidden Cost Of Japan,2020-04-13T12:00:32Z,2230513,45776,2212,PT1M7S


In [40]:
# Destination of the raw pull, relative to the notebook's working directory.
RAW_VIDEOS_CSV = "../data/raw/videos/only_1_minute_videos.csv"

# to_csv does not create missing parent directories; make sure the target
# folder exists so this cell also works on a fresh checkout / Run All.
os.makedirs(os.path.dirname(RAW_VIDEOS_CSV), exist_ok=True)

# Write the fetched frame without the positional index column.
df_raw.to_csv(
    RAW_VIDEOS_CSV,
    index=False
)
