# Extracting all YouTube Videos posted by the Sahra Wagenknecht YouTube Channel

## imports

In [31]:
from datetime import datetime, timezone
from pathlib import Path

from googleapiclient.discovery import build

In [32]:
import pandas as pd

## API Request

Replace the placeholder `'hidden'` below with your own API key before running the notebook.

In [None]:
# Placeholder only: substitute a personal API key before running; it is
# passed as the developer key when the YouTube client is built.
API_KEY = "hidden"

# ID of the channel whose uploads are collected.
CHANNEL_ID = "UCPH3ZPeqWqRVZ_ef4vOZgSw"

# Timezone-aware (UTC) cutoff used to label each video as published
# before or after the founding date.
FOUNDING_DATE = datetime(year=2024, month=1, day=8, tzinfo=timezone.utc)





In [34]:
def get_uploads_playlist_id(youtube, channel_id):
    """Fetch the uploads playlist ID for a given channel.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: ID of the channel to look up.

    Returns:
        The value stored under ``contentDetails.relatedPlaylists.uploads``
        of the first item in the response.

    Raises:
        ValueError: If the response contains no items for ``channel_id``.
    """
    response = youtube.channels().list(
        part="contentDetails",
        id=channel_id,
    ).execute()

    items = response.get("items") or []
    if not items:
        # Without this guard a failed lookup surfaces as a bare
        # KeyError/IndexError that says nothing about the cause.
        raise ValueError(f"No channel found for channel ID {channel_id!r}")

    return items[0]["contentDetails"]["relatedPlaylists"]["uploads"]


In [35]:

def get_videos_from_playlist(youtube, playlist_id):
    """Get all video IDs and published dates from the uploads playlist.

    Requests the playlist 50 items per page and follows ``nextPageToken``
    until a response no longer carries one.

    Entries whose ``contentDetails`` has no ``videoPublishedAt`` are skipped
    instead of raising ``KeyError``; the rest of this notebook parses
    ``publishedAt`` as a timestamp, so an entry without it cannot be used.
    NOTE(review): presumably these are private or deleted videos; confirm
    against the API documentation.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to read.

    Returns:
        A list of ``{"videoId": ..., "publishedAt": ...}`` dicts in the
        order the API returned them.
    """
    videos = []
    next_page_token = None

    while True:
        response = youtube.playlistItems().list(
            part="contentDetails",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token,
        ).execute()

        # A page without an "items" key is treated as an empty page.
        for item in response.get("items", []):
            details = item["contentDetails"]
            published_at = details.get("videoPublishedAt")
            if published_at is None:
                continue
            videos.append({
                "videoId": details["videoId"],
                "publishedAt": published_at,
            })

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break

    return videos


In [36]:
def get_video_stats(youtube, video_ids):
    """Look up view, like and comment counts for the given video IDs.

    The IDs are sent to ``videos().list`` in comma-joined batches of at
    most 50. Each returned counter is converted to ``int``; a counter that
    is absent from an item's statistics becomes 0. Any requested ID that
    the response does not contain gets an all-zero entry.

    Returns:
        A dict mapping video ID to
        ``{"viewCount": int, "likeCount": int, "commentCount": int}``.
    """
    counter_names = ("viewCount", "likeCount", "commentCount")
    batch_size = 50
    stats = {}

    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="statistics",
            id=",".join(batch),
        ).execute()

        for item in response.get("items", []):
            raw = item["statistics"]
            stats[item["id"]] = {name: int(raw.get(name, 0)) for name in counter_names}

        # IDs missing from the response still get a row, filled with zeros.
        for vid in batch:
            stats.setdefault(vid, {"viewCount": 0, "likeCount": 0, "commentCount": 0})

    return stats

The `main` function below extracts all videos of the channel using the helper functions defined above.

`main` also saves the collected data to a CSV file.

In [None]:

def _build_video_frame(videos, stats, founding_date):
    """Build one DataFrame row per video, labelled relative to founding_date."""
    columns = ["videoId", "publishedAt", "period", "viewCount", "likeCount", "commentCount"]
    # Used only if a video ID has no entry in ``stats``.
    fallback = {"viewCount": 0, "likeCount": None, "commentCount": None}

    rows = []
    for v in videos:
        # ``fromisoformat`` is given an explicit "+00:00" offset in place of
        # the trailing "Z" so the result is timezone-aware and comparable
        # with ``founding_date``.
        published_date = datetime.fromisoformat(v["publishedAt"].replace("Z", "+00:00"))
        video_stats = stats.get(v["videoId"], fallback)
        period = "Before Founding" if published_date < founding_date else "After Founding"

        rows.append({
            "videoId": v["videoId"],
            "publishedAt": published_date,
            "period": period,
            "viewCount": video_stats["viewCount"],
            "likeCount": video_stats["likeCount"],
            "commentCount": video_stats["commentCount"],
        })

    # Passing ``columns`` keeps the header stable even when ``videos`` is empty.
    return pd.DataFrame(rows, columns=columns)


def main(output_path="csv/sahra_wagenknecht_videos.csv"):
    """Collect every upload of CHANNEL_ID with its statistics and save it as CSV.

    Steps: resolve the channel's uploads playlist, list all of its videos,
    fetch their statistics, label each video "Before Founding" or
    "After Founding" relative to FOUNDING_DATE, and write the table to
    ``output_path`` (without the index column).

    Args:
        output_path: Destination CSV file. Its parent directory is created
            if it does not exist yet.

    Returns:
        The resulting DataFrame. It is also stored in the module-level
        ``df_videos`` so that later cells can keep using that name.
    """
    global df_videos
    youtube = build("youtube", "v3", developerKey=API_KEY)

    # Step 1: get uploads playlist
    uploads_playlist = get_uploads_playlist_id(youtube, CHANNEL_ID)

    # Step 2: get all videos
    videos = get_videos_from_playlist(youtube, uploads_playlist)

    # Step 3: get stats for all videos
    video_ids = [v["videoId"] for v in videos]
    stats = get_video_stats(youtube, video_ids)

    # Step 4: build the DataFrame (one row per video, labelled by period)
    df_videos = _build_video_frame(videos, stats, FOUNDING_DATE)

    # Step 5: save, creating the target directory first so that a fresh
    # checkout without it does not fail at the very last step.
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df_videos.to_csv(output_path, index=False)

    return df_videos

if __name__ == "__main__":
    main()

In [45]:
# Display the collected video table.
# NOTE(review): the saved output below has an "Unnamed: 0" column that the
# DataFrame built by main() does not contain. Presumably this frame was
# re-read from a CSV that included an index column; confirm with
# Restart Kernel -> Run All.
df_videos

Unnamed: 0,videoId,publishedAt,period,viewCount,likeCount,commentCount
0,eN3wrhLqZ4o,2025-09-14 07:14:36+00:00,After Founding,47586,4745,274
1,4gNdgJlro-w,2025-09-13 07:14:02+00:00,After Founding,21646,2206,141
2,iSZk3X-2BKI,2025-09-12 13:14:39+00:00,After Founding,25248,2515,137
3,I5wa38u48U4,2025-09-12 07:15:20+00:00,After Founding,40268,2857,176
4,F0sqnVTGv10,2025-09-11 17:18:36+00:00,After Founding,123860,8462,543
...,...,...,...,...,...,...
470,RGhgywjoY7Q,2019-09-04 11:25:08+00:00,Before Founding,1188,93,3
471,GVUVHfp9apo,2019-09-04 10:28:42+00:00,Before Founding,3532,213,10
472,7kpqXdn3puQ,2019-09-04 10:14:40+00:00,Before Founding,10698,696,71
473,sD288bB83RI,2019-06-07 17:05:18+00:00,Before Founding,4344,246,11
