# Extracting comments from the 100 most popular YouTube videos (before and after the founding of the BSW)

## imports

In [None]:
from googleapiclient.discovery import build
import pandas as pd
from datetime import datetime, timezone


In [38]:
from googleapiclient.errors import HttpError

## API request

The placeholder 'hidden' needs to be replaced with your own API key.

BEFORE_CSV and AFTER_CSV are the paths to the stored CSV files.

In [None]:
# Placeholder credential: put your own API key here before running the notebook.
API_KEY = "hidden"

# Timezone-aware (UTC) founding date of the BSW, as named in the notebook title.
FOUNDING_DATE = datetime(year=2024, month=1, day=8, tzinfo=timezone.utc)

# Input CSV files with the most popular videos before / after the founding.
BEFORE_CSV = "csv/MostPopularVideosB.csv"
AFTER_CSV = "csv/MostPopularVideosA.csv"

In [None]:
# YouTube Data API v3 client; get_top_comments reads this module-level name.
youtube = build(serviceName="youtube", version="v3", developerKey=API_KEY)


# -----------------------------
# FUNCTION TO GET COMMENTS FOR A VIDEO
# -----------------------------
def _is_comments_disabled(error):
    """Return True when *error* reports that comments are disabled for the video."""
    try:
        reason = error.error_details[0]["reason"]
    except (AttributeError, IndexError, KeyError, TypeError):
        # error_details may be absent, empty, or not a list of dicts;
        # fall back to inspecting the error text only.
        reason = ""
    return reason == "commentsDisabled" or "commentsDisabled" in str(error)


def get_top_comments(video_id, max_comments=20):
    """
    Fetch top-level comments for a given video, ordered by relevance.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_comments: max number of comments to fetch per video.

    Returns:
        A list of at most ``max_comments`` dicts with the keys "videoId",
        "author", "publishedAt", "likeCount" and "text". The list is empty
        when ``max_comments`` is not positive, and empty or partial when an
        API request fails (e.g. comments are disabled); failures are printed
        and not raised.
    """
    comments = []
    if max_comments <= 0:
        # Nothing requested: avoid spending an API call.
        return comments

    page_size_limit = 100  # commentThreads.list accepts maxResults from 1 to 100
    next_page_token = None

    while len(comments) < max_comments:
        remaining = max_comments - len(comments)
        try:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                # Only ask for as many threads as are still needed.
                maxResults=min(page_size_limit, remaining),
                pageToken=next_page_token,
                order="relevance",
                textFormat="plainText",
            )
            response = request.execute()
        except HttpError as e:
            if _is_comments_disabled(e):
                print(f"[SKIPPED] Comments disabled for video {video_id}")
            else:
                print(f"[ERROR] Could not fetch comments for video {video_id}: {e}")
            break  # Skip this video, keeping whatever was collected so far

        # Slice defensively so the result never exceeds max_comments.
        for item in response.get("items", [])[:remaining]:
            top_comment = item["snippet"]["topLevelComment"]["snippet"]
            comments.append({
                "videoId": video_id,
                "author": top_comment.get("authorDisplayName"),
                "publishedAt": top_comment.get("publishedAt"),
                "likeCount": top_comment.get("likeCount", 0),
                "text": top_comment.get("textDisplay"),
            })

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break

    return comments


In [None]:
def extract_comments(df_videos, period_name):
    """
    Collect the top comments of every video listed in df_videos.

    Each row's "videoId" is passed to get_top_comments, and every returned
    comment is tagged with a "period" entry set to period_name.

    Returns a DataFrame with one row per comment. When any comments were
    collected, "publishedAt" is converted to timezone-aware UTC datetimes;
    otherwise the empty DataFrame is returned as is.
    """
    tagged_rows = []
    for _, video in df_videos.iterrows():
        fetched = get_top_comments(video["videoId"])
        tagged_rows.extend({**entry, "period": period_name} for entry in fetched)

    frame = pd.DataFrame(tagged_rows)
    if frame.empty:
        return frame

    frame["publishedAt"] = pd.to_datetime(frame["publishedAt"], utc=True)
    return frame


## Reading the CSV files and making sure publishedAt is in the required datetime format

In [35]:
# Load the "before" video list and parse publishedAt as timezone-aware UTC datetimes.
df_before = pd.read_csv(BEFORE_CSV).assign(
    publishedAt=lambda frame: pd.to_datetime(frame["publishedAt"], utc=True)
)


In [37]:
# Load the "after" video list and parse publishedAt as timezone-aware UTC datetimes.
df_after = pd.read_csv(AFTER_CSV).assign(
    publishedAt=lambda frame: pd.to_datetime(frame["publishedAt"], utc=True)
)

## Using the extract_comments function to extract the top 20 comments from each video

In [36]:
# Fetch the comments for every video in df_before and label them "Before Founding".
df_comments_before = extract_comments(df_videos=df_before, period_name="Before Founding")


In [39]:

# Fetch the comments for every video in df_after and label them "After Founding".
df_comments_after = extract_comments(df_videos=df_after, period_name="After Founding")


## Saving the comment data to CSV files

In [None]:
# Write the "before" comments to disk (row index included), then show the frame as cell output.
df_comments_before.to_csv(path_or_buf='csv/SahraWagenknechtCommentsBefore.csv', index=True)
df_comments_before

Unnamed: 0,videoId,author,publishedAt,likeCount,text,period
0,WCwx94TOGPQ,@andreasmann1111,2022-02-03T19:47:21Z,5571,Liebe Sahra schön dich gesund zu sehen. Im Geg...,Before Founding
1,WCwx94TOGPQ,@marcelbollier1863,2022-02-03T19:38:12Z,1765,Es tut gut und ist äusserst wichtig sich anzuh...,Before Founding
2,WCwx94TOGPQ,@annemachowinski3292,2022-02-03T19:47:41Z,1594,"Toller Beitrag. Sehr aufschlussreich. Danke, F...",Before Founding
3,WCwx94TOGPQ,@mariuszgruchlik4367,2023-09-28T18:20:14Z,46,Wir sind alle doch Menschen. Wir sollten uns n...,Before Founding
4,WCwx94TOGPQ,@Wollenschrank,2022-02-03T19:43:24Z,723,"Vielen Dank für die sachlichen Darstellungen, ...",Before Founding
...,...,...,...,...,...,...
1995,RhKSSpCXvnQ,@TheAmb1987,2020-12-03T17:03:43Z,138,"so toll reden kann nur jemand, der nicht von i...",Before Founding
1996,RhKSSpCXvnQ,@yassveena,2020-12-04T18:50:39Z,29,Bravo!!! \nDa sieht man wieviel rüberkommt wen...,Before Founding
1997,RhKSSpCXvnQ,@peeachim3835,2020-12-03T17:26:21Z,70,"Liebe Sarah, Dankeschön für wieder mal ein aus...",Before Founding
1998,RhKSSpCXvnQ,@waltraudgrau3560,2020-12-03T17:29:18Z,88,"Sehr gut Frau Wagenknecht , aber warum jetzt e...",Before Founding


In [None]:
# Write the "after" comments to disk (row index included), then show the frame as cell output.
df_comments_after.to_csv(path_or_buf='csv/SahraWagenknechtCommentsAfter.csv', index=True)
df_comments_after

Unnamed: 0,videoId,author,publishedAt,likeCount,text,period
0,qh8xUmT2NyI,@christianstricker4563,2024-07-04T14:46:34Z,56,Ja genau wir wollen Frieden und keine Krigsunt...,After Founding
1,qh8xUmT2NyI,@Armazone-21,2024-05-24T08:14:44Z,1429,"Münte war ja auch der Meinung, es sei ,,unfair...",After Founding
2,qh8xUmT2NyI,@Lyt1991,2024-07-01T13:28:41Z,24,Machen Sie weiter so frau wagenknecht 👍,After Founding
3,qh8xUmT2NyI,@janoschekheldderberge3446,2024-07-09T18:13:34Z,57,❤lichen Dank Frau Wagenknecht! 👍🕊️❤️🕊️👍,After Founding
4,qh8xUmT2NyI,@danielrebmann47,2024-05-26T22:19:50Z,21,"Sahra !! Seit Jahren ehrlich, scharfsinnig. M...",After Founding
...,...,...,...,...,...,...
1975,iL44wTd8Rbk,@JeanDõ-k2b,2025-05-16T09:29:39Z,28,Das freut mich dass Politik so ein auf Deckel ...,After Founding
1976,iL44wTd8Rbk,@adham1881,2025-05-16T06:56:55Z,107,"Ja, machen Sie das publik bitte, liebe Frau Wa...",After Founding
1977,iL44wTd8Rbk,@NothIng-ky7ue,2025-05-16T07:49:01Z,103,Diese frau kriegt keiner unter ❤,After Founding
1978,iL44wTd8Rbk,@yanosh3359,2025-05-16T07:55:17Z,17,Sehr sehr gut zu erfahren!,After Founding
