## Getting API Keys

In [13]:
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment so the
# Reddit credentials never have to be written into the notebook itself.
load_dotenv()

# Each value is None when the corresponding variable is not set.
REDDIT_ID = os.environ.get("REDDIT_CLIENT_ID")
REDDIT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET")

## 📦 Import required libraries

In [14]:
# 📚 Imports
import praw
import pandas as pd
import re
import random
import time
from datetime import datetime, timedelta
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
import nltk
# Fetch NLTK's "punkt" package; when it is already present NLTK just logs
# that it is up to date.
# NOTE(review): word_tokenize is imported above but is not called in the cells
# shown here -- confirm whether this download is still needed.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /home/codespace/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## 🔑 Reddit API Setup (Read-only mode)

In [15]:
# Build the PRAW client from the credentials read from the environment earlier
# in the notebook. Only client_id, client_secret and user_agent are supplied
# (no username/password).
# NOTE(review): PRAW is expected to treat such an instance as read-only, which
# matches this section's heading -- confirm against the PRAW docs.
reddit = praw.Reddit(
    client_id=REDDIT_ID,
    client_secret=REDDIT_SECRET,
    user_agent="youtube_to_reddit_sentiment"
)

## 📥 Load video titles


In [16]:
# Table of YouTube videos; the collector cell reads its "video_title" column
# and uses each distinct title as a Reddit search query.
youtube_df = pd.read_csv("../data/youtube_data.csv")

# 🕰️ Reddit search window. This is a hard-coded value: nothing here derives it
# from the YouTube timestamps. PRAW's search only accepts fixed ranges
# (documented as "all", "day", "hour", "month", "week", "year"), so an exact
# date range cannot be requested.
time_filter = "month"

## 🧹 Clean text

In [17]:
def clean_text(text):
    """Normalize ``text`` for sentiment scoring.

    Steps, in order:
      1. delete every ``http...`` run up to the next whitespace character;
      2. delete every character that is not an ASCII letter, a digit or
         whitespace (this also removes accented letters and emoji);
      3. lower-case what is left.

    Whitespace is not collapsed, so removed URLs can leave double spaces and
    newlines are kept as they are.

    NOTE(review): VADER's documentation describes punctuation and capitalization
    as intensity cues, so cleaning before scoring may flatten the scores --
    confirm that this is intended.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    without_urls = re.sub(r"http\S+", "", text)
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", without_urls)
    return alnum_only.lower()

## 📊 Sentiment analysis


In [18]:
# One analyzer instance is created here and shared by every call below.
analyzer = SentimentIntensityAnalyzer()


def analyze_sentiment(text):
    """Return the VADER "compound" score for ``text``.

    Args:
        text: The string to score.

    Returns:
        The value stored under the "compound" key of the dict returned by
        ``analyzer.polarity_scores(text)``.
    """
    scores = analyzer.polarity_scores(text)
    return scores["compound"]

In [19]:
# 😃 Convert score to emoji
def sentiment_emoji(score, threshold=0.5):
    """Map a sentiment score to an emoji label.

    Args:
        score: Numeric sentiment score (this notebook passes VADER compound
            scores).
        threshold: Non-negative magnitude a score must reach to be labelled
            positive or negative. The default of 0.5 reproduces the cut-offs
            that used to be hard-coded here.
            NOTE(review): the VADER README suggests +/-0.05 as the usual
            cut-offs; confirm that the stricter 0.5 is intentional.

    Returns:
        "😃" when ``score >= threshold``, "😠" when ``score <= -threshold``,
        otherwise "😐".

    Raises:
        ValueError: If ``threshold`` is negative, because the positive and
            negative bands would then overlap.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    # The positive check runs first, so with threshold == 0 a score of exactly
    # 0 is labelled positive.
    if score >= threshold:
        return "😃"
    if score <= -threshold:
        return "😠"
    return "😐"

## 🧠 Reddit Post + Comment collector


In [20]:
%%time
# Collection limits, named so they can be tuned in one place.
POSTS_PER_TOPIC = 5     # search results kept per YouTube title
COMMENTS_PER_POST = 3   # leading top-level comments kept per post

# One dict per (post, comment) pair; a later cell turns this into a DataFrame.
all_data = []

# dropna() keeps a missing title (NaN) from being sent to Reddit as a query.
for topic in youtube_df["video_title"].dropna().unique():
    posts = reddit.subreddit("all").search(
        query=topic, limit=POSTS_PER_TOPIC, time_filter=time_filter
    )

    for post in posts:
        # Score the title once and reuse it for both the number and the emoji.
        post_sentiment = analyze_sentiment(clean_text(post.title))
        post_data = {
            "youtube_title": topic,
            "reddit_post_title": post.title,
            "post_score": post.score,
            "post_url": post.url,
            "post_created": pd.to_datetime(post.created_utc, unit="s"),
            "post_sentiment": post_sentiment,
            "post_sentiment_emoji": sentiment_emoji(post_sentiment),
        }

        # ⛓️ Add the first COMMENTS_PER_POST top-level comments of the post.
        # NOTE(review): replace_more(limit=0) is expected to drop the
        # "load more comments" placeholders without fetching them, so the slice
        # below only sees real comments -- confirm against the PRAW docs.
        post.comments.replace_more(limit=0)

        # Rows are only appended inside this loop, so a post with no comments
        # contributes nothing to all_data.
        for comment in post.comments[:COMMENTS_PER_POST]:
            cleaned = clean_text(comment.body)
            comment_sentiment = analyze_sentiment(cleaned)
            all_data.append({
                **post_data,
                "comment": comment.body,
                "cleaned_comment": cleaned,
                "comment_sentiment": comment_sentiment,
                "comment_sentiment_emoji": sentiment_emoji(comment_sentiment),
                # str() turns a missing author object into the text "None".
                "comment_author": str(comment.author),
                "comment_score": comment.score
            })

CPU times: user 1.74 s, sys: 93.7 ms, total: 1.84 s
Wall time: 1min 25s


## Analyzing the DataFrame

In [21]:
# One row per collected comment; the post-level fields are repeated on every
# row that belongs to the same post.
df = pd.DataFrame(all_data)

In [22]:
# Bare last expression so Jupyter renders the frame; long frames and long text
# cells are shown truncated.
df

Unnamed: 0,youtube_title,reddit_post_title,post_score,post_url,post_created,post_sentiment,post_sentiment_emoji,comment,cleaned_comment,comment_sentiment,comment_sentiment_emoji,comment_author,comment_score
0,Brawl Talk: A NEW BRAWLER RARITY?!,A NEW BRAWLER RARITY?! Brawl Talk is tomorrow!,2815,https://i.redd.it/qie7r1pu3lve1.jpeg,2025-04-18 12:08:41,0.0000,😐,General reminder for subreddit members: Simple...,general reminder for subreddit members simple ...,0.5859,😃,AutoModerator,1
1,Brawl Talk: A NEW BRAWLER RARITY?!,A NEW BRAWLER RARITY?! Brawl Talk is tomorrow!,2815,https://i.redd.it/qie7r1pu3lve1.jpeg,2025-04-18 12:08:41,0.0000,😐,RYAN IS BACK BABY,ryan is back baby,0.0000,😐,Exciting-Year-2343,1070
2,Brawl Talk: A NEW BRAWLER RARITY?!,A NEW BRAWLER RARITY?! Brawl Talk is tomorrow!,2815,https://i.redd.it/qie7r1pu3lve1.jpeg,2025-04-18 12:08:41,0.0000,😐,the silhuette of the prawler is probably in en...,the silhuette of the prawler is probably in en...,0.7783,😃,gamer_withnolife,608
3,Brawl Talk: A NEW BRAWLER RARITY?!,BRAWL TALK LIVE DICUSSION: Talk about the new ...,11,https://www.reddit.com/gallery/1k2y8ba,2025-04-19 15:01:59,0.0000,😐,ultra legendary. jesus. \*frick.\*\n\nEDIT: I'...,ultra legendary jesus frick\n\nedit im scared ...,-0.6697,😠,Capital-Ad3018,11
4,Brawl Talk: A NEW BRAWLER RARITY?!,BRAWL TALK LIVE DICUSSION: Talk about the new ...,11,https://www.reddit.com/gallery/1k2y8ba,2025-04-19 15:01:59,0.0000,😐,They fucked up with the new rarity,they fucked up with the new rarity,-0.6597,😠,Alive-Skeleton,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,Star Wars Zero Company | Official Announce Tra...,Star Wars Zero Company | Official Announce Tra...,228,https://www.youtube.com/watch?v=rcxnRaZ6slU,2025-04-19 07:33:57,-0.5574,😠,I'm excited. For years I automatically ignored...,im excited for years i automatically ignored t...,0.1548,😐,BitterBubblegum,31
190,Star Wars Zero Company | Official Announce Tra...,Star Wars Zero Company | Official Announce Tra...,228,https://www.youtube.com/watch?v=rcxnRaZ6slU,2025-04-19 07:33:57,-0.5574,😠,"I'm not a huge Star Wars fan, but a tactical g...",im not a huge star wars fan but a tactical gam...,0.9330,😃,lars_rosenberg,4
191,Star Wars Zero Company | Official Announce Tra...,Star Wars Zero Company | Official Announce Tra...,90,https://www.youtube.com/watch?v=rcxnRaZ6slU,2025-04-19 07:38:31,-0.5574,😠,"Welcome back, commander.",welcome back commander,0.4588,😐,Solo_Wing_Buddy,50
192,Star Wars Zero Company | Official Announce Tra...,Star Wars Zero Company | Official Announce Tra...,90,https://www.youtube.com/watch?v=rcxnRaZ6slU,2025-04-19 07:38:31,-0.5574,😠,[More details here.](https://www.gamewatcher.c...,more details here most notably\n\nplayers will...,-0.0516,😐,green715,47


## Checking Downvoted Comments

In [23]:
# Select the downvoted rows once and reuse the selection for both outputs.
downvoted = df.loc[df["comment_score"] < 0, ["comment", "comment_score"]]

# Print each downvoted comment in full, since the table view truncates long text.
for comment, comment_score in zip(downvoted["comment"], downvoted["comment_score"]):
    print(f"Comment:\n{comment}", f"Comment Score:\n{comment_score}", sep="\n\n")

# Keep this as the last expression: Jupyter only renders a bare expression
# when it ends the cell, so the original filter at the top was never shown.
downvoted

## 💾 Save to CSV

In [24]:
# Persist the collected rows; index=False leaves the DataFrame index out of
# the file so only the data columns are written.
df.to_csv("../data/reddit_data.csv", index=False)