## Getting API Keys

In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

REDDIT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")

## 📦 Install required libraries

In [None]:
# 📚 Imports
import praw
import pandas as pd
import re
import random
import time
from datetime import datetime, timedelta
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
import nltk
nltk.download("punkt")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## 🔑 Reddit API Setup (Read-only mode)

In [None]:
reddit = praw.Reddit(
    client_id=REDDIT_ID,
    client_secret=REDDIT_SECRET,
    user_agent="youtube_to_reddit_sentiment"
)

## 📥 Load video titles


In [None]:
youtube_df = pd.read_csv("data/youtube_data.csv")

# 🕰️ Optional: Calculate Reddit 'time_filter' based on YouTube’s oldest timestamp
# We'll mock it as 'month' or 'week' here (PRAW only allows fixed ranges)
time_filter = "month"

## 🧹 Clean text

In [4]:
def clean_text(text):
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
    return text.lower()

## 📊 Sentiment analysis


In [5]:
analyzer = SentimentIntensityAnalyzer()
def analyze_sentiment(text):
    return analyzer.polarity_scores(text)["compound"]

In [6]:
# 😃 Convert score to emoji
def sentiment_emoji(score):
    if score >= 0.5:
        return "😃"
    elif score <= -0.5:
        return "😠"
    else:
        return "😐"

## 🧠 Reddit Post + Comment collector


In [7]:
%%time
all_data = []

for topic in youtube_df["video_title"].unique():
    posts = reddit.subreddit("all").search(query=topic, limit=5, time_filter=time_filter)

    for post in posts:
        post_data = {
            "youtube_title": topic,
            "reddit_post_title": post.title,
            "post_score": post.score,
            "post_url": post.url,
            "post_created": pd.to_datetime(post.created_utc, unit="s"),
            "post_sentiment": analyze_sentiment(clean_text(post.title)),
            "post_sentiment_emoji": sentiment_emoji(analyze_sentiment(clean_text(post.title))),
        }

        # ⛓️ Add top 3 comments per post
        post.comments.replace_more(limit=0)
        comments = post.comments[:3]
        for comment in comments:
            cleaned = clean_text(comment.body)
            all_data.append({
                **post_data,
                "comment": comment.body,
                "cleaned_comment": cleaned,
                "comment_sentiment": analyze_sentiment(cleaned),
                "comment_sentiment_emoji": sentiment_emoji(analyze_sentiment(cleaned)),
                "comment_author": str(comment.author),
                "comment_score": comment.score
            })

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

CPU times: user 1.24 s, sys: 52.8 ms, total: 1.29 s
Wall time: 45.4 s


## Analyzing the DataFrame

In [8]:
df = pd.DataFrame(all_data)

In [9]:
df

Unnamed: 0,youtube_title,reddit_post_title,post_score,post_url,post_created,post_sentiment,post_sentiment_emoji,comment,cleaned_comment,comment_sentiment,comment_sentiment_emoji,comment_author,comment_score
0,Doechii - Anxiety (Official Video),Doechii - Anxiety (Official Music Video),479,https://youtu.be/riCP9x31Kuk?si=AUv4Yrakb3hKfwPN,2025-04-18 15:06:28,-0.1779,😐,Stream Anxiety by Doechii out now on Apple Mus...,stream anxiety by doechii out now on apple mus...,0.1531,😐,AutoModerator,1
1,Doechii - Anxiety (Official Video),Doechii - Anxiety (Official Music Video),479,https://youtu.be/riCP9x31Kuk?si=AUv4Yrakb3hKfwPN,2025-04-18 15:06:28,-0.1779,😐,"such a great music video! Also, I love how she...",such a great music video also i love how she r...,0.6310,😃,shouyos,89
2,Doechii - Anxiety (Official Video),Doechii - Anxiety (Official Music Video),479,https://youtu.be/riCP9x31Kuk?si=AUv4Yrakb3hKfwPN,2025-04-18 15:06:28,-0.1779,😐,the homage to her original video is spot on \n...,the homage to her original video is spot on \n...,0.6697,😃,silent-radio4,64
3,Doechii - Anxiety (Official Video),Doechii - Anxiety (Official Video),0,https://www.youtube.com/watch?v=riCP9x31Kuk,2025-04-18 15:15:29,-0.1779,😐,"i mean, i don't think so but it's a really goo...",i mean i dont think so but its a really good v...,0.6474,😃,beholdthecolossus,1
4,Doechii - Anxiety (Official Video),Doechii - Anxiety (Official Video),0,https://www.youtube.com/watch?v=riCP9x31Kuk,2025-04-18 15:15:29,-0.1779,😐,"Good video, bad song, and I don't see any refe...",good video bad song and i dont see any referen...,-0.1531,😐,leninzen,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,The Best Things In Life Are Unexpected... WE'R...,She’s 13 years young,511,https://i.redd.it/q250ofrjj2qe1.jpeg,2025-03-21 16:22:08,0.0000,😐,Love your story and Cookie. So nice to hear go...,love your story and cookie so nice to hear goo...,0.8937,😃,Final-Context6625,5
62,The Best Things In Life Are Unexpected... WE'R...,She’s 13 years young,511,https://i.redd.it/q250ofrjj2qe1.jpeg,2025-03-21 16:22:08,0.0000,😐,Cookie is a doll! What a sweet story.,cookie is a doll what a sweet story,0.4588,😐,NeighborhoodReal6954,5
63,The Best Things In Life Are Unexpected... WE'R...,Antonio strikes again! Anyone have “The Love I...,34,https://i.redd.it/afasnym2jjqe1.jpeg,2025-03-24 01:29:45,0.6705,😃,What I love about these Grace and Antonio stor...,what i love about these grace and antonio stor...,0.9201,😃,AgitatedHorror9355,19
64,The Best Things In Life Are Unexpected... WE'R...,Antonio strikes again! Anyone have “The Love I...,34,https://i.redd.it/afasnym2jjqe1.jpeg,2025-03-24 01:29:45,0.6705,😃,https://author.techdalan.com/the-woman-they-tw...,\n\nor perhaps this one grace and antonio need...,0.4215,😐,Summer_Spring_,11


## Checking Downvoted Comments

In [10]:
df[df["comment_score"] < 0][["comment", "comment_score"]]

for comment, comment_score in zip(df["comment"], df["comment_score"]):
    if comment_score < 0:
        print(f"Comment:\n{comment}", f"Comment Score:\n{comment_score}", sep="\n\n")

## 💾 Save to CSV

In [None]:
df.to_csv("data/reddit_data.csv", index=False)