In [1]:
import os
from datetime import datetime, timezone

import pandas as pd
import praw
from dotenv import load_dotenv

In [2]:
# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# Build the PRAW client used by the cells below. Each constructor keyword is
# filled from the environment variable paired with it; a variable that is not
# set yields None for that keyword.
reddit = praw.Reddit(
    **{
        keyword: os.environ.get(variable)
        for keyword, variable in (
            ('client_id', 'REDDIT_CLIENT_ID'),
            ('client_secret', 'REDDIT_CLIENT_SECRET'),
            ('user_agent', 'REDDIT_USER_AGENT'),
            ('username', 'REDDIT_USERNAME'),
            ('password', 'REDDIT_PASSWORD'),
        )
    }
)

In [3]:
# Subreddits to sample, each paired with the label that the collection loop
# passes to fetch_from_subreddit as `score` (stored as 'emotional_score').
# NOTE(review): presumably 1.0 is the most positive end of the scale and 0.0
# the most negative; confirm the intended meaning of the scale.
subreddits = dict(
    [
        ("gratitude", 1.0),
        ("happy", 0.9),
        ("TodayIAmHappy", 0.8),
        ("BenignExistence", 0.7),
        ("Emotions", 0.6),
        ("offmychest", 0.4),
        ("vent", 0.3),
        ("lonely", 0.2),
        ("Anxiety", 0.1),
        ("SuicideWatch", 0.0),
    ]
)

In [4]:
def fetch_from_subreddit(subreddit, score, limit=500, client=None):
    """Collect text posts from a subreddit's "hot" listing and label them.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        score: Value stored under ``'emotional_score'`` on every record.
        limit: Maximum number of submissions requested from the listing. The
            number of records returned can be lower because of the filters.
        client: Object exposing ``subreddit(name).hot(limit=...)``. When
            omitted, the notebook-level ``reddit`` client is used, so existing
            three-argument calls behave as before.

    Returns:
        list[dict]: One record per kept submission with the keys ``title``,
        ``selftext``, ``subreddit``, ``created_utc``, ``id`` and
        ``emotional_score``. ``created_utc`` is a naive ``datetime`` expressed
        in UTC.
    """
    if client is None:
        # Fall back to the client created earlier in the notebook.
        client = reddit

    print(f"Fetching from r/{subreddit}...")
    posts = []

    for submission in client.subreddit(subreddit).hot(limit=limit):
        # Drop pinned posts and posts whose score is 1 or lower.
        if submission.stickied or submission.score <= 1:
            continue

        title = submission.title
        selftext = submission.selftext or ''

        # Both a title and a body are required; check this before building the
        # combined text so an empty/None title never reaches the concatenation.
        if not title or not selftext:
            continue
        if len((title + "\n" + selftext).strip()) < 30:
            continue

        # datetime.fromtimestamp() without tz converts to the machine's local
        # timezone, which made the 'created_utc' column depend on where the
        # notebook ran. Convert in UTC explicitly, then drop tzinfo so the
        # value stays a naive datetime and the CSV format is unchanged.
        created = datetime.fromtimestamp(
            submission.created_utc, tz=timezone.utc
        ).replace(tzinfo=None)

        posts.append({
            'title': title,
            'selftext': selftext,
            'subreddit': subreddit,
            'created_utc': created,
            'id': submission.id,
            'emotional_score': score
        })
    print(f"Collected {len(posts)} posts from r/{subreddit}")
    return posts


In [5]:
# Gather the labelled records from every configured subreddit into one flat list.
all_data = []
for sub, score in subreddits.items():
    try:
        posts = fetch_from_subreddit(sub, score, limit=500)
    except Exception as e:
        # Best effort: report the subreddit that failed and move on to the next.
        print(f"Failed for r/{sub}: {e}")
    else:
        all_data += posts

Fetching from r/gratitude...
Collected 318 posts from r/gratitude
Fetching from r/happy...
Collected 359 posts from r/happy
Fetching from r/TodayIAmHappy...
Collected 498 posts from r/TodayIAmHappy
Fetching from r/BenignExistence...
Collected 490 posts from r/BenignExistence
Fetching from r/Emotions...
Collected 295 posts from r/Emotions
Fetching from r/offmychest...
Collected 252 posts from r/offmychest
Fetching from r/vent...
Collected 278 posts from r/vent
Fetching from r/lonely...
Collected 381 posts from r/lonely
Fetching from r/Anxiety...
Collected 227 posts from r/Anxiety
Fetching from r/SuicideWatch...
Collected 457 posts from r/SuicideWatch


In [6]:
# Turn the list of record dicts into a table; the bare name on the last line
# makes the notebook render it as the cell output.
df = pd.DataFrame(data=all_data)
df

Unnamed: 0,title,selftext,subreddit,created_utc,id,emotional_score
0,I am grateful to be a Canadian,I'm grateful to be a Canadian living in Canada...,gratitude,2025-06-25 20:03:01,1lk7ac3,1.0
1,I'm grateful for my body,I'm grateful for my body. I'm grateful for lig...,gratitude,2025-06-25 19:33:14,1lk6jds,1.0
2,I'm grateful to adopt a new strategy for getti...,"I've held onto disappointments for years, even...",gratitude,2025-06-25 21:59:00,1lkab8w,1.0
3,i’m grateful to live in a world with clouds ☁️,it’s a beautiful day for a drive back home aft...,gratitude,2025-06-25 23:06:47,1lkc4i9,1.0
4,I'm grateful how a side benefit to my car bein...,I tend to only have enough food for a couple o...,gratitude,2025-06-25 17:26:10,1lk3oj2,1.0
...,...,...,...,...,...,...
3550,It broke.,Fuck. Fuck. Fuck. Fuck. Fuck. Fuck. Fuck. Fuck...,SuicideWatch,2025-06-24 13:05:57,1lj4war,0.0
3551,goodbye,i love this server and thx everyone who notice...,SuicideWatch,2025-06-23 23:36:27,1lins2w,0.0
3552,Can't I just stop being a bitch and do it alre...,I want to kill myself so fucking badly but I'm...,SuicideWatch,2025-06-24 02:51:14,1lisumx,0.0
3553,Why do I want to die?,"I am passively suicidal, anyone understand me?...",SuicideWatch,2025-06-24 10:47:51,1lj2qnx,0.0


In [9]:
# Persist the collected posts as CSV (without the DataFrame index column).
# DataFrame.to_csv does not create missing folders and raises OSError when the
# parent directory is absent, so create it first; exist_ok keeps re-runs safe.
output_path = "../data/raw/reddit_emotions.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)