In [6]:
import os
import praw
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime

In [7]:
# Load variables from a .env file into the process environment so the Reddit
# credentials never have to be written in the notebook itself.
load_dotenv()

# Every praw.Reddit keyword argument is read from the matching REDDIT_* variable
# (client_id <- REDDIT_CLIENT_ID, ...); an unset variable is passed on as None.
reddit = praw.Reddit(**{
    option: os.getenv('REDDIT_' + option.upper())
    for option in ('client_id', 'client_secret', 'user_agent', 'username', 'password')
})

In [8]:
# Subreddit name -> emotional score attached to every post collected from it.
# Scores run from 1.0 (first entry) down to 0.0 (last entry); note there is no
# 0.5 entry. Insertion order is the order in which the subreddits are fetched.
subreddits = dict(
    gratitude=1.0,
    happy=0.9,
    TodayIAmHappy=0.8,
    BenignExistence=0.7,
    Emotions=0.6,
    offmychest=0.4,
    vent=0.3,
    lonely=0.2,
    Anxiety=0.1,
    SuicideWatch=0.0,
)

In [9]:
def fetch_from_subreddit(subreddit, score, limit, client=None):
    """Collect text posts from the "hot" listing of one subreddit.

    A submission is kept only if it is not stickied, has a score above 1,
    has both a non-empty title and a non-empty self text, and the combined
    "title + newline + self text" (stripped) is at least 30 characters long.

    Args:
        subreddit: Subreddit name without the "r/" prefix.
        score: Emotional score stored on every record from this subreddit.
        limit: Maximum number of submissions requested from the hot listing.
        client: Object exposing ``subreddit(name).hot(limit=...)``. Defaults to
            the notebook-level ``reddit`` client when omitted.

    Returns:
        A list of dicts with the keys ``title``, ``selftext``, ``subreddit``,
        ``created_utc``, ``id`` and ``emotional_score``. ``created_utc`` is a
        naive ``datetime`` expressed in UTC.
    """
    # Imported here because the notebook's import cell only brings in `datetime`.
    from datetime import timezone

    if client is None:
        client = reddit  # PRAW client created in the setup cell

    print(f"Fetching from r/{subreddit}...")
    posts = []

    for submission in client.subreddit(subreddit).hot(limit=limit):
        if submission.stickied or submission.score <= 1:
            continue

        title = submission.title
        selftext = submission.selftext or ''

        # Check for missing text before concatenating, so an absent title is
        # skipped instead of raising on `None + str`.
        if not title or not selftext:
            continue

        content = (title + "\n" + selftext).strip()
        if len(content) < 30:
            continue

        # fromtimestamp() without tz converts to the machine's local time, which
        # would make the `created_utc` column depend on where the notebook runs.
        # Convert in UTC, then drop tzinfo so the stored value stays a naive
        # datetime with the same format as before.
        created = datetime.fromtimestamp(
            submission.created_utc, tz=timezone.utc
        ).replace(tzinfo=None)

        posts.append({
            'title': title,
            'selftext': selftext,
            'subreddit': subreddit,
            'created_utc': created,
            'id': submission.id,
            'emotional_score': score
        })
    print(f"Collected {len(posts)} posts from r/{subreddit}")
    return posts


In [10]:
# Gather the records from every configured subreddit into one flat list.
# A failure on one subreddit is reported and skipped so the remaining ones
# are still fetched.
all_data = []
for name, label in subreddits.items():
    try:
        all_data += fetch_from_subreddit(name, label, limit=1500)
    except Exception as err:
        print(f"Failed for r/{name}: {err}")

Fetching from r/gratitude...
Collected 674 posts from r/gratitude
Fetching from r/happy...
Collected 480 posts from r/happy
Fetching from r/TodayIAmHappy...
Collected 806 posts from r/TodayIAmHappy
Fetching from r/BenignExistence...
Collected 969 posts from r/BenignExistence
Fetching from r/Emotions...
Collected 516 posts from r/Emotions
Fetching from r/offmychest...
Collected 399 posts from r/offmychest
Fetching from r/vent...
Collected 428 posts from r/vent
Fetching from r/lonely...
Collected 753 posts from r/lonely
Fetching from r/Anxiety...
Collected 459 posts from r/Anxiety
Fetching from r/SuicideWatch...
Collected 649 posts from r/SuicideWatch


In [11]:
# Column order matches the record dicts built by fetch_from_subreddit. Passing it
# explicitly keeps the schema (and the header of the CSV written afterwards)
# stable even when every fetch failed and all_data is empty.
df = pd.DataFrame(
    all_data,
    columns=['title', 'selftext', 'subreddit', 'created_utc', 'id', 'emotional_score'],
)
df

Unnamed: 0,title,selftext,subreddit,created_utc,id,emotional_score
0,He’s gone… but I walked the stage in his old w...,"Last week, I graduated college, the first in m...",gratitude,2025-07-17 18:31:20,1m26xq1,1.0
1,I'm grateful that ChatGPT presents information...,There's a venue I want to visit today. All I ...,gratitude,2025-07-17 19:24:54,1m286ia,1.0
2,Grateful for a cup of nice orange juice.,I don't drink alcohol. So when I'm feeling blu...,gratitude,2025-07-17 20:05:28,1m2971s,1.0
3,I am grateful for a deep breath that instantly...,Gratitude Practice Day 61,gratitude,2025-07-17 15:30:42,1m23j0l,1.0
4,How to be grateful when your feeling behind in...,Hey everyone! I’ve been trying to be grateful ...,gratitude,2025-07-17 09:46:46,1m1xz7r,1.0
...,...,...,...,...,...,...
6128,Is this wrong of me?,This is going to sound wrong but after seeing ...,SuicideWatch,2025-07-14 22:33:05,1lzrzj5,0.0
6129,Tomorrow,"Tomorrow, I will drop my kids off, go to work,...",SuicideWatch,2025-07-14 08:34:56,1lzbkr8,0.0
6130,Everything is just terrible..,"My health is not doing great, debt sky high, n...",SuicideWatch,2025-07-14 14:54:53,1lzhuo3,0.0
6131,I want to die because of my intrusive thoughts,It's hard for me to even write this out becaus...,SuicideWatch,2025-07-14 12:19:42,1lzfi1o,0.0


In [12]:
# Persist the collected posts as CSV (without the DataFrame index).
output_path = "../data/raw/reddit_emotions.csv"
# to_csv does not create missing folders, so make sure the target directory
# exists; exist_ok keeps re-runs of this cell harmless.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)