# test reddit api

In [4]:
import os

from dotenv import load_dotenv
import praw
load_dotenv()

True

In [5]:
try:
    reddit = praw.Reddit(
        client_id = os.getenv("REDDIT_CLIENT_ID"),
        client_secret = os.getenv("REDDIT_CLIENT_SECRET"),
        password = os.getenv("REDDIT_PASSWORD"),
        user_agent = os.getenv("REDDIT_USER_AGENT"),
        username = os.getenv("REDDIT_USERNAME")
    )
except Exception as e:
    print(f"Error initializing Reddit client: {e}")

print(reddit.user.me())

Fabulous_Cherry2510


In [6]:
subreddit = reddit.subreddit("news")
posts = subreddit.hot(limit=10)

In [7]:
posts = list(posts)

In [8]:
for post in posts:
    print(f"Title: {post.title}")
    print(f"selftext: {post.selftext}")
    print("-" * 40)

Title: ICE arrested a 6-year-old boy with leukemia at immigration court. His family is suing.
selftext: 
----------------------------------------
Title: College graduates face toughest job market in more than a decade as hiring slows
selftext: 
----------------------------------------
Title: Man who killed London schoolboy with samurai sword is convicted of murder
selftext: 
----------------------------------------
Title: US economy shrank 0.5% between January and March, worse than 2 earlier estimates had revealed
selftext: 
----------------------------------------
Title: Meta wins AI copyright lawsuit as US judge rules against authors
selftext: 
----------------------------------------
Title: Hundreds of weight loss and diabetes jab users report pancreas problems
selftext: 
----------------------------------------
Title: Family members outraged as U.S. citizen detained by federal agents in downtown LA on way to work
selftext: 
----------------------------------------
Title: Billionair

In [None]:
popular = reddit.subreddits.popular(limit=200)
for sub in popular:
    print(sub.display_name)

# test target subreddits

In [3]:
import os

from dotenv import load_dotenv
import praw
load_dotenv(".env.dev")

True

In [4]:
try:
    print(reddit.user.me())
except Exception:
    print("Initializing Reddit client again...")
    reddit = praw.Reddit(
        client_id = os.getenv("REDDIT_CLIENT_ID"),
        client_secret = os.getenv("REDDIT_CLIENT_SECRET"),
        password = os.getenv("REDDIT_PASSWORD"),
        user_agent = os.getenv("REDDIT_USER_AGENT"),
        username = os.getenv("REDDIT_USERNAME")
    )

    print(reddit.user.me())

Initializing Reddit client again...
Fabulous_Cherry2510


In [6]:
import json
from prawcore.exceptions import NotFound, Forbidden
import time

def is_valid_subreddit(name, reddit):
    try:
        reddit.subreddits.search_by_name(name, exact=True)
        return True
    except (NotFound, Forbidden):
        return False
    except Exception as e:
        print(f"⚠️ Unexpected error while checking {name}: {e}")
        return False

def validate_all_subreddits(filepath: str, reddit):
    with open(filepath, "r") as f:
        sub_dict = json.load(f)

    invalid = []

    for category, subs in sub_dict.items():
        print(f"\n🔍 Validating category: {category}")
        for sub in subs:
            if not is_valid_subreddit(sub, reddit):
                print(f"❌ Invalid: {sub}")
                invalid.append(sub)
            else:
                print(f"✅ Valid: {sub}")
            time.sleep(0.3)  # Respect API rate limit

    print("\n⛔ Invalid Subreddits:")
    for sub in invalid:
        print(f"- {sub}")

    return invalid


In [7]:
from app.reddit.fetch import reddit  # or however you initialize reddit
validate_all_subreddits("/home/nech/projects/reddit-meter-backend/subreddits.json", reddit)



🔍 Validating category: News & Politics
✅ Valid: news
✅ Valid: worldnews
✅ Valid: politics
✅ Valid: nottheonion
✅ Valid: todayilearned
✅ Valid: explainlikeimfive
✅ Valid: AskHistorians
✅ Valid: europe
✅ Valid: AskUK
✅ Valid: AskMen
✅ Valid: AskWomen

🔍 Validating category: Finance & Economics
✅ Valid: wallstreetbets
✅ Valid: personalfinance
✅ Valid: PersonalFinanceCanada
✅ Valid: Frugal
✅ Valid: povertyfinance
✅ Valid: recruitinghell
✅ Valid: Scams
✅ Valid: crypto
✅ Valid: finance

🔍 Validating category: Education & Questions
✅ Valid: AskReddit
✅ Valid: NoStupidQuestions
✅ Valid: OutOfTheLoop
✅ Valid: TooAfraidToAsk
✅ Valid: explainlikeimfive
✅ Valid: changemyview
✅ Valid: learnprogramming
✅ Valid: cscareerquestions

🔍 Validating category: Pop Culture & Entertainment
✅ Valid: movies
✅ Valid: television
✅ Valid: Fauxmoi
✅ Valid: popculturechat
✅ Valid: Kanye
✅ Valid: marvelstudios
✅ Valid: TwoXChromosomes
✅ Valid: JoeRogan
✅ Valid: comics

🔍 Validating category: Gaming
✅ Valid: gaming
✅

['thedatalist']

In [13]:
subreddit_json = "./subreddits.json"
subs_by_category = json.load(open(subreddit_json, "r"))
for category, subs in subs_by_category.items():
    print(f"Category: {category}")
    for sub in subs:
        # try getting subreddit info using api
        subreddit = reddit.subreddit(sub)
        if subreddit:
            pass
            # print(f"subreddit {subreddit.display_name} ({sub}) exists!")
        else:
            print(f"subreddit {sub} does not exist or could not be fetched.")
    print("-" * 40)

Category: News & Politics
----------------------------------------
Category: Finance & Economics
----------------------------------------
Category: Education & Questions
----------------------------------------
Category: Pop Culture & Entertainment
----------------------------------------
Category: Gaming
----------------------------------------
Category: Tech & Programming
----------------------------------------
Category: Data & Analytics
----------------------------------------
Category: Art & Creativity
----------------------------------------
Category: Memes & Humor
----------------------------------------
Category: Mental Health & Self
----------------------------------------
Category: Social & Cultural Commentary
----------------------------------------
Category: DIY, Science & Niche Interests
----------------------------------------
Category: Conflict & Reality
----------------------------------------
Category: NSFW & Fringe
----------------------------------------


# test inference:

In [2]:
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    top_k=None,
)

def run_batch_inference(texts: list[str]) -> list[dict]:
    # Truncate each to 512 characters
    truncated_texts = [text[:512] for text in texts]
    results = classifier(truncated_texts)

    return [
        {res["label"]: res["score"] for res in result}
        for result in results
    ]


Device set to use cuda:0


In [6]:
inputs = [
    'I had such a great day today! The weather was perfect and I got to spend time with my friends.',
    'I am so angry at my friend for canceling our plans last minute. I feel betrayed and hurt.',
    'I am feeling really sad today. I just can\'t shake off this feeling of emptiness.',
    'I am so excited for the concert next week! I cant wait to see my favorite band perform live.',
    'I am feeling really anxious about the upcoming exam. I just can\'t seem to focus on studying.',
    "我今天过得真开心！天气很好，我和朋友们一起度过了美好时光。",
    "我对朋友临时取消计划感到非常生气。我感到被背",
]

print("Running batch inference...")
results = run_batch_inference(inputs)

for text, result in zip(inputs, results):
    print(f"Text: {text}")
    print("Emotions:")
    for emotion, score in result.items():
        print(f"  {emotion}: {score:.4f}")
    print("-" * 40)

Running batch inference...
Text: I had such a great day today! The weather was perfect and I got to spend time with my friends.
Emotions:
  joy: 0.9984
  sadness: 0.0004
  love: 0.0004
  surprise: 0.0003
  anger: 0.0003
  fear: 0.0002
----------------------------------------
Text: I am so angry at my friend for canceling our plans last minute. I feel betrayed and hurt.
Emotions:
  sadness: 0.9319
  fear: 0.0430
  love: 0.0115
  anger: 0.0089
  joy: 0.0034
  surprise: 0.0013
----------------------------------------
Text: I am feeling really sad today. I just can't shake off this feeling of emptiness.
Emotions:
  sadness: 0.9991
  love: 0.0002
  anger: 0.0002
  fear: 0.0002
  joy: 0.0002
  surprise: 0.0001
----------------------------------------
Text: I am so excited for the concert next week! I cant wait to see my favorite band perform live.
Emotions:
  joy: 0.9900
  love: 0.0087
  sadness: 0.0005
  surprise: 0.0004
  anger: 0.0003
  fear: 0.0002
---------------------------------------

# test reddit + inference

In [1]:
from app.ml.inference import run_batch_inference
from app.reddit.fetch import fetch_subreddit_posts

Device set to use cuda:0


Logged in as Fabulous_Cherry2510


In [2]:
posts = fetch_subreddit_posts("AITAH", required_posts=5)

Processing submission: 1lh6ht3 - Looking for more mods!
Processing submission: 1l4t34l - AITAH for banning users with scam links and other domains mostly bots use?
Processing submission: 1lkciv6 - AITA for silently changing my son's name after my brother and SIL gave my nephew the name too?
Processing submission: 1lk5y9b - AITAH for telling my mom I don’t forgive her for choosing her boyfriend over me when I was a kid?
Processing submission: 1lk857o - AITAH: for postponing the wedding when my fiance was demanding to be paid back the money he paid for his late wife's surgery?
Processing submission: 1lkdiuf - AITAH for kicking my mom out of my graduation dinner after she brought her husband instead of my dad?


In [5]:
print(posts[0])

IndexError: list index out of range

In [4]:
texts = []
for post in posts:
    text = post["title"] + " " + (post["text"] or "")
    for comment in post["comments"]:
        text += " " + comment["body"]
    print(  f"Post: {text}")
    print(run_batch_inference([text]))
    texts.append(text)


Post: AITAH for banning users with scam links and other domains mostly bots use? Hello AITAH community!

Since our head mod began recruiting efforts a few months ago, we've expanded our moderation team and increased our toolkit to try to give you the best experience this sub can offer. Our last mod announcement was unfortunately on April 1st but we assure you our efforts are not a joke. We care about this community and want to see the quality in this community continue to improve.

Here are a few changes we've implemented over the last few months since the new team came on:


Rules: Rules have been refined and expounded upon. You may have noticed some comments removed for name calling or incivility. Reports from users really help us find these (theme). We have put the rules in the sidebar, the new.reddit sidebar, and the wiki. No matter how you reddit, the new rules are there, you should see them and maybe take a moment to review them. If we were to undergo anything more drastic than c

In [10]:
results = run_batch_inference(texts)
print("results:", results)

results: [{'sadness': 0.05724615231156349, 'joy': 0.48117128014564514, 'love': 0.014398619532585144, 'anger': 0.27983638644218445, 'fear': 0.15719150006771088, 'surprise': 0.010156083852052689}, {'sadness': 0.0018915304681286216, 'joy': 0.004828240256756544, 'love': 0.0007443905924446881, 'anger': 0.9888409972190857, 'fear': 0.003400283632799983, 'surprise': 0.0002944710140582174}, {'sadness': 0.126068577170372, 'joy': 0.24193672835826874, 'love': 0.0032752440311014652, 'anger': 0.6107820868492126, 'fear': 0.015659986063838005, 'surprise': 0.002277401043102145}, {'sadness': 0.9744206666946411, 'joy': 0.0011943706776946783, 'love': 0.000621794315520674, 'anger': 0.006209913641214371, 'fear': 0.017270173877477646, 'surprise': 0.0002830728190019727}, {'sadness': 0.2812051773071289, 'joy': 0.09164117276668549, 'love': 0.0033936104737222195, 'anger': 0.6094412207603455, 'fear': 0.012739568017423153, 'surprise': 0.0015793151687830687}]


# test firestore:

In [9]:
import os

from dotenv import load_dotenv

load_dotenv()

True

In [13]:
from google.cloud import firestore
from datetime import datetime

# Initialize Firestore client
db = firestore.Client(database=os.getenv("FIRESTORE_DATABASE_ID"))

# Sample test data
test_data = {
    "joy": 0.75,
    "anger": 0.05,
    "sadness": 0.1,
    "fear": 0.03,
    "love": 0.05,
    "surprise": 0.02,
    "timestamp": datetime.now().isoformat(),
    "updatedAt": firestore.SERVER_TIMESTAMP
}

# Write to sentiment_history collection (timestamped doc)
hour_key = datetime.now().strftime('%Y-%m-%dT%H')
doc_ref = db.collection("sentiment_history").document(hour_key)
doc_ref.set(test_data)

print(f"✅ Saved to sentiment_history/{hour_key}")


✅ Saved to sentiment_history/2025-06-25T23


# test firestore.py

In [2]:
from app.storage.firestore import save_sentiment_summary
from datetime import datetime

test_data = {
    "joy": 0.65,
    "anger": 0.15,
    "sadness": 0.1,
    "fear": 0.03,
    "love": 0.05,
    "surprise": 0.02,
}
save_sentiment_summary(test_data)

✅ Saved sentiment snapshot to Firestore.


# test firestore.py and inference.py and reddit.py

In [1]:
from dotenv import load_dotenv
load_dotenv()

from app.reddit.fetch import fetch_subreddit_posts, fetch_all_subreddit_posts_by_dict
from app.ml.inference import run_batch_inference
from app.storage.firestore import save_sentiment_summary
from app.processing.aggregate import compute_sentiment_average

Logged in as Fabulous_Cherry2510


Device set to use cuda:0


In [2]:
test_subs = {
  "News & Politics": [
    "news", "worldnews", "politics"
  ]
}

results = fetch_all_subreddit_posts_by_dict(
    test_subs,
    posts_per_subreddit=5,
    comment_per_post=2)

print(results)


Processing submission: 1lkzl73 - ICE arrested a 6-year-old boy with leukemia at immigration court. His family is suing.
Processing submission: 1lkzbjy - College graduates face toughest job market in more than a decade as hiring slows
Processing submission: 1lkzz0z - US economy shrank 0.5% between January and March, worse than 2 earlier estimates had revealed
Processing submission: 1ll4pek - Critical hurricane forecast tool abruptly terminated
Processing submission: 1lkxkao - Man who killed London schoolboy with samurai sword is convicted of murder
Fetched 5 posts from news in category News & Politics
Processing submission: 1lkqi89 - /r/WorldNews Live Thread: Russian Invasion of Ukraine Day 1218, Part 1 (Thread #1365)
Processing submission: 1lkxucx - Cocaine use and production hits record high
Processing submission: 1lktdyc - Explosions Rock Moscow as 50 Drones Target Russian Regions, Vnukovo Flights Diverted
Processing submission: 1ll2ljj - Indian court rules trans women are women and 

In [6]:
sentiment_by_subreddit = {}
for category, subs in results.items():
    for sub in subs:
        sub_name = sub['name']
        posts = []
        for post in sub['posts']:
            text = post['title'] + " " + (post['text'] or "")
            for comment in post['comments']:
                text += " " + comment['body']
            posts.append(text)
        if posts:
            print(f"Running inference for subreddit: {sub_name} with {len(posts)} posts")
            sentiment_results = run_batch_inference(posts)
            # average the results for this subreddit
            print(sentiment_results)
            avg_result = compute_sentiment_average(sentiment_results)
            sentiment_by_subreddit[sub_name] = avg_result
            print(f"Sentiment for {sub_name}: {avg_result}")


Running inference for subreddit: news with 5 posts
[{'fear': 0.9904718399047852, 'anger': 0.0055731842294335365, 'sadness': 0.0028303461149334908, 'joy': 0.0004994028713554144, 'surprise': 0.00044398033060133457, 'love': 0.00018132974219042808}, {'surprise': 0.9906880855560303, 'fear': 0.003578940872102976, 'joy': 0.0029236769769340754, 'sadness': 0.0011165202595293522, 'love': 0.0009013584349304438, 'anger': 0.000791358994320035}, {'joy': 0.9051741361618042, 'sadness': 0.04768359288573265, 'anger': 0.038989465683698654, 'fear': 0.0038983437698334455, 'love': 0.0024546014610677958, 'surprise': 0.001799727906472981}, {'anger': 0.8802027702331543, 'fear': 0.10501456260681152, 'sadness': 0.008181724697351456, 'joy': 0.004429323598742485, 'surprise': 0.0017169169150292873, 'love': 0.00045459120883606374}, {'love': 0.7317546010017395, 'anger': 0.14237374067306519, 'fear': 0.05145767703652382, 'sadness': 0.04027293622493744, 'joy': 0.0274996105581522, 'surprise': 0.006641405168920755}]
Senti

In [7]:
# average for all subreddits
overall_avg = {}
for sub, sentiment in sentiment_by_subreddit.items():
    for label, score in sentiment.items():
        overall_avg[label] = overall_avg.get(label, 0) + score
# Normalize by the number of subreddits
for label in overall_avg:
    overall_avg[label] /= len(sentiment_by_subreddit)
print(f"Overall sentiment: {overall_avg}")

Overall sentiment: {'fear': 0.15843882087695724, 'anger': 0.4238901470632603, 'sadness': 0.013963984112100052, 'joy': 0.28359871040253587, 'surprise': 0.06811544822606568, 'love': 0.0519928813475417}


In [8]:
save_sentiment_summary(overall_avg)

✅ Saved sentiment snapshot to Firestore.
