In [1]:
pip install praw

Note: you may need to restart the kernel to use updated packages.


In [3]:
!pip install typing-extensions --upgrade

Collecting typing-extensions
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Installing collected packages: typing-extensions
Successfully installed typing-extensions-4.12.2


In [2]:
import os
from dotenv import load_dotenv
import praw
import spacy

# Load environment variables from cred.env
load_dotenv("../config/cred.env")

CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
USERNAME = os.getenv("REDDIT_USERNAME")
PASSWORD = os.getenv("REDDIT_PASSWORD")
USER_AGENT = os.getenv("USER_AGENT")

print(f"CLIENT_ID: {CLIENT_ID}")  # Should print a valid client ID
print(f"USER_AGENT: {USER_AGENT}")  # Should print your user agent
print(f"USERNAME: {USERNAME}")  # Should print your Reddit username
# print(f"PASSWORD: {PASSWORD}") 
# print(f"CLIENT_SECRET: {CLIENT_SECRET}")

# Initialize Reddit API connection
reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    username=USERNAME,
    password=PASSWORD,
    user_agent=USER_AGENT
)

# Test the connection by printing your Reddit username
print("Connected as:", reddit.user.me())

# Load spaCy NLP model
nlp = spacy.load("en_core_web_md")  # Medium model for better accuracy

# Define core disaster-related words
disaster_terms = ["earthquake","flood","hurricane", "tornado","cyclone", "accident", "car crash", "gas leak", "incident"]

# Convert disaster terms into NLP vectors
disaster_nlp_terms = [nlp(term) for term in disaster_terms]

# Function to check similarity of a Reddit post title with disaster terms
def is_disaster_related(text, threshold=0.4):  # Adjust threshold for sensitivity
    doc = nlp(text)

    if not doc.has_vector:
        print(f"⚠️ No vector found for text: '{text}'")
        return False
    
    for disaster_term in disaster_nlp_terms:
        if doc.similarity(disaster_term) > threshold:
            print(f"related disaster term found {disaster_term}")
            return True
    return False

# Stream posts from Reddit and check for disasters
subreddit = reddit.subreddit("testingground4bots")  # Monitor all subreddits

import json
import time

# List to store detected posts
detected_posts = []

# Fetch past posts + monitor new ones
for post in subreddit.new(limit=10):  # Fetch last 10 posts first
    title = post.title.lower()
    selftext = post.selftext.lower() if post.selftext else ""  # Ensure selftext isn't None
    # process posts
    if is_disaster_related(title) or is_disaster_related(selftext):  # Check if post is related to disasters
        post_data = {
            "title": post.title,
            "selftext": post.selftext,
            "url": post.url,
            "author": str(post.author),
            "author_fullname": getattr(post.author, "fullname", None),
            "author_premium": getattr(post.author, "is_premium", None),
            "subreddit_name_prefixed": post.subreddit_name_prefixed,
            "subreddit_subscribers": post.subreddit.subscribers,
            "link_flair_text": post.link_flair_text,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
            "ups": post.ups,
            "edited": post.edited,
            "created_utc": post.created_utc,
            "domain": post.domain,
            "mod_reports": post.mod_reports,
            "user_reports": post.user_reports,
            "removed_by": post.removed_by,
            "banned_by": post.banned_by
        }

        detected_posts.append(post_data)
        print(detected_posts)
        print("one post appended")

        # Send every 5 posts to the classifier
        if len(detected_posts) % 1 == 0:
            with open("detected_posts.json", "w") as f:
                json.dump(detected_posts, f, indent=4)
            print(f"🚨 Saved {len(detected_posts)} posts. Ready for classification.")

        time.sleep(2)  # Avoid hitting API rate limits
print(f"/nchecked all last 10posts/n")
print(f"/nchecking real-time data/n")

for post in subreddit.stream.submissions(skip_existing=True):
    title = post.title.lower()
    selftext = post.selftext.lower() if post.selftext else ""  # Ensure selftext isn't None


    if is_disaster_related(title) or is_disaster_related(selftext):  # Check if post is related to disasters
        post_data = {
            "title": post.title,
            "selftext": post.selftext,
            "url": post.url,
            "author": str(post.author),
            "author_fullname": getattr(post.author, "fullname", None),
            "author_premium": getattr(post.author, "is_premium", None),
            "subreddit_name_prefixed": post.subreddit_name_prefixed,
            "subreddit_subscribers": post.subreddit.subscribers,
            "link_flair_text": post.link_flair_text,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
            "ups": post.ups,
            "edited": post.edited,
            "created_utc": post.created_utc,
            "domain": post.domain,
            "mod_reports": post.mod_reports,
            "user_reports": post.user_reports,
            "removed_by": post.removed_by,
            "banned_by": post.banned_by
        }

        detected_posts.append(post_data)
        print("one post appended")
        print(detected_posts)

        # Send every 5 posts to the classifier
        if len(detected_posts) % 1 == 0:
            with open("detected_posts.json", "w") as f:
                json.dump(detected_posts, f, indent=4)
            print(f"🚨 Saved {len(detected_posts)} posts. Ready for classification.")

        time.sleep(2)  # Avoid hitting API rate limits


CLIENT_ID: IolG33JrO8tfKd-r4Ro9xw
USER_AGENT: DisasterAlert/1.0 by Short_Low2224
USERNAME: Short_Low2224
Connected as: Short_Low2224
⚠️ No vector found for text: 'helleone'
related disaster term found flood
[{'title': 'Reflections on Truth and Self-Discovery', 'selftext': 'Truth is the mirror reflecting our inner depths, revealing hidden complexities of the soul. It dances between light and shadow, challenging our perceptions of reality. In its embrace, both burden and liberation intertwine, urging us toward authenticity. Ever elusive and ever-changing, truth guides us on the endless path of self-discovery.', 'url': 'https://www.reddit.com/r/testingground4bots/comments/1jznd7n/reflections_on_truth_and_selfdiscovery/', 'author': 'legion_in_self', 'author_fullname': 't2_1hh7hgnmns', 'author_premium': None, 'subreddit_name_prefixed': 'r/testingground4bots', 'subreddit_subscribers': 1885, 'link_flair_text': None, 'num_comments': 0, 'upvote_ratio': 1.0, 'ups': 1, 'edited': False, 'created_u

KeyboardInterrupt: 