### Import Libraries

In [11]:
import os
from collections import defaultdict
from datetime import datetime, timezone

import joblib
import pandas as pd
import praw

### Model Path and Reddit Client

In [12]:
# Location of the serialized model pipeline. predict_reddit_spread_risk passes
# this path to joblib.load; being relative, it is resolved against the
# kernel's current working directory.
MODEL_PATH = "spread_risk_model.joblib"

In [10]:
# Reddit API credentials are read from the environment so they are never
# stored in (or committed with) the notebook.
# NOTE(review): the client id/secret that used to be hard-coded here were
# exposed in this file and should be revoked and regenerated.
_REDDIT_ENV_VARS = ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET")
_missing_vars = [name for name in _REDDIT_ENV_VARS if not os.environ.get(name)]
if _missing_vars:
    # Fail early with an actionable message instead of an opaque auth error.
    raise RuntimeError(
        "Missing Reddit API credentials; set environment variable(s): "
        + ", ".join(_missing_vars)
    )

# No username/password is supplied, so the client is expected to be read-only
# (the print below confirms it).
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent="violence-dataset-scraper:v1.2",
)

print("Read only:", reddit.read_only)



Read only: True


### Helpers


In [13]:
def build_tree(reactions):
    """Group reaction ids under the id of the item they reply to.

    Args:
        reactions: iterable of dicts carrying at least ``"id"`` and
            ``"parent"`` keys.

    Returns:
        defaultdict(list) mapping each parent id to the list of its direct
        child ids, in input order. Reactions whose ``"parent"`` is falsy are
        left out.
    """
    tree = defaultdict(list)
    for reaction in reactions:
        parent_id = reaction["parent"]
        if not parent_id:
            # No parent recorded: nothing to attach this reaction to.
            continue
        tree[parent_id].append(reaction["id"])
    return tree

def cascade_depth(node, children, depth=0):
    """Return the depth of the deepest descendant of ``node``.

    The tree is walked level by level instead of recursively, so long reply
    chains cannot exhaust the interpreter's recursion limit, and a node that
    maps to an empty child list is treated as a leaf instead of making
    ``max()`` fail on an empty sequence.

    Args:
        node: id of the node to start from.
        children: mapping of parent id -> list of child ids (as produced by
            ``build_tree``). Assumed to describe a tree: every id is counted
            at the level where it is first reached.
        depth: depth value assigned to ``node`` itself; the result is this
            offset plus the number of levels found below ``node``.

    Returns:
        int: ``depth`` if ``node`` has no children, otherwise ``depth`` plus
        the length of the longest chain of replies under ``node``.
    """
    frontier = [node]
    # Guards against malformed (cyclic) input looping forever.
    seen = {node}
    while True:
        next_frontier = []
        for current in frontier:
            # .get() avoids inserting keys when ``children`` is a defaultdict.
            for child in children.get(current, ()):
                if child not in seen:
                    seen.add(child)
                    next_frontier.append(child)
        if not next_frontier:
            return depth
        frontier = next_frontier
        depth += 1

def cascade_width(children):
    """Return the largest number of direct children held by any single node.

    Args:
        children: mapping of parent id -> list of child ids.

    Returns:
        int: the longest child list's length, or 0 for an empty mapping.
    """
    widest = 0
    for replies in children.values():
        widest = max(widest, len(replies))
    return widest

def early_reactions(reactions, source_time, minutes):
    """Count reactions posted within ``minutes`` of ``source_time``.

    Args:
        reactions: iterable of dicts whose ``"time"`` value is a datetime.
        source_time: datetime the window is measured from.
        minutes: window length in minutes; the end of the window is inclusive.

    Returns:
        int: number of reactions whose timestamp is at or before the cutoff.
    """
    window_end = source_time.timestamp() + minutes * 60
    count = 0
    for reaction in reactions:
        if reaction["time"].timestamp() <= window_end:
            count += 1
    return count

### Predict Risk

In [14]:
def _strip_fullname_prefix(fullname):
    """Drop a leading ``t1_`` (comment) or ``t3_`` (submission) type prefix."""
    for prefix in ("t1_", "t3_"):
        if fullname.startswith(prefix):
            return fullname[len(prefix):]
    return fullname


def predict_reddit_spread_risk(url):
    """Extract cascade features from a Reddit thread and score them.

    Uses the module-level ``reddit`` client to fetch the submission and the
    pipeline stored at ``MODEL_PATH`` to compute the probability.

    Args:
        url: URL of the Reddit submission to analyse.

    Returns:
        tuple: ``(features, risk_prob)`` where ``features`` is the dict of
        model inputs built from the comment tree and ``risk_prob`` is
        ``predict_proba(X)[0][1]`` from the loaded pipeline.
    """
    submission = reddit.submission(url=url)
    # limit=0 removes the unexpanded "load more comments" placeholders without
    # fetching them, so only comments already returned by the API are counted.
    submission.comments.replace_more(limit=0)

    source_time = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)

    reactions = [
        {
            "id": comment.id,
            # parent_id carries a type prefix; strip it so the value matches
            # comment.id / submission.id when the tree is built.
            "parent": _strip_fullname_prefix(comment.parent_id),
            "time": datetime.fromtimestamp(comment.created_utc, tz=timezone.utc),
        }
        for comment in submission.comments.list()
    ]

    children = build_tree(reactions)

    features = {
        "total_reactions": len(reactions),
        "reactions_30min": early_reactions(reactions, source_time, 30),
        "reactions_60min": early_reactions(reactions, source_time, 60),
        "cascade_depth": cascade_depth(submission.id, children),
        "cascade_width": cascade_width(children),

        # user features unavailable on Reddit -> set 0 (same as training)
        "tweets_per_day": 0,
        "avg_gap_minutes": 0,
        "burstiness": 0,
        "topic_entropy": 0,
    }

    X = pd.DataFrame([features])

    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # MODEL_PATH must only ever point at a trusted model artifact.
    pipeline = joblib.load(MODEL_PATH)
    # Column 1 of predict_proba is taken as the risk probability.
    # NOTE(review): assumes the positive ("high risk") class is at index 1 of
    # the pipeline's classes_ -- confirm against the training notebook.
    risk_prob = pipeline.predict_proba(X)[0][1]

    return features, risk_prob



### Main Function

In [17]:
if __name__ == "__main__":
    # Interactive entry point: ask for a thread URL, score it, print a report.
    url = input("Paste Reddit thread URL:\n").strip()
    feats, prob = predict_reddit_spread_risk(url)

    print("\n=== Extracted Features ===")
    for feature_name in feats:
        # Right-align names in a 20-character column so the values line up.
        print(f"{feature_name:>20}: {feats[feature_name]}")

    print("\n=== RESULT ===")
    print(f"Spread Risk Probability: {prob:.3f}")
    # 0.5 is the decision threshold between the two labels.
    if prob >= 0.5:
        risk_label = "HIGH"
    else:
        risk_label = "LOW"
    print("RISK:", risk_label)


=== Extracted Features ===
     total_reactions: 63
     reactions_30min: 1
     reactions_60min: 63
       cascade_depth: 4
       cascade_width: 31
      tweets_per_day: 0
     avg_gap_minutes: 0
          burstiness: 0
       topic_entropy: 0

=== RESULT ===
Spread Risk Probability: 1.000
RISK: HIGH
