# 05 Risk Scoring





In [None]:
import pandas as pd
import numpy as np


In [None]:
# Load the processed text-with-narratives CSV and preview its first rows.
narratives_path = "../data/processed/text_with_narratives.csv"
df = pd.read_csv(narratives_path)
df.head()


In [None]:
# Fail fast if the columns this notebook relies on are absent.
# An explicit raise (instead of a bare `assert`) still runs under
# `python -O` and reports exactly which columns are missing.
required_columns = ("topic", "clean_text")
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Input data is missing required column(s): {missing_columns}"
    )


In [None]:
# Count the rows per topic, then attach each row's topic count to it as
# `narrative_size`. Rows whose topic is not a key of the mapping get NaN.
topic_counts = df["topic"].value_counts()
narrative_size = topic_counts.to_dict()
df["narrative_size"] = df["topic"].map(narrative_size)
df.head()


In [None]:
# Seed NumPy's global RNG so the draw below is reproducible across runs.
np.random.seed(42)

# One uniform random value in [0.4, 1.0) per row.
# NOTE(review): these are synthetic random numbers, not the output of a
# model — presumably a placeholder for real classifier probabilities; confirm.
row_count = len(df)
df["misinformation_prob"] = np.random.uniform(low=0.4, high=1.0, size=row_count)


In [None]:
# Min-max scale `narrative_size` into [0, 1].
size_min = df["narrative_size"].min()
size_range = df["narrative_size"].max() - size_min
size_offset = df["narrative_size"] - size_min

if size_range > 0:
    df["norm_narrative_size"] = size_offset / size_range
else:
    # Every narrative has the same size (or there is no data), so the range
    # is zero and dividing would yield 0/0 = NaN for every row. Map those
    # rows to 0.0 instead; missing sizes stay NaN because NaN * 0.0 is NaN.
    df["norm_narrative_size"] = size_offset * 0.0


In [None]:
# Risk Score =
#   (0.6 × misinformation probability)
# + (0.4 × narrative spread, i.e. the min-max normalized narrative size)


In [None]:
# Weighted blend of the two signals: 60% misinformation probability and
# 40% normalized narrative size.
misinfo_component = 0.6 * df["misinformation_prob"]
spread_component = 0.4 * df["norm_narrative_size"]
df["risk_score"] = misinfo_component + spread_component


In [None]:
def risk_level(score, *, high_threshold=0.75, medium_threshold=0.5):
    """Map a numeric risk score to a categorical risk label.

    Args:
        score: Risk score to classify.
        high_threshold: Minimum score (inclusive) labelled "High Risk".
        medium_threshold: Minimum score (inclusive) labelled "Medium Risk".

    Returns:
        "High Risk", "Medium Risk", or "Low Risk". A NaN score fails both
        comparisons and is therefore labelled "Low Risk".
    """
    if score >= high_threshold:
        return "High Risk"
    if score >= medium_threshold:
        return "Medium Risk"
    return "Low Risk"

# Label every row by passing its risk score through `risk_level`.
risk_scores = df["risk_score"]
df["risk_level"] = risk_scores.apply(risk_level)


In [None]:
# Preview the first rows labelled "High Risk", showing only the text,
# its topic, and the computed score.
is_high_risk = df["risk_level"] == "High Risk"
preview_columns = ["clean_text", "topic", "risk_score"]
df.loc[is_high_risk, preview_columns].head()


In [None]:
# Summarize risk per topic: mean risk score and number of rows, ordered
# from the highest average risk to the lowest.
# "size" counts every row in the group, whereas "count" skips rows whose
# `clean_text` is NaN; using "size" keeps this figure consistent with the
# per-row `narrative_size` column, which is derived from topic row counts.
narrative_risk = (
    df.groupby("topic")
      .agg(
          avg_risk_score=("risk_score", "mean"),
          narrative_size=("clean_text", "size"),
      )
      .reset_index()
      .sort_values(by="avg_risk_score", ascending=False)
)

narrative_risk.head()


In [None]:
# Persist both the row-level scores and the per-topic summary as CSV files
# (without the index column), then confirm on stdout.
row_scores_path = "../data/processed/text_with_risk_scores.csv"
topic_summary_path = "../data/processed/narrative_risk_summary.csv"

df.to_csv(row_scores_path, index=False)
narrative_risk.to_csv(topic_summary_path, index=False)

print("Risk scoring results saved successfully.")
