In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Apply seaborn's white-grid theme to every figure in this notebook.
# `set_theme` is the preferred entry point; `sns.set` is only an alias for it.
sns.set_theme(style="whitegrid")


In [None]:
# Load the processed risk-score table and the narrative summary table.
df = pd.read_csv("../data/processed/text_with_risk_scores.csv")
summary = pd.read_csv("../data/processed/narrative_risk_summary.csv")

# Preview each frame as its own rich table. Ending the cell with a bare tuple
# of frames would fall back to a plain-text repr of the tuple instead.
# `display` is made available by the IPython kernel, so no import is needed.
display(df.head(), summary.head())


In [None]:
# Tally how many rows carry each risk-level label (most frequent first).
risk_level_counts = df["risk_level"].value_counts()
risk_level_counts


In [None]:
# Bar chart of row counts per risk level, with the bars pinned to the
# Low -> Medium -> High order given explicitly below.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=df,
    x="risk_level",
    order=["Low Risk", "Medium Risk", "High Risk"],
    ax=ax,
)
ax.set_title("Distribution of Misinformation Risk Levels")
ax.set_xlabel("Risk Level")
ax.set_ylabel("Number of Posts")
plt.show()


In [None]:
# Keep the first ten rows of the narrative summary for closer inspection.
# NOTE(review): "top" here means "first in the CSV's row order" -- confirm the
# file is already sorted by the intended ranking before relying on this.
top_narratives = summary.iloc[:10]
top_narratives


In [None]:
# Scatter every row of the summary: narrative size on x, average risk score
# on y. Marker size also tracks narrative size; the size legend is hidden.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(
    x="narrative_size",
    y="avg_risk_score",
    size="narrative_size",
    data=summary,
    legend=False,
    ax=ax,
)
ax.set_title("Narrative Spread vs Risk Score")
ax.set_xlabel("Narrative Size (Number of Posts)")
ax.set_ylabel("Average Risk Score")
plt.show()


In [None]:
# Bar chart of the average risk score for each topic held in `top_narratives`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x="topic", y="avg_risk_score", data=top_narratives, ax=ax)
ax.set_title("Top High-Risk Narratives")
ax.set_xlabel("Narrative (Topic ID)")
ax.set_ylabel("Average Risk Score")
plt.show()


In [None]:
# Histogram of the risk scores in 20 bins, with a KDE curve drawn on top.
fig, ax = plt.subplots(figsize=(7, 4))
sns.histplot(df["risk_score"], bins=20, kde=True, ax=ax)
ax.set_title("Distribution of Risk Scores")
ax.set_xlabel("Risk Score")
ax.set_ylabel("Frequency")
plt.show()


In [None]:
# Show the five rows with the highest risk score, restricted to the text,
# topic and risk columns.
(
    df.sort_values(by="risk_score", ascending=False)
    .head(5)
    .loc[:, ["clean_text", "topic", "risk_score", "risk_level"]]
)


In [None]:
# Write the risk-level bar chart to a PNG file, then close the figure.
fig, ax = plt.subplots(figsize=(6, 4))
# Pin the bar order so the exported image does not depend on the order in
# which the levels happen to appear in `df`, and so it agrees with the
# on-screen risk-level chart drawn earlier in this notebook.
sns.countplot(
    x="risk_level",
    data=df,
    order=["Low Risk", "Medium Risk", "High Risk"],
    ax=ax,
)
ax.set_title("Risk Level Distribution")
# Label the axes so the saved image is readable on its own.
ax.set_xlabel("Risk Level")
ax.set_ylabel("Number of Posts")
# bbox_inches="tight" keeps the axis labels from being clipped in the file.
fig.savefig("../data/processed/risk_level_distribution.png", bbox_inches="tight")
plt.close(fig)
