# Manipulation Detection Data Exploration
This notebook uses `pandas` and `seaborn` to explore and visualize the dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the training split into a pandas DataFrame.
# NOTE(review): `dataset` is not created anywhere in this notebook, so on a
# fresh kernel this cell relies on hidden state. Until a loading step is added
# above, fail early with an actionable message instead of a bare NameError.
if "dataset" not in globals():
    raise NameError(
        "`dataset` is not defined: load the data before running this cell "
        "(it must provide a 'train' split with a .to_pandas() method)."
    )
df = dataset["train"].to_pandas()

# Not referenced by the plotting cells in this notebook.
# Presumably the two values of the 'Manipulative' label column -- TODO confirm.
manipulation = ['Manipulative', 'Non-manipulative']

# Columns of `df` selected together as df[techniques] for the technique
# distribution and co-occurrence plots.
techniques = [
    "Denial",
    "Evasion",
    "Feigning Innocence",
    "Rationalization",
    "Playing the Victim Role",
    "Playing the Servant Role",
    "Shaming or Belittlement",
    "Intimidation",
    "Brandishing Anger",
    "Accusation",
    "Persuasion or Seduction"
]

# Columns of `df` selected together as df[vulnerability] for the vulnerability
# distribution and co-occurrence plots.
vulnerability = [
    "Over-responsibility",
    "Over-intellectualization",
    "Naivete",
    "Low self-esteem",
    "Dependency"
]

In [None]:
# Horizontal count plot of the 'Manipulative' column, most frequent value first.
label_order = df['Manipulative'].value_counts().index.tolist()

fig, ax = plt.subplots(figsize=(6, 4))
# seaborn >= 0.13 deprecates `palette` without `hue`, so map hue to the same
# column to keep one colour per bar. dodge=False keeps full-width bars on
# older seaborn releases, which would otherwise dodge bars by hue level.
sns.countplot(
    data=df,
    y='Manipulative',
    hue='Manipulative',
    order=label_order,
    hue_order=label_order,
    palette='coolwarm',
    dodge=False,
    ax=ax,
)
# A hue legend would only repeat the y tick labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title("Manipulation Label Distribution")
ax.set_xlabel("Count")
ax.set_ylabel("Label")
fig.tight_layout()
plt.show()

In [None]:
# Column sums of the technique columns, largest first. These are occurrence
# counts if the columns hold 0/1 flags -- TODO confirm the column dtype.
tech_counts = df[techniques].sum().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
# seaborn >= 0.13 deprecates `palette` without `hue`, so map hue to the bar
# labels to keep one colour per bar. dodge=False keeps full-width bars on
# older seaborn releases, which would otherwise dodge bars by hue level.
sns.barplot(
    x=tech_counts.values,
    y=tech_counts.index,
    hue=tech_counts.index,
    palette="viridis",
    dodge=False,
    ax=ax,
)
# A hue legend would only repeat the y tick labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title("Distribution of Manipulation Techniques")
ax.set_xlabel("Count")
ax.set_ylabel("Technique")
fig.tight_layout()
plt.show()

In [None]:
# Column sums of the vulnerability columns, largest first. These are occurrence
# counts if the columns hold 0/1 flags -- TODO confirm the column dtype.
vuln_counts = df[vulnerability].sum().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(8, 5))
# seaborn >= 0.13 deprecates `palette` without `hue`, so map hue to the bar
# labels to keep one colour per bar. dodge=False keeps full-width bars on
# older seaborn releases, which would otherwise dodge bars by hue level.
sns.barplot(
    x=vuln_counts.values,
    y=vuln_counts.index,
    hue=vuln_counts.index,
    palette="rocket",
    dodge=False,
    ax=ax,
)
# A hue legend would only repeat the y tick labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title("Distribution of Vulnerabilities")
ax.set_xlabel("Count")
ax.set_ylabel("Vulnerability")
fig.tight_layout()
plt.show()

In [None]:
# Pairwise co-occurrence of techniques, computed as X^T . X over the technique
# columns. Cast to int first: boolean columns would otherwise produce a boolean
# matrix rather than counts, and non-integer cells make fmt="d" raise.
# Assumes the columns are 0/1 (or True/False) flags -- TODO confirm; under that
# assumption the diagonal holds each technique's own count.
tech_flags = df[techniques].astype(int)
tech_co_matrix = tech_flags.T.dot(tech_flags)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(tech_co_matrix, annot=True, fmt="d", cmap="magma", ax=ax)
ax.set_title("Technique Co-occurrence Heatmap")
fig.tight_layout()
plt.show()

In [None]:
# Pairwise co-occurrence of vulnerabilities, computed as X^T . X over the
# vulnerability columns. Cast to int first: boolean columns would otherwise
# produce a boolean matrix rather than counts, and non-integer cells make
# fmt="d" raise.
# Assumes the columns are 0/1 (or True/False) flags -- TODO confirm; under that
# assumption the diagonal holds each vulnerability's own count.
vuln_flags = df[vulnerability].astype(int)
vuln_co_matrix = vuln_flags.T.dot(vuln_flags)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(vuln_co_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Vulnerability Co-occurrence Heatmap")
fig.tight_layout()
plt.show()