In [None]:
# --- Imports: standard library first, then third-party -----------------------
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# --- Plot styling --------------------------------------------------------------
# Applied once here so it takes effect before any figure is created below.
plt.style.use('seaborn-v0_8')

# --- Paths ---------------------------------------------------------------------
# Both locations are relative to the notebook's working directory.
data_dir = Path("data/processed")
eval_dir = Path("results/evaluation")

# Create the evaluation directory (and any missing parents); an already
# existing directory is not an error.
eval_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Read the per-model performance summary from the evaluation directory.
# Per the original note, this CSV is written by modelling.ipynb.
summary_csv = eval_dir / "model_performance_summary.csv"
metrics_summary = pd.read_csv(summary_csv)

print("Loaded model metrics summary:")
# Bare last expression so the notebook renders the frame with its rich repr.
metrics_summary


In [None]:
# One bar chart per headline metric, comparing the models in metrics_summary.
# Each entry is (column to plot on the y-axis, figure title, y-axis label).
metric_charts = [
    ("Accuracy", "Model Accuracy Comparison", "Accuracy"),
    ("ROC_AUC", "Model ROC-AUC Comparison", "ROC-AUC"),
]

for metric_col, chart_title, y_label in metric_charts:
    # Explicit figure/axes pair instead of relying on pyplot's current figure.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.barplot(data=metrics_summary, x="Model", y=metric_col, ax=ax)
    ax.set_title(chart_title)
    ax.set_ylabel(y_label)
    plt.show()


In [None]:
# Optional: load the confusion matrices saved from modelling.ipynb and draw one
# annotated heatmap per model. The artifact is optional, so a missing file is
# reported and skipped instead of aborting the notebook with FileNotFoundError.
import joblib

cm_path = eval_dir / "confusion_matrices.joblib"

if cm_path.exists():
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load artifacts produced by this project's pipeline.
    cms = joblib.load(cm_path)
else:
    print(f"No saved confusion matrices found at {cm_path}; skipping heatmaps.")
    # Keep `cms` defined so the loop below (and any later cell) still runs.
    cms = {}

# `cms` is iterated as a mapping of model name -> confusion matrix.
# NOTE(review): fmt='d' and the two-class tick labels assume integer-count,
# 2x2 matrices (Dismiss vs Report) — confirm against modelling.ipynb.
for model_name, cm in cms.items():
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=["Pred Dismiss", "Pred Report"],
                yticklabels=["True Dismiss", "True Report"],
                ax=ax)
    ax.set_title(f"{model_name} Confusion Matrix")
    plt.show()
    # Release the figure explicitly so figures do not accumulate in the loop.
    plt.close(fig)


In [None]:
# Checklist of points the evaluation write-up should cover, printed one per line.
insight_lines = [
    "Evaluation Insights:",
    "- Highlight the best performing model based on ROC-AUC and Accuracy.",
    "- Identify models with too many false negatives (missed suspicious activities).",
    "- Suggest threshold adjustments if needed for imbalanced classes.",
    "- Recommend next steps for deployment or further tuning.",
]

for insight in insight_lines:
    print(insight)
