In [9]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Make sure the folder that receives the saved figures exists before any plot
# is written; exist_ok=True keeps this cell safe to re-run.
figures_dir = "../data/processed/figures"
os.makedirs(figures_dir, exist_ok=True)

In [10]:
# Load the saved model-comparison metrics, name the unlabeled first column
# "class", and swap hyphens for underscores in every column label
# (e.g. "f1-score" -> "f1_score").
metrics_path = "../data/processed/model_comparison_metrics.csv"
df = (
    pd.read_csv(metrics_path)
    .rename(columns={"Unnamed: 0": "class"})
    .rename(columns=lambda label: label.replace("-", "_"))
)

# Keep only the fraud-class rows. Both the integer 1 and the string "1" are
# accepted as the class label.
is_fraud_row = df["class"].isin([1, "1"])
fraud = df.loc[is_fraud_row].copy()
fraud

Unnamed: 0,model,class,precision,recall,f1_score,support
1,Baseline,1,0.828947,0.642857,0.724138,98.0
3,Balanced,1,0.060811,0.918367,0.114068,98.0


In [11]:
# Scatter plot of precision against recall for the fraud-class rows, with each
# point annotated by its model name. The figure is written to disk and then
# closed, so nothing is rendered inline by this cell.
fig, ax = plt.subplots()
ax.scatter(fraud["recall"], fraud["precision"])
for recall, precision, model in zip(fraud["recall"], fraud["precision"], fraud["model"]):
    ax.text(recall, precision, model)

# The title names both plotted quantities (precision on y, recall on x).
ax.set_title("Precision vs Recall (Fraud Class)")
ax.set_xlabel("Recall (Fraud)")
ax.set_ylabel("Precision (Fraud)")
fig.savefig("../data/processed/figures/precision_recall_fraud.png", bbox_inches="tight")
plt.close(fig)

In [12]:
# Read the saved threshold summary table and show it as the cell output.
# NOTE(review): the output recorded for this cell shows chosen_threshold = 1.0
# for a min_precision_target of 0.5 -- confirm that value is intended.
threshold_summary_path = "../data/processed/threshold_summary.csv"
threshold_df = pd.read_csv(threshold_summary_path)
threshold_df

Unnamed: 0,model,min_precision_target,chosen_threshold
0,Balanced,0.5,1.0


In [13]:
# Build the fraud-only summary table: keep the model name plus the four metric
# columns, and prefix each metric column with "fraud_". The mapping below
# drives both the column selection and the rename.
fraud_metric_names = {
    "precision": "fraud_precision",
    "recall": "fraud_recall",
    "f1_score": "fraud_f1_score",
    "support": "fraud_support",
}
final_summary = fraud[["model", *fraud_metric_names]].rename(columns=fraud_metric_names)

# Save the table without the index column, then display it.
final_summary.to_csv("../data/processed/final_summary.csv", index=False)
final_summary

Unnamed: 0,model,fraud_precision,fraud_recall,fraud_f1_score,fraud_support
1,Baseline,0.828947,0.642857,0.724138,98.0
3,Balanced,0.060811,0.918367,0.114068,98.0


In [None]:
# Assemble the final Markdown report (text in Portuguese) and write it to
# ../data/processed/final_report.md. The report has three sections:
# conclusions, the fraud-class metrics per model taken from `final_summary`,
# and the list of generated artifacts.
lines = [
    "# Relatório final - Detecção de fraudes\n",
    "## Conclusão",
    "- Comparando Baseline vs Balanced, há um trade-off claro entre recall e precision na classe fraude.",
    "- O modelo Balanced aumenta significativamente o recall, mas tende a gerar muitos falsos positivos.",
    "- O threshold tuning foi aplicado para buscar um ponto de equilíbrio mais operacional.\n",
    "## Resultados (Classe Fraude)",
]

# One bullet per model; support is cast to int so it prints as a count
# rather than as a float.
for _, row in final_summary.iterrows():
    lines.append(
        f"- **{row['model']}**: precision={row['fraud_precision']:.3f}, "
        f"recall={row['fraud_recall']:.3f}, f1={row['fraud_f1_score']:.3f}, "
        f"support={int(row['fraud_support'])}"
    )

# The figure entry uses the same filename the plotting cell saves to
# (precision_recall_fraud.png), so the report points at an existing file.
lines.extend([
    "\n## Artefatos gerados",
    "- `data/processed/final_summary.csv`",
    "- `data/processed/figures/precision_recall_fraud.png`",
    "- `data/processed/ai_conclusions.md` (interpretação via IA)\n",
])

report_text = "\n".join(lines)

with open("../data/processed/final_report.md", "w", encoding="utf-8") as f:
    f.write(report_text)

Salvo: data/processed/final_report.md
