In [7]:
import pandas as pd
import numpy as np

In [8]:
audit_df = pd.read_csv("../data/processed/fraud_cases.csv")
audit_df.head()

Unnamed: 0,test_index,y_true,y_pred,fraud_probability,outcome
0,0,0,0,7.517843e-09,TN
1,1,0,0,0.002014189,TN
2,2,0,0,0.0007212892,TN
3,3,0,0,0.00106211,TN
4,4,0,0,0.0001230299,TN


In [9]:
total = len(audit_df)
pred_fraud = (audit_df["y_pred"] == 1).sum()
true_fraud = (audit_df["y_true"] == 1).sum()

tp = ((audit_df["y_pred"] == 1) & (audit_df["y_true"] == 1)).sum()
fp = ((audit_df["y_pred"] == 0) & (audit_df["y_true"] == 1)).sum()
fn = ((audit_df["y_pred"] == 1) & (audit_df["y_true"] == 0)).sum()
tn = ((audit_df["y_pred"] == 0) & (audit_df["y_true"] == 0)).sum()

precision = tp / (tp + fp) if (tp + fp) else 0
recall = tp / (tp + fn) if (tp + fn) else 0

kpis = {
    "total_cases": total,
    "predicted_fraud": pred_fraud,
    "true_fraud": true_fraud,
    "tp": tp,
    "fp": fp,
    "fn": fn,
    "tn": tn,
    "precision": precision,
    "recall": recall
}

kpis_df = pd.DataFrame([kpis])
kpis_df

Unnamed: 0,total_cases,predicted_fraud,true_fraud,tp,fp,fn,tn,precision,recall
0,71202,91,123,76,47,15,71064,0.617886,0.835165


In [10]:
kpis_df.to_csv("../data/processed/audit_kpis.csv", index=False)

In [11]:
top_alerts = audit_df[audit_df["y_pred"] == 1].copy()
top_alerts = top_alerts.sort_values("fraud_probability", ascending=False)

top_alerts_50 = top_alerts.head(50)
top_alerts_50

Unnamed: 0,test_index,y_true,y_pred,fraud_probability,outcome
16401,16401,1,1,1.0,TP
1722,1722,1,1,1.0,TP
12848,12848,1,1,1.0,TP
20899,20899,1,1,1.0,TP
41029,41029,1,1,1.0,TP
40697,40697,1,1,1.0,TP
29767,29767,1,1,1.0,TP
16433,16433,1,1,1.0,TP
35877,35877,1,1,1.0,TP
35599,35599,1,1,1.0,TP


In [12]:
top_alerts_50.to_csv("../data/processed/top_alers_50.csv", index=False)

In [None]:
lines = []
lines.append("# Relatório de Auditoria — Detecção de Fraudes\n")

lines.append("## KPIs")
lines.append(f"- Total de casos (teste): **{total}**")
lines.append(f"- Fraudes reais (classe 1): **{true_fraud}**")
lines.append(f"- Alertas gerados (pred=1): **{pred_fraud}**")
lines.append(f"- TP: **{tp}** | FP: **{fp}** | FN: **{fn}** | TN: **{tn}**")
lines.append(f"- Precision: **{precision:.3f}**")
lines.append(f"- Recall: **{recall:.3f}**\n")

lines.append("## Interpretação")
lines.append("- Precision alta significa menos falsos positivos (menos clientes legítimos bloqueados).")
lines.append("- Recall alto significa menos fraudes passando despercebidas.")
lines.append("- Em fraude, normalmente buscamos maximizar recall mantendo precision operacionalmente aceitável.\n")

lines.append("## Top 10 alertas (maior probabilidade de fraude)")
lines.append(top_alerts_50.head(10)[["fraud_probability", "y_true", "y_pred", "outcome"]].to_markdown(index=False))

report_md = "\n".join(lines)

with open("../data/processed/audit_report.md", "w", encoding="utf-8") as f:
    f.write(report_md)

print("Salvo: data/processed/reports/audit_report.md")


Salvo: data/processed/audit_report.md
