# IEEE-CIS Fraud Detection — 03 Monitoring, Drift Detection & Production Simulation

This notebook demonstrates how to monitor a deployed fraud detection model using batch statistics, drift detection (PSI/KS), concept drift simulation, and alerting. All code and documentation are in English for portfolio and production use.

## 1 — Import Required Libraries

Import all standard, monitoring, and visualization libraries used in this notebook.

In [None]:
import os, glob, warnings, joblib, gc
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import roc_auc_score, average_precision_score, f1_score
from ds_tools.evaluation.calibration import brier_score, expected_calibration_error
from ds_tools.monitoring.drift import psi, ks_drift_test, simulate_drift, drift_report

# Shared look for every figure drawn in this notebook.
sns.set_theme(font_scale=1.1, palette='muted', style='whitegrid')

# DataFrame rendering: thousands separator with four decimals, and at most
# 50 columns shown before pandas truncates the output.
display_options = {
    'display.float_format': '{:,.4f}'.format,
    'display.max_columns': 50,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

print('Imports OK')

## 2 — Load Model, Data, and Simulate Batches

Load the best model, test data, and simulate production batches for monitoring and drift analysis.

In [None]:
# Load the persisted model bundle: the fitted model, the ordered feature
# list it expects, the decision threshold and a display name.
# NOTE: joblib.load unpickles arbitrary objects — only load artefacts that
# come from a trusted source.
artefacts = joblib.load(os.path.join('..', 'artefacts', 'fraud_model.joblib'))
model = artefacts['model']
feature_cols = artefacts['feature_cols']
threshold = artefacts['threshold']
model_name = artefacts['model_name']
print(f'Loaded: {model_name} | threshold={threshold:.2f}')

# Load the IEEE-CIS data. 'ieee-fraud-detection' is a Kaggle *competition*
# slug, so it has to go through competition_download; dataset_download
# expects an 'owner/dataset' handle and rejected this slug, which made the
# download path dead code. Any failure (kagglehub missing, no credentials,
# competition rules not accepted, files absent) deliberately falls back to
# the local copies under ../data/.
try:
    import kagglehub
    path = kagglehub.competition_download('ieee-fraud-detection')
    csv_files = glob.glob(os.path.join(path, '**', '*.csv'), recursive=True)
    csv_map = {os.path.basename(f): f for f in csv_files}
    train_txn = pd.read_csv(csv_map['train_transaction.csv'])
    train_id  = pd.read_csv(csv_map['train_identity.csv'])
except Exception as e:
    print(f'kagglehub failed ({e}), tentando ../data/...')
    train_txn = pd.read_csv('../data/train_transaction.csv')
    train_id  = pd.read_csv('../data/train_identity.csv')

# Left join keeps every transaction, including those without identity data.
full = train_txn.merge(train_id, on='TransactionID', how='left')
# Drop the two source frames before building X to keep peak memory down.
del train_txn, train_id; gc.collect()

# Pre-processing intended to mirror notebook 02.
# NOTE(review): the encoders below are re-fitted on this data rather than
# loaded from the training run — confirm they yield the same integer codes
# the model was trained on.
ignore_cols = ['TransactionID', 'TransactionDT']
X = full.drop(columns=ignore_cols + ['isFraud'])
y = full['isFraud'].astype(int)

from sklearn.preprocessing import LabelEncoder
# Object (string) columns: fill gaps with a 'missing' category, then map
# each category to an integer code.
cat_cols = X.select_dtypes(include=['object']).columns
for col in cat_cols:
    X[col] = X[col].fillna('missing')
    X[col] = LabelEncoder().fit_transform(X[col])
# Numeric columns: -999 is used as the missing-value sentinel.
num_cols = X.select_dtypes(include=[np.number]).columns
X[num_cols] = X[num_cols].fillna(-999)

print(f'Dados carregados: {X.shape}')

## 3 — Batch Monitoring: Metrics, Distributions, and Alerts

Monitor batch-level metrics (fraud rate, prediction drift, feature drift) and generate alerts if thresholds are exceeded.

In [None]:
# Example: monthly batches. The divisor is 30 days expressed in seconds, so
# the code treats TransactionDT as an offset in seconds and DT_month as a
# 0-based 30-day bucket.
# NOTE(review): an earlier comment described TransactionDT as days since
# 2017-12-01 — confirm the unit and the origin date.
SECONDS_PER_BATCH = 3600 * 24 * 30
full['DT_month'] = (full['TransactionDT'] // SECONDS_PER_BATCH).fillna(0).astype(int)

# Loop-invariant: decide once whether the model exposes predict_proba or
# has to be called directly.
has_predict_proba = hasattr(model, 'predict_proba')

batch_metrics = []
# groupby yields the buckets in ascending DT_month order and avoids
# rebuilding a full-length boolean mask for every month.
for month, batch in full.groupby('DT_month'):
    yb = batch['isFraud']
    # Skip batches without both classes: the ranking metrics below need
    # positives and negatives.
    if yb.sum() == 0 or yb.nunique() < 2:
        continue
    # X shares its index with `full`, so the batch index selects the
    # matching pre-processed rows.
    Xb = X.loc[batch.index, feature_cols]
    y_prob = model.predict_proba(Xb)[:, 1] if has_predict_proba else model(Xb)
    y_pred = (y_prob >= threshold).astype(int)
    batch_metrics.append({
        'month': month,
        'n': len(batch),
        'n_fraud': int(yb.sum()),
        'fraud_rate': yb.mean(),
        'roc_auc': roc_auc_score(yb, y_prob),
        'pr_auc': average_precision_score(yb, y_prob),
        'brier': brier_score(yb.values, y_prob),
        'f1': f1_score(yb, y_pred),
    })
batch_df = pd.DataFrame(batch_metrics)
display(batch_df)

if batch_df.empty:
    # Without this guard the column lookups below raise KeyError on an
    # empty frame, hiding the real cause.
    print('No batch contained both classes; nothing to plot.')
else:
    # One panel per metric, each plotted over the monthly batches.
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    for ax, metric, color in [
        (axes[0, 0], 'roc_auc', 'steelblue'),
        (axes[0, 1], 'pr_auc', 'darkorange'),
        (axes[1, 0], 'brier', 'crimson'),
        (axes[1, 1], 'f1', 'green'),
    ]:
        ax.plot(batch_df['month'], batch_df[metric], 'o-', color=color, linewidth=2)
        ax.set_title(metric.upper())
        ax.set_xlabel('Month')
        ax.grid(True, alpha=0.3)
    plt.tight_layout(); plt.show()

## 4 — Population Stability Index (PSI) and Kolmogorov-Smirnov (KS) Drift

Calculate PSI and KS statistics to detect feature and prediction drift between reference and production batches.

In [None]:
# Baseline window: every row that falls in month 0.
ref_idx = full['DT_month'] == 0
reference = X.loc[ref_idx, feature_cols]

# Compare each later month with the baseline, producing one PSI value and
# one KS statistic per feature.
psi_vals = []
ks_vals = []
later_months = [m for m in sorted(full['DT_month'].unique()) if m != 0]
for month in later_months:
    cur_idx = full['DT_month'] == month
    current = X.loc[cur_idx, feature_cols]

    psi_row = {'month': month}
    psi_row.update(
        (feat, psi(reference[feat].values, current[feat].values))
        for feat in feature_cols
    )
    ks_row = {'month': month}
    ks_row.update(
        (feat, ks_drift_test(reference[feat].values, current[feat].values)['statistic'])
        for feat in feature_cols
    )

    psi_vals.append(psi_row)
    ks_vals.append(ks_row)

# Rows are months, columns are features.
psi_df = pd.DataFrame(psi_vals).set_index('month')
ks_df = pd.DataFrame(ks_vals).set_index('month')

# Plot the five features whose maximum PSI over all months is largest.
top_psi = psi_df.max().sort_values(ascending=False).head(5).index
ax = psi_df[top_psi].plot(figsize=(12, 5), marker='o')
ax.set_title('PSI dos Top-5 Features com Maior Drift')
ax.set_ylabel('PSI')
ax.set_xlabel('Month')
# Dashed guide lines at PSI 0.1 (orange) and 0.2 (red).
for level, line_color in ((0.1, 'orange'), (0.2, 'red')):
    ax.axhline(level, color=line_color, ls='--')
plt.show()

## 5 — Concept Drift Simulation

Simulate concept drift by altering the fraud rate or feature distributions in production batches, and observe the impact on model performance and alerts.

In [None]:
# Simulate drift on TransactionAmt for month 1. Only the feature values are
# altered; the labels used further down are the original ones.
# NOTE(review): the meaning of drift_type='shift' and magnitude=2.0 is
# defined by ds_tools.monitoring.drift.simulate_drift — confirm there.
month = 1
cur_idx = full['DT_month'] == month
current = X.loc[cur_idx, feature_cols].copy()
current_drifted = simulate_drift(current, 'TransactionAmt', drift_type='shift', magnitude=2.0)

# PSI and KS of the drifted feature against the month-0 reference.
ref_amt = reference['TransactionAmt'].values
drifted_amt = current_drifted['TransactionAmt'].values
psi_drift = psi(ref_amt, drifted_amt)
ks_drift = ks_drift_test(ref_amt, drifted_amt)
# The messages previously contained mis-encoded characters ("apÃ³s");
# they now print the intended "após".
print(f'PSI após drift: {psi_drift:.3f} | KS stat: {ks_drift["statistic"]:.3f} | p-value: {ks_drift["p_value"]:.3g}')

# Impact on model performance: score the drifted features against the
# unchanged month-1 labels.
yb = full.loc[cur_idx, 'isFraud']
y_prob = model.predict_proba(current_drifted)[:, 1] if hasattr(model, 'predict_proba') else model(current_drifted)
auc = roc_auc_score(yb, y_prob)
brier = brier_score(yb.values, y_prob)
print(f'AUC após drift: {auc:.4f} | Brier: {brier:.6f}')

## 6 — Evidently AI Integration

Use Evidently AI to generate interactive monitoring dashboards and reports for model and data drift.

In [None]:
# Evidently is optional. Only the imports sit inside the try block, so an
# ImportError raised later while building the report is no longer reported
# as "Evidently not installed".
try:
    from evidently.report import Report
    from evidently.metric_preset import DataDriftPreset, TargetDriftPreset
except ImportError:
    # NOTE(review): this also fires when Evidently is installed but no
    # longer ships these module paths — confirm the installed version.
    print('Evidently não instalado. pip install evidently')
else:
    # Cap both frames at 5,000 rows; the fixed seed keeps the sample stable.
    ref_sample = reference.sample(n=min(5000, len(reference)), random_state=42)
    cur_sample = current_drifted.sample(n=min(5000, len(current_drifted)), random_state=42)
    # Attach the labels by index so the target-drift preset has a 'target'
    # column to compare.
    ref_sample['target'] = full.loc[ref_sample.index, 'isFraud']
    cur_sample['target'] = full.loc[cur_sample.index, 'isFraud']
    report = Report(metrics=[DataDriftPreset(), TargetDriftPreset()])
    report.run(reference_data=ref_sample, current_data=cur_sample)
    report_path = os.path.join('..', 'artefacts', 'evidently_drift_report.html')
    report.save_html(report_path)
    print(f'Evidently report salvo em {report_path}')

## 7 — Alert Generation Logic

Implement logic to trigger alerts when drift or performance metrics exceed defined thresholds.

In [None]:
def check_alerts(psi_val, ks_val, auc_ref, auc_cur, brier_ref, brier_cur):
    """Print and return the alerts triggered by drift and performance metrics.

    Thresholds:
      * PSI: red at >= 0.20, yellow at >= 0.10.
      * KS statistic: red when > 0.2.
      * AUC drop (``auc_ref - auc_cur``): red above 0.05, yellow above 0.02.
      * Brier increase (``brier_cur - brier_ref``): red above 0.01,
        yellow above 0.005.

    Args:
        psi_val: PSI of the monitored feature or score.
        ks_val: KS statistic of the monitored feature or score.
        auc_ref: ROC-AUC on the reference batch.
        auc_cur: ROC-AUC on the current batch.
        brier_ref: Brier score on the reference batch.
        brier_cur: Brier score on the current batch.

    Returns:
        The alert messages in check order (PSI, KS, AUC, Brier). If nothing
        fired, the list holds a single green "all OK" message. Each message
        is also printed.
    """
    import math  # local import keeps the function self-contained

    alerts = []

    # NaN compares False against every threshold, so without this check a
    # broken metric would fall through to the green "all OK" message.
    metrics = {
        'psi_val': psi_val,
        'ks_val': ks_val,
        'auc_ref': auc_ref,
        'auc_cur': auc_cur,
        'brier_ref': brier_ref,
        'brier_cur': brier_cur,
    }
    invalid = [name for name, value in metrics.items() if math.isnan(value)]
    if invalid:
        alerts.append('🔴 Métrica inválida (NaN): ' + ', '.join(invalid))

    # Population stability: red takes precedence over yellow.
    if psi_val >= 0.2:
        alerts.append('🔴 PSI >= 0.20 (retrain)')
    elif psi_val >= 0.1:
        alerts.append('🟡 PSI >= 0.10 (investigar)')

    # Kolmogorov-Smirnov statistic.
    if ks_val > 0.2:
        alerts.append('🔴 KS > 0.2 (drift forte)')

    # Ranking quality: positive values mean the current batch is worse.
    auc_drop = auc_ref - auc_cur
    if auc_drop > 0.05:
        alerts.append(f'🔴 AUC caiu {auc_drop:.3f}')
    elif auc_drop > 0.02:
        alerts.append(f'🟡 AUC caiu {auc_drop:.3f}')

    # Brier score: higher is worse, so an increase is the degradation.
    brier_inc = brier_cur - brier_ref
    if brier_inc > 0.01:
        alerts.append(f'🔴 Brier subiu {brier_inc:.4f}')
    elif brier_inc > 0.005:
        alerts.append(f'🟡 Brier subiu {brier_inc:.4f}')

    if not alerts:
        alerts.append('🟢 Tudo OK')
    for a in alerts:
        print(a)
    return alerts

# Example: month-0 baseline metrics versus the artificially drifted month 1.
baseline_row = batch_df[batch_df['month'] == 0].iloc[0]
auc_ref = baseline_row['roc_auc']
brier_ref = baseline_row['brier']
check_alerts(psi_drift, ks_drift['statistic'], auc_ref, auc, brier_ref, brier)

## 8 — Conclusão e Próximos Passos

Este notebook demonstrou monitoramento batch, detecção de drift, simulação de concept drift, geração de alertas e integração com Evidently AI. Ajuste os thresholds e análises conforme os resultados reais do seu pipeline.