In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score
)

# Read the small development sample from disk
df = pd.read_csv("../data/processed/dev_sample.csv")

# Sanity: pull the label column out first, then keep every other column as features
y = df["Class"]
X = df.drop(columns=["Class"])

# Reference point for the baselines below: the mean of the label column,
# reported as the fraud rate (presumably "Class" is a 0/1 flag — confirm).
positive_rate = y.mean()
print(f"Fraud rate: {positive_rate:.4f}")

Fraud rate: 0.0896


In [2]:
# Define helper metric printer
def evaluate_model(y_true, y_pred_proba, threshold=0.5):
    """Print and return classification metrics for a set of predicted scores.

    Hard labels are obtained by thresholding the scores (``score >= threshold``
    counts as the positive class). Precision, recall and F1 are computed on
    those hard labels; ROC AUC and average precision use the raw scores.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed straight through to the scikit-learn metrics.
    y_pred_proba : array-like
        One score per sample. Any array-like is accepted (list, tuple,
        NumPy array, pandas Series).
    threshold : float, default 0.5
        Cut-off applied to ``y_pred_proba`` to produce hard labels.

    Returns
    -------
    dict
        Metric name -> value, with keys "Precision", "Recall", "F1",
        "ROC_AUC" and "PR_AUC". Each entry is also printed, one per line.
    """
    # Coerce up front: a plain list/tuple supports neither an elementwise
    # ``>=`` against a float nor ``.astype``, so thresholding would raise.
    scores = np.asarray(y_pred_proba)
    y_pred = (scores >= threshold).astype(int)

    metrics = {
        # Threshold-dependent metrics use the hard labels
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "F1": f1_score(y_true, y_pred),
        # Ranking metrics use the raw scores, independent of the threshold
        "ROC_AUC": roc_auc_score(y_true, scores),
        "PR_AUC": average_precision_score(y_true, scores)
    }
    for k, v in metrics.items():
        print(f"{k:10s}: {v:0.4f}")
    return metrics

In [None]:
import numpy as np

# Stand-in "model": one uniform random score per row, drawn from a
# fixed-seed generator so the numbers are repeatable across runs
rng = np.random.RandomState(42)
n_rows = len(y)
y_pred_proba = rng.rand(n_rows)

# Score the random baseline with the shared metric helper
# (the returned dict is the cell's displayed value)
evaluate_model(y, y_pred_proba)


Precision : 0.0874
Recall    : 0.4858
F1        : 0.1481
ROC_AUC   : 0.4845
PR_AUC    : 0.0875


{'Precision': 0.08735380116959064,
 'Recall': 0.48577235772357724,
 'F1': 0.14807930607187111,
 'ROC_AUC': 0.4844719512195122,
 'PR_AUC': 0.08746249935495926}