# Model Comparison — Fraud Detection

This notebook evaluates multiple baseline and advanced models using
imbalance-aware metrics to identify a production-ready approach.

In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score, average_precision_score
from xgboost import XGBClassifier

In [8]:
df = pd.read_csv('../data/creditcard.csv')

In [11]:
df['hour'] = (df['Time'] // 3600) % 24

In [12]:
df = df.sort_values('Time')
df['time_diff'] = df['Time'].diff().fillna(0)
df['log_amount'] = np.log1p(df['Amount'])
amount_mean = df['Amount'].mean()
amount_std = df['Amount'].std()

df['amount_zscore'] = (df['Amount'] - amount_mean) / amount_std


In [13]:
df.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class', 'time_diff', 'log_amount', 'amount_zscore', 'hour'],
      dtype='object')

In [14]:
features = ['hour', 'time_diff', 'log_amount', 'amount_zscore'] + [f'V{i}' for i in range(1, 29)]

X = df[features]
y = df['Class']

In [15]:
X.isnull().sum().max()
X.describe()

Unnamed: 0,hour,time_diff,log_amount,amount_zscore,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,14.04647,0.606699,3.152188,-3.871964e-17,1.157198e-15,3.448842e-16,-1.459371e-15,2.110021e-15,9.819619e-16,1.496893e-15,-5.552476e-16,1.14163e-16,-2.399669e-15,2.239153e-15,1.67173e-15,-1.232642e-15,8.185012e-16,1.214679e-15,4.902624e-15,1.436319e-15,-3.794125e-16,9.75176e-16,1.039942e-15,6.405705e-16,1.57573e-16,-3.552626e-16,2.614573e-16,4.472068e-15,5.340915e-16,1.684469e-15,-3.660216e-16,-1.226829e-16
std,5.835854,1.05338,1.656648,1.0,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,1.08885,1.020713,0.9992014,0.9952742,0.9585956,0.915316,0.8762529,0.8493371,0.8381762,0.8140405,0.770925,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833
min,0.0,0.0,0.0,-0.3532288,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,-24.58826,-4.797473,-18.68371,-5.791881,-19.21433,-4.498945,-14.12985,-25.1628,-9.498746,-7.213527,-54.49772,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008
25%,10.0,0.0,1.88707,-0.3308395,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,-0.5354257,-0.7624942,-0.4055715,-0.6485393,-0.425574,-0.5828843,-0.4680368,-0.4837483,-0.4988498,-0.4562989,-0.2117214,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979
50%,15.0,0.0,3.135494,-0.265271,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,-0.09291738,-0.03275735,0.1400326,-0.01356806,0.05060132,0.04807155,0.06641332,-0.06567575,-0.003636312,0.003734823,-0.06248109,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383
75%,19.0,1.0,4.358822,-0.04471699,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,0.4539234,0.7395934,0.618238,0.662505,0.4931498,0.6488208,0.5232963,0.399675,0.5008067,0.4589494,0.1330408,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995
max,23.0,32.0,10.153941,102.3621,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,23.74514,12.01891,7.848392,7.126883,10.52677,8.877742,17.31511,9.253526,5.041069,5.591971,39.4209,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [18]:
lr = LogisticRegression(
    class_weight='balanced',
    max_iter=1000,
    random_state=42
)

lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

In [20]:
precision_lr = precision_score(y_test, y_pred_lr)
recall_lr = recall_score(y_test, y_pred_lr)
pr_auc_lr = average_precision_score(y_test, lr.predict_proba(X_test)[:,1])
f1_score = f1_score(y_test, y_pred_lr)

In [23]:
scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()

xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    scale_pos_weight=scale_pos_weight,
    eval_metric='logloss',
    random_state=42
)

xgb_model.fit(X_train, y_train)

AttributeError: 'super' object has no attribute '__sklearn_tags__'

AttributeError: 'super' object has no attribute '__sklearn_tags__'

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=200,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)

## Model Comparison Summary

- Logistic Regression provides a strong, interpretable baseline but struggles with recall under extreme imbalance.
- Cost-aware XGBoost achieves a better precision–recall trade-off and higher business profit.
- Business-aligned evaluation reveals improvements that traditional metrics alone would miss.
