### importing Libraries

In [25]:
# !pip install imbalanced-learn

In [26]:
# %pip install xgboost

In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    average_precision_score, precision_score, recall_score, f1_score, confusion_matrix
)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE

from xgboost import XGBClassifier

### Load Dataset

In [28]:
## Reload raw data cleanly without scale and drop

df = pd.read_csv("../data/raw/creditcard.csv")
df = df.sort_values("Time").reset_index(drop=True)

features = [c for c in df.columns if c != "Class"]
X = df[features]
y = df["Class"].astype(int)


### Data Quality Checks

In [29]:
missing_total = df.isnull().sum().sum()
dup_count = df.duplicated().sum()

print("Total missing values:", missing_total)
print("Duplicated rows:", dup_count)

if dup_count > 0:
    df = df.drop_duplicates().reset_index(drop=True)
    print("Dropped duplicates. New shape:", df.shape)

Total missing values: 0
Duplicated rows: 1081
Dropped duplicates. New shape: (283726, 31)


### checking for class imbalances

In [30]:
counts = df["Class"].value_counts()
pct = df["Class"].value_counts(normalize=True) * 100

print("Class counts:\n", counts)
print("\nClass %:\n", pct.round(4))

Class counts:
 Class
0    283253
1       473
Name: count, dtype: int64

Class %:
 Class
0    99.8333
1     0.1667
Name: proportion, dtype: float64


### Time-based split (train/val/test)

In [31]:
n = len(df)
train_end = int(n * 0.70)
val_end = int(n * 0.85)

train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

X_train = train_df.drop(columns=["Class"])
y_train = train_df["Class"].astype(int)

X_val = val_df.drop(columns=["Class"])
y_val = val_df["Class"].astype(int)

X_test = test_df.drop(columns=["Class"])
y_test = test_df["Class"].astype(int)

print("Shapes:")
print("  Train       X:", X_train.shape, " y:", y_train.shape)
print("  Validation  X:", X_val.shape,   " y:", y_val.shape)
print("  Test        X:", X_test.shape,  " y:", y_test.shape)

print("\nFraud rate:")
print(f"  Train:      {y_train.mean():.6f}")
print(f"  Validation: {y_val.mean():.6f}")
print(f"  Test:       {y_test.mean():.6f}")

print("\nFraud counts:")
print(f"  Train:      {int(y_train.sum())} / {len(y_train)}")
print(f"  Validation: {int(y_val.sum())} / {len(y_val)}")
print(f"  Test:       {int(y_test.sum())} / {len(y_test)}")

Shapes:
  Train       X: (198608, 30)  y: (198608,)
  Validation  X: (42559, 30)  y: (42559,)
  Test        X: (42559, 30)  y: (42559,)

Fraud rate:
  Train:      0.001843
  Validation: 0.001292
  Test:       0.001222

Fraud counts:
  Train:      366 / 198608
  Validation: 55 / 42559
  Test:       52 / 42559


### Undersampling (train only)

In [32]:
train_data = pd.concat([X_train, y_train.rename("Class")], axis=1)

fraud_df = train_data[train_data["Class"] == 1]
non_fraud_df = train_data[train_data["Class"] == 0]

# Safety check
print("\nTrain class counts BEFORE undersampling:")
print(train_data["Class"].value_counts())

non_fraud_sampled = non_fraud_df.sample(n=len(fraud_df), random_state=42)

balanced_train = (
    pd.concat([fraud_df, non_fraud_sampled])
      .sample(frac=1, random_state=42)
      .reset_index(drop=True)
)

X_train_under = balanced_train.drop(columns=["Class"])
y_train_under = balanced_train["Class"].astype(int)

print("\nTrain class counts AFTER undersampling:")
print(y_train_under.value_counts())


Train class counts BEFORE undersampling:
Class
0    198242
1       366
Name: count, dtype: int64

Train class counts AFTER undersampling:
Class
0    366
1    366
Name: count, dtype: int64


### Common evaluation helpers


In [33]:

def evaluate_model(model, X, y, name="Model"):
    probs = model.predict_proba(X)[:, 1]
    ap = average_precision_score(y, probs)
    print(f"\n--- {name} ---")
    print(f"PR-AUC (Average Precision): {ap:.4f}")
    return ap, probs

def evaluate_at_threshold(model, X, y, name="Model", threshold=0.5):
    probs = model.predict_proba(X)[:, 1]
    ap = average_precision_score(y, probs)
    preds = (probs >= threshold).astype(int)

    prec = precision_score(y, preds, zero_division=0)
    rec = recall_score(y, preds, zero_division=0)
    f1 = f1_score(y, preds, zero_division=0)

    print(f"\n--- {name} (threshold={threshold}) ---")
    print(f"PR-AUC: {ap:.4f}")
    print(f"Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f}")
    print("Confusion Matrix:\n", confusion_matrix(y, preds))
    return ap, prec, rec, f1

def evaluate_at_top_n(model, X, y, name="Model", top_n=200):
    probs = model.predict_proba(X)[:, 1]
    ap = average_precision_score(y, probs)

    idx = np.argsort(probs)[::-1]
    preds = np.zeros(len(y), dtype=int)
    preds[idx[:top_n]] = 1

    y_true = np.array(y)
    tp = ((preds == 1) & (y_true == 1)).sum()
    fp = ((preds == 1) & (y_true == 0)).sum()
    fn = ((preds == 0) & (y_true == 1)).sum()

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0

    print(f"\n--- {name} (Top-N={top_n}) ---")
    print(f"PR-AUC: {ap:.4f}")
    print(f"Precision@Top{top_n}: {precision:.4f} | Recall@Top{top_n}: {recall:.4f} | F1@Top{top_n}: {f1:.4f}")
    return ap, precision, recall, f1




### Preprocessing: scale only Amount 


In [34]:
scale_cols = ["Amount"]  # optionally add "Time"
preprocess = ColumnTransformer(
    transformers=[("scale", RobustScaler(), scale_cols)],
    remainder="passthrough"
)

### fraud detection Model

#### logistic Regression

In [35]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score

# Train LR on undersampled training set, with scaling inside pipeline
lr_model = Pipeline(steps=[
    ("preprocess", preprocess),
    ("lr", LogisticRegression(max_iter=5000, random_state=42))
])

lr_model.fit(X_train_under, y_train_under)

val_probs = lr_model.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)

print("--- Logistic Regression (Validation) ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

evaluate_at_threshold(lr_model, X_val, y_val, name="LogReg (Under+ScaleAmount)", threshold=0.05)
evaluate_at_top_n(lr_model, X_val, y_val, name="LogReg (Under+ScaleAmount)", top_n=200)

--- Logistic Regression (Validation) ---
PR-AUC (Average Precision): 0.4039

--- LogReg (Under+ScaleAmount) (threshold=0.05) ---
PR-AUC: 0.4039
Precision: 0.0038 | Recall: 0.9818 | F1: 0.0076
Confusion Matrix:
 [[28427 14077]
 [    1    54]]

--- LogReg (Under+ScaleAmount) (Top-N=200) ---
PR-AUC: 0.4039
Precision@Top200: 0.2400 | Recall@Top200: 0.8727 | F1@Top200: 0.3765


(0.4039047652584162,
 np.float64(0.24),
 np.float64(0.8727272727272727),
 np.float64(0.3764705882352941))

In [36]:
## Feature analysis (coefficients) with pipeline
feature_names = X_train_under.columns
coefs = lr_model.named_steps["lr"].coef_[0]

lr_coefs = pd.DataFrame({
    "Feature": feature_names,
    "Coefficient": coefs
})
lr_coefs["Abs_Coef"] = lr_coefs["Coefficient"].abs()
lr_coefs = lr_coefs.sort_values("Abs_Coef", ascending=False)

print("\nTop 5 Features by Absolute Coefficient:")
print(lr_coefs.head(5))


Top 5 Features by Absolute Coefficient:
   Feature  Coefficient  Abs_Coef
15     V15    -0.926582  0.926582
5       V5     0.862760  0.862760
9       V9    -0.840034  0.840034
13     V13    -0.815987  0.815987
23     V23     0.748007  0.748007


### Randm Forest

In [37]:
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.under_sampling import RandomUnderSampler

rf_under_pipe = ImbPipeline([
    ("under", RandomUnderSampler(random_state=42)),
    ("rf", RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
])

rf_under_pipe.fit(X_train, y_train)

evaluate_at_threshold(rf_under_pipe, X_val, y_val, name="RF (UnderPipe)", threshold=0.05)
evaluate_at_top_n(rf_under_pipe, X_val, y_val, name="RF (UnderPipe)", top_n=200)


--- RF (UnderPipe) (threshold=0.05) ---
PR-AUC: 0.8413
Precision: 0.0019 | Recall: 1.0000 | F1: 0.0037
Confusion Matrix:
 [[13242 29262]
 [    0    55]]

--- RF (UnderPipe) (Top-N=200) ---
PR-AUC: 0.8413
Precision@Top200: 0.2400 | Recall@Top200: 0.8727 | F1@Top200: 0.3765


(0.8413424738193728,
 np.float64(0.24),
 np.float64(0.8727272727272727),
 np.float64(0.3764705882352941))

### XGBoost

In [38]:
from xgboost import XGBClassifier
from sklearn.metrics import average_precision_score

neg = (y_train == 0).sum()
pos = (y_train == 1).sum()
scale_pos_weight = neg / pos

xgb_weighted = XGBClassifier(
    eval_metric="logloss",
    random_state=42,
    n_estimators=800,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    n_jobs=-1,
    scale_pos_weight=scale_pos_weight
)

xgb_weighted.fit(X_train, y_train)

val_probs = xgb_weighted.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)

print("--- XGBoost (Weighted) Validation ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

evaluate_at_top_n(xgb_weighted, X_val, y_val, name="XGB (Weighted)", top_n=200)



--- XGBoost (Weighted) Validation ---
PR-AUC (Average Precision): 0.8514

--- XGB (Weighted) (Top-N=200) ---
PR-AUC: 0.8514
Precision@Top200: 0.2400 | Recall@Top200: 0.8727 | F1@Top200: 0.3765


(0.851364563837334,
 np.float64(0.24),
 np.float64(0.8727272727272727),
 np.float64(0.3764705882352941))

In [39]:
## feature importance
xgb_importances = pd.DataFrame({
    "Feature": X_train.columns,
    "Importance": xgb_weighted.feature_importances_
}).sort_values("Importance", ascending=False)

print("\nTop 5 Feature Importances (XGBoost Weighted):")
print(xgb_importances.head(5))


Top 5 Feature Importances (XGBoost Weighted):
   Feature  Importance
14     V14    0.355702
10     V10    0.151346
12     V12    0.073023
4       V4    0.072063
8       V8    0.030004


### Gaussian Naive Bayes

In [40]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, average_precision_score

gnb = GaussianNB()
gnb.fit(X_train_under, y_train_under)

y_val_pred_gnb = gnb.predict(X_val)
y_val_probs_gnb = gnb.predict_proba(X_val)[:, 1]

val_ap_gnb = average_precision_score(y_val, y_val_probs_gnb)

print("--- Naive Bayes (Validation) ---")
print(f"Average Precision (PR-AUC): {val_ap_gnb:.4f}")
print("\nClassification Report:\n")
print(classification_report(y_val, y_val_pred_gnb))

# Optional: keep consistent with your other models
evaluate_at_threshold(gnb, X_val, y_val, name="GNB (Under)", threshold=0.05)
evaluate_at_top_n(gnb, X_val, y_val, name="GNB (Under)", top_n=200)

--- Naive Bayes (Validation) ---
Average Precision (PR-AUC): 0.0539

Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.98      0.99     42504
           1       0.05      0.76      0.09        55

    accuracy                           0.98     42559
   macro avg       0.52      0.87      0.54     42559
weighted avg       1.00      0.98      0.99     42559


--- GNB (Under) (threshold=0.05) ---
PR-AUC: 0.0539
Precision: 0.0433 | Recall: 0.8000 | F1: 0.0822
Confusion Matrix:
 [[41532   972]
 [   11    44]]

--- GNB (Under) (Top-N=200) ---
PR-AUC: 0.0539
Precision@Top200: 0.0700 | Recall@Top200: 0.2545 | F1@Top200: 0.1098


(0.05385767697945146,
 np.float64(0.07),
 np.float64(0.2545454545454545),
 np.float64(0.10980392156862745))

### Over Sampling (SMOTE) + Logistic Regression pipeline 


In [41]:
import pandas as pd
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score

# SMOTE + Scaling + Logistic Regression in one pipeline
lr_smote_pipe = ImbPipeline(steps=[
    ("preprocess", preprocess),
    ("smote", SMOTE(random_state=42)),
    ("lr", LogisticRegression(max_iter=5000, solver="liblinear", random_state=42))
])

# Fit on training data only
lr_smote_pipe.fit(X_train, y_train)

# PR-AUC on validation
val_probs = lr_smote_pipe.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)

print("--- Logistic Regression (SMOTE Pipeline) Validation ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

# Threshold / Top-N evaluation
evaluate_at_threshold(lr_smote_pipe, X_val, y_val, name="LogReg (SMOTE)", threshold=0.05)
evaluate_at_top_n(lr_smote_pipe, X_val, y_val, name="LogReg (SMOTE)", top_n=200)

# Feature coefficients (from pipeline)
coefs = lr_smote_pipe.named_steps["lr"].coef_[0]
lr_sm_coefs = pd.DataFrame({
    "Feature": X_train.columns,
    "Coefficient": coefs
})
lr_sm_coefs["Abs_Coef"] = lr_sm_coefs["Coefficient"].abs()
lr_sm_coefs = lr_sm_coefs.sort_values("Abs_Coef", ascending=False)

print("\nTop 5 Features by Absolute Coefficient (SMOTE LR):")
print(lr_sm_coefs.head(5))

--- Logistic Regression (SMOTE Pipeline) Validation ---
PR-AUC (Average Precision): 0.8208

--- LogReg (SMOTE) (threshold=0.05) ---
PR-AUC: 0.8208
Precision: 0.0293 | Recall: 0.9091 | F1: 0.0569
Confusion Matrix:
 [[40850  1654]
 [    5    50]]

--- LogReg (SMOTE) (Top-N=200) ---
PR-AUC: 0.8208
Precision@Top200: 0.2400 | Recall@Top200: 0.8727 | F1@Top200: 0.3765

Top 5 Features by Absolute Coefficient (SMOTE LR):
   Feature  Coefficient  Abs_Coef
15     V15    -0.396654  0.396654
4       V4    -0.277498  0.277498
13     V13    -0.234228  0.234228
5       V5     0.233138  0.233138
11     V11    -0.208442  0.208442


### Random Forest (with SMOTE)

In [42]:
import pandas as pd
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score

rf_smote_pipe = ImbPipeline(steps=[
    ("smote", SMOTE(random_state=42)),
    ("rf", RandomForestClassifier(
        n_estimators=300,
        max_depth=10,
        n_jobs=-1,
        random_state=42
    ))
])

rf_smote_pipe.fit(X_train, y_train)

val_probs = rf_smote_pipe.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)

print("--- Random Forest (SMOTE Pipeline) Validation ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

evaluate_at_threshold(rf_smote_pipe, X_val, y_val, name="RF (SMOTE)", threshold=0.05)
evaluate_at_top_n(rf_smote_pipe, X_val, y_val, name="RF (SMOTE)", top_n=200)

# Feature importance (from RF inside pipeline)
rf_step = rf_smote_pipe.named_steps["rf"]
rf_sm_importances = pd.DataFrame({
    "Feature": X_train.columns,
    "Importance": rf_step.feature_importances_
}).sort_values("Importance", ascending=False)

print("\nTop 5 Feature Importances (RF SMOTE):")
print(rf_sm_importances.head(5))

--- Random Forest (SMOTE Pipeline) Validation ---
PR-AUC (Average Precision): 0.8506

--- RF (SMOTE) (threshold=0.05) ---
PR-AUC: 0.8506
Precision: 0.0099 | Recall: 0.9273 | F1: 0.0195
Confusion Matrix:
 [[37390  5114]
 [    4    51]]

--- RF (SMOTE) (Top-N=200) ---
PR-AUC: 0.8506
Precision@Top200: 0.2450 | Recall@Top200: 0.8909 | F1@Top200: 0.3843

Top 5 Feature Importances (RF SMOTE):
   Feature  Importance
14     V14    0.192877
10     V10    0.139928
12     V12    0.118468
4       V4    0.112564
17     V17    0.096249


### XGBoost (with SMOTE)

In [43]:
import pandas as pd
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import average_precision_score

xgb_smote_pipe = ImbPipeline(steps=[
    ("smote", SMOTE(random_state=42)),
    ("xgb", XGBClassifier(
        eval_metric="logloss",
        random_state=42,
        n_estimators=800,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        n_jobs=-1
    ))
])

xgb_smote_pipe.fit(X_train, y_train)

val_probs = xgb_smote_pipe.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)

print("--- XGBoost (SMOTE Pipeline) Validation ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

evaluate_at_threshold(xgb_smote_pipe, X_val, y_val, name="XGB (SMOTE)", threshold=0.05)
evaluate_at_top_n(xgb_smote_pipe, X_val, y_val, name="XGB (SMOTE)", top_n=200)

# Feature importance (from XGB inside pipeline)
xgb_step = xgb_smote_pipe.named_steps["xgb"]
xgb_sm_importances = pd.DataFrame({
    "Feature": X_train.columns,
    "Importance": xgb_step.feature_importances_
}).sort_values("Importance", ascending=False)

print("\nTop 5 Feature Importances (XGB SMOTE):")
print(xgb_sm_importances.head(5))

--- XGBoost (SMOTE Pipeline) Validation ---
PR-AUC (Average Precision): 0.8328

--- XGB (SMOTE) (threshold=0.05) ---
PR-AUC: 0.8328
Precision: 0.2260 | Recall: 0.8545 | F1: 0.3574
Confusion Matrix:
 [[42343   161]
 [    8    47]]

--- XGB (SMOTE) (Top-N=200) ---
PR-AUC: 0.8328
Precision@Top200: 0.2350 | Recall@Top200: 0.8545 | F1@Top200: 0.3686

Top 5 Feature Importances (XGB SMOTE):
   Feature  Importance
14     V14    0.440862
10     V10    0.194849
12     V12    0.066844
17     V17    0.054192
4       V4    0.048823


### Pipeline for XGBOOST + SMOTE

In [44]:
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import average_precision_score

final_pipeline = ImbPipeline(steps=[
    ("scaler", RobustScaler()),          # optional for XGB
    ("smote", SMOTE(random_state=42)),
    ("xgb", XGBClassifier(
        eval_metric="logloss",
        random_state=42,
        n_estimators=800,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        n_jobs=-1
    ))
])

final_pipeline.fit(X_train, y_train)

# Validation PR-AUC
val_probs = final_pipeline.predict_proba(X_val)[:, 1]
val_ap = average_precision_score(y_val, val_probs)
print("--- FINAL PIPELINE (XGB+SMOTE): VALIDATION ---")
print(f"PR-AUC (Average Precision): {val_ap:.4f}")

# Operational eval (choose one or both)
evaluate_at_top_n(final_pipeline, X_val, y_val, name="Final XGB+SMOTE", top_n=200)
evaluate_at_threshold(final_pipeline, X_val, y_val, name="Final XGB+SMOTE", threshold=0.05)

# Test PR-AUC
test_probs = final_pipeline.predict_proba(X_test)[:, 1]
test_ap = average_precision_score(y_test, test_probs)
print("\n--- FINAL PIPELINE (XGB+SMOTE): TEST ---")
print(f"PR-AUC (Average Precision): {test_ap:.4f}")

# Operational test eval
evaluate_at_top_n(final_pipeline, X_test, y_test, name="Final XGB+SMOTE (TEST)", top_n=200)

--- FINAL PIPELINE (XGB+SMOTE): VALIDATION ---
PR-AUC (Average Precision): 0.8306

--- Final XGB+SMOTE (Top-N=200) ---
PR-AUC: 0.8306
Precision@Top200: 0.2350 | Recall@Top200: 0.8545 | F1@Top200: 0.3686

--- Final XGB+SMOTE (threshold=0.05) ---
PR-AUC: 0.8306
Precision: 0.3172 | Recall: 0.8364 | F1: 0.4600
Confusion Matrix:
 [[42405    99]
 [    9    46]]

--- FINAL PIPELINE (XGB+SMOTE): TEST ---
PR-AUC (Average Precision): 0.7659

--- Final XGB+SMOTE (TEST) (Top-N=200) ---
PR-AUC: 0.7659
Precision@Top200: 0.2050 | Recall@Top200: 0.7885 | F1@Top200: 0.3254


(0.7658630072182174,
 np.float64(0.205),
 np.float64(0.7884615384615384),
 np.float64(0.3253968253968254))

### Model Comparision Table

In [45]:
import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score

def metrics_at_top_n(model, X, y, top_n):
    probs = model.predict_proba(X)[:, 1]
    ap = average_precision_score(y, probs)

    idx = np.argsort(probs)[::-1]
    preds = np.zeros(len(y), dtype=int)
    preds[idx[:top_n]] = 1

    y_true = np.array(y)

    tp = ((preds == 1) & (y_true == 1)).sum()
    fp = ((preds == 1) & (y_true == 0)).sum()
    fn = ((preds == 0) & (y_true == 1)).sum()

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0

    return ap, precision, recall, f1

TOP_N = 200

models = {
    "LogReg (Under+Scaled)": lr_model,
    "GNB (Under)": gnb,
    "RF (Under Pipeline)": rf_under_pipe,
    "XGB (Weighted)": xgb_weighted,

    "LogReg (SMOTE)": lr_smote_pipe,
    "RF (SMOTE)": rf_smote_pipe,
    "XGB (SMOTE)": xgb_smote_pipe,
}

rows = []
for name, m in models.items():
    ap, p, r, f1 = metrics_at_top_n(m, X_val, y_val, TOP_N)
    rows.append({
        "Model": name,
        "PR-AUC": ap,
        f"Precision@Top{TOP_N}": p,
        f"Recall@Top{TOP_N}": r,
        f"F1@Top{TOP_N}": f1
    })

comparison_df = pd.DataFrame(rows).sort_values("PR-AUC", ascending=False)
comparison_df



Unnamed: 0,Model,PR-AUC,Precision@Top200,Recall@Top200,F1@Top200
3,XGB (Weighted),0.851365,0.24,0.872727,0.376471
5,RF (SMOTE),0.850598,0.245,0.890909,0.384314
2,RF (Under Pipeline),0.841342,0.24,0.872727,0.376471
6,XGB (SMOTE),0.83281,0.235,0.854545,0.368627
4,LogReg (SMOTE),0.820811,0.24,0.872727,0.376471
0,LogReg (Under+Scaled),0.361482,0.23,0.836364,0.360784
1,GNB (Under),0.053858,0.07,0.254545,0.109804


In [46]:
best_name = comparison_df.iloc[0]["Model"]
best_model = models[best_name]

ap_t, p_t, r_t, f1_t = metrics_at_top_n(best_model, X_test, y_test, TOP_N)

print("\nBest model:", best_name)
print(f"TEST PR-AUC: {ap_t:.4f}")
print(f"TEST Precision@Top{TOP_N}: {p_t:.4f}")
print(f"TEST Recall@Top{TOP_N}: {r_t:.4f}")
print(f"TEST F1@Top{TOP_N}: {f1_t:.4f}")


Best model: XGB (Weighted)
TEST PR-AUC: 0.7729
TEST Precision@Top200: 0.2050
TEST Recall@Top200: 0.7885
TEST F1@Top200: 0.3254
