In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from typing import Dict, Any

In [2]:
def _load_split(path: str, target: str = 'is_fraud'):
    """Read one pickled dataset and split it into features and target.

    The file is read from disk a single time; the feature frame is the loaded
    frame without the `target` column and the label series is that column.

    Args:
        path: Path of the pickled DataFrame to load.
        target: Name of the label column to separate from the features.

    Returns:
        A `(features, labels)` pair: a DataFrame and a Series.
    """
    # NOTE(security): unpickling can execute arbitrary code — only load files
    # that come from a trusted source.
    frame = pd.read_pickle(path)
    return frame.drop(columns=[target]), frame[target]


X_train, y_train = _load_split('train_set_final_filtered.pkl')
X_dev, y_dev = _load_split('dev_set_final_filtered.pkl')

In [3]:
def get_classification_metrics(y_true: pd.Series, y_pred: np.ndarray, y_proba: np.ndarray) -> Dict[str, float]:
    """Score a binary classifier's predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions, used for accuracy, precision, recall and F1.
        y_proba: Positive-class scores, used for log-loss and ROC AUC.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall', 'F1-Score',
        'Log-loss' and 'AUC', inserted in that order.
    """
    scores: Dict[str, float] = {'Accuracy': accuracy_score(y_true, y_pred)}

    # These three share one call shape; zero_division=0 makes an undefined
    # ratio (e.g. no predicted positives) score 0 instead of warning.
    label_scorers = (
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )
    for metric_name, scorer in label_scorers:
        scores[metric_name] = scorer(y_true, y_pred, zero_division=0)

    # Score-based metrics come last so the key order matches the report layout.
    scores['Log-loss'] = log_loss(y_true, y_proba)
    scores['AUC'] = roc_auc_score(y_true, y_proba)
    return scores

## Models

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import xgboost as xgb

In [5]:
# Untuned reference classifiers, keyed by the display name shown in the results table.
# run_model_comparison matches on these exact names to decide how class imbalance
# is handled, so renaming a key changes how that model is trained.
baseline_models = {
    'Logistic Regression': LogisticRegression(
        solver='saga',
        C=0.1,
        max_iter=2000,
        random_state=42,
    ),
    'Decision Tree': DecisionTreeClassifier(
        max_depth=6,
        random_state=42,
    ),
    'RandomForest': RandomForestClassifier(
        n_estimators=100,
        max_depth=8,
        n_jobs=-1,
        random_state=42,
    ),
    'GBM': GradientBoostingClassifier(
        n_estimators=100,
        max_depth=5,
        random_state=42,
    ),
    'ADABoost': AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=2),
        n_estimators=100,
        random_state=42,
    ),
    'XGB': xgb.XGBClassifier(
        objective='binary:logistic',
        eval_metric='logloss',
        use_label_encoder=False,
        n_jobs=-1,
        random_state=42,
    ),
    # SVM was left out because of long training times:
    # 'SVM': SVC(probability=True)
}

In [6]:
def _positive_class_scores(model: Any, X: pd.DataFrame, y_pred: np.ndarray) -> np.ndarray:
    """Return one score in [0, 1] per row of X for the positive class (1)."""
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, 1]
    if hasattr(model, "decision_function"):
        # Margin-only models have no predict_proba. Squash the signed margin
        # with the logistic function so log-loss receives values in [0, 1];
        # the mapping is monotonic, so ranking (and therefore AUC) is unchanged.
        # These are uncalibrated scores, not true probabilities.
        margins = np.asarray(model.decision_function(X), dtype=float)
        # tanh form of the sigmoid: avoids overflow in exp() for large margins.
        return 0.5 * (1.0 + np.tanh(0.5 * margins))
    # Last resort: use the hard 0/1 predictions as scores.
    return y_pred


def run_model_comparison(models: Dict[str, Any], X_train: pd.DataFrame, y_train: pd.Series, X_dev: pd.DataFrame, y_dev: pd.Series) -> pd.DataFrame:
    """Train each model on the training set and score it on the dev set.

    Class imbalance is handled by weighting the positive class (1) with the
    negative-to-positive ratio of `y_train`. The weighting is selected by the
    model's *name* in `models`:

    * 'Logistic Regression', 'Decision Tree', 'RandomForest' get
      `class_weight={0: 1, 1: ratio}`;
    * 'XGB' gets `scale_pos_weight=ratio`;
    * any other name is trained exactly as configured.

    The estimators in `models` are modified in place: their parameters may be
    updated and they are fitted, so they are usable by the caller afterwards.

    Args:
        models: Mapping of display name to an unfitted estimator.
        X_train: Training features.
        y_train: Training labels; must contain at least one positive (1),
            otherwise the imbalance ratio is a division by zero.
        X_dev: Dev-set features used for evaluation.
        y_dev: Dev-set labels used for evaluation.

    Returns:
        One row per model holding the metrics from
        `get_classification_metrics` plus a 'model' column with the name.
    """
    # Negative-to-positive ratio: how much heavier a positive sample must
    # weigh for both classes to contribute equally.
    imbalance_ratio = (y_train == 0).sum() / (y_train == 1).sum()
    class_weights = {0: 1, 1: imbalance_ratio}
    class_weighted_names = ('Logistic Regression', 'Decision Tree', 'RandomForest')

    results = []
    for name, model in models.items():
        print(f"-> Training {name}...")

        # --- Imbalance handling (selected by name, see docstring) ---
        if name in class_weighted_names:
            model.set_params(class_weight=class_weights)
        elif name == 'XGB':
            model.set_params(scale_pos_weight=imbalance_ratio)

        model.fit(X_train, y_train)

        y_pred = model.predict(X_dev)
        y_proba = _positive_class_scores(model, X_dev, y_pred)

        metrics = get_classification_metrics(y_dev, y_pred, y_proba)
        metrics['model'] = name
        results.append(metrics)

    return pd.DataFrame(results)

In [7]:
# Fit and score every baseline, then rank the rows by dev-set AUC (best first).
comparison_df = (
    run_model_comparison(baseline_models, X_train, y_train, X_dev, y_dev)
    .sort_values(by='AUC', ascending=False)
    .reset_index(drop=True)
)

# Report banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 85,
    "             BASELINE MODEL PERFORMANCE COMPARISON (on Validation Set)",
    "=" * 85,
    sep="\n",
)

# Move the model name to the first column and fix the metric order for the report.
comparison_df = comparison_df.loc[
    :, ['model', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Log-loss', 'AUC']
]
print(comparison_df.to_markdown(index=True, floatfmt=".6f"))

-> Training Logistic Regression...
-> Training Decision Tree...
-> Training RandomForest...
-> Training GBM...
-> Training ADABoost...
-> Training XGB...

             BASELINE MODEL PERFORMANCE COMPARISON (on Validation Set)
|    | model               |   Accuracy |   Precision |   Recall |   F1-Score |   Log-loss |      AUC |
|---:|:--------------------|-----------:|------------:|---------:|-----------:|-----------:|---------:|
|  0 | XGB                 |   0.996127 |    0.586503 | 0.962668 |   0.728916 |   0.011413 | 0.997407 |
|  1 | ADABoost            |   0.998868 |    0.970160 | 0.815789 |   0.886303 |   0.399857 | 0.995788 |
|  2 | RandomForest        |   0.986852 |    0.286289 | 0.958384 |   0.440878 |   0.089809 | 0.994249 |
|  3 | Logistic Regression |   0.973167 |    0.160512 | 0.936353 |   0.274046 |   0.094169 | 0.986303 |
|  4 | Decision Tree       |   0.989136 |    0.324307 | 0.930845 |   0.481025 |   0.094036 | 0.966176 |
|  5 | GBM                 |   0.998659 |    0

## Fine Tuning

Hyperparameters are tuned with a manual search over a few hand-picked configurations, because of a bug in the RandomizedSearchCV wrapper.

In [10]:
# Ratio of negative (0) to positive (1) labels in the training set.
imbalance_ratio = y_train.eq(0).sum() / y_train.eq(1).sum()
def evaluate_xgb_params(params: dict, X_train: pd.DataFrame, y_train: pd.Series, X_dev: pd.DataFrame, y_dev: pd.Series):
    """Train one XGBoost configuration and score it on the dev set.

    The positive class is weighted by the negative-to-positive ratio of the
    `y_train` that is passed in, so the function gives consistent results for
    any training set rather than depending on notebook-level state.

    Args:
        params: Extra keyword arguments for `xgb.XGBClassifier`
            (e.g. 'n_estimators', 'max_depth', 'learning_rate', 'gamma').
            Must not repeat a keyword that is fixed below.
        X_train: Training features.
        y_train: Training labels; must contain at least one positive (1).
        X_dev: Dev-set features used for evaluation.
        y_dev: Dev-set labels used for evaluation.

    Returns:
        A tuple `(auc, recall, precision)` measured on the dev set. AUC uses
        the positive-class probabilities; recall and precision use the hard
        predictions from `model.predict`.
    """
    # Derived from the labels actually being trained on, not from a global.
    scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()

    # NOTE(review): use_label_encoder is deprecated in newer xgboost releases —
    # confirm it is still accepted by the installed version.
    model = xgb.XGBClassifier(
        objective='binary:logistic',
        use_label_encoder=False,
        eval_metric='logloss',
        scale_pos_weight=scale_pos_weight,
        random_state=42,
        n_jobs=-1,
        **params
    )

    # .get(): both keys are optional for XGBoost, so a missing one must not
    # abort the run just because of the progress message.
    print(f"  -> Fitting model with max_depth={params.get('max_depth')}, learning_rate={params.get('learning_rate')}...")
    model.fit(X_train, y_train)

    y_proba = model.predict_proba(X_dev)[:, 1]
    y_pred = model.predict(X_dev)

    auc = roc_auc_score(y_dev, y_proba)
    recall = recall_score(y_dev, y_pred, zero_division=0)
    precision = precision_score(y_dev, y_pred, zero_division=0)

    return auc, recall, precision

In [11]:
# Hand-picked XGBoost configurations to compare; each one is passed to
# evaluate_xgb_params as keyword arguments for the classifier.
parameter_sets = [
    {'n_estimators': 150, 'max_depth': 6, 'learning_rate': 0.1, 'gamma': 0.5},
    {'n_estimators': 300, 'max_depth': 8, 'learning_rate': 0.05, 'gamma': 0.1},
    {'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.2, 'gamma': 1},
]

print(
    "\n" + "=" * 50,
    "STARTING MANUAL XGBOOST FINE-TUNING",
    "=" * 50,
    sep="\n",
)

# One record per configuration; 'Set' is the 1-based position in parameter_sets.
manual_results = []
for set_number, candidate in enumerate(parameter_sets, start=1):
    dev_auc, dev_recall, dev_precision = evaluate_xgb_params(
        candidate, X_train, y_train, X_dev, y_dev
    )
    record = {
        'Set': set_number,
        'Max_Depth': candidate['max_depth'],
        'Learning_Rate': candidate['learning_rate'],
        'AUC': dev_auc,
        'Recall': dev_recall,
        'Precision': dev_precision,
    }
    manual_results.append(record)

results_df = pd.DataFrame(manual_results).sort_values(by='AUC', ascending=False)
print("\n--- Manual Tuning Results (Sorted by AUC) ---")
print(results_df.to_markdown(index=False, floatfmt=".4f"))

# The top row after sorting is the highest-AUC configuration. Its 'Set' value
# is 1-based, so subtract one to index back into parameter_sets.
best_set_index = results_df.iloc[0]['Set']
best_params_index = int(best_set_index) - 1
best_params = parameter_sets[best_params_index]

print(f"\nRecommended Best Hyperparameters: {best_params}")


STARTING MANUAL XGBOOST FINE-TUNING
  -> Fitting model with max_depth=6, learning_rate=0.1...
  -> Fitting model with max_depth=8, learning_rate=0.05...
  -> Fitting model with max_depth=4, learning_rate=0.2...

--- Manual Tuning Results (Sorted by AUC) ---
|    Set |   Max_Depth |   Learning_Rate |    AUC |   Recall |   Precision |
|-------:|------------:|----------------:|-------:|---------:|------------:|
| 1.0000 |      6.0000 |          0.1000 | 0.9974 |   0.9712 |      0.4383 |
| 2.0000 |      8.0000 |          0.0500 | 0.9974 |   0.9651 |      0.5743 |
| 3.0000 |      4.0000 |          0.2000 | 0.9971 |   0.9743 |      0.3567 |

Recommended Best Hyperparameters: {'n_estimators': 150, 'max_depth': 6, 'learning_rate': 0.1, 'gamma': 0.5}
