# Dataset2 Model Benchmarks

This notebook evaluates multiple classical and neural classifiers on Dataset2 using the preprocessed train/test CSVs.

Models included:
- Logistic Regression (balanced)
- Decision Tree (balanced)
- Random Forest (balanced)
- Gradient Boosting
- Linear SVM (balanced)
- MLPClassifier (neural network)
- Optional: XGBoost (if installed)

For each model, we compute:
- Accuracy, Precision, Recall, F1
- ROC-AUC, PR-AUC
- Confusion Matrix
- Classification Report

All results are collected in a summary table (sorted by PR-AUC) for easy comparison with the FT-Transformer.


In [1]:
# Paths and configuration
train_dataset_path = '../data/train.csv'
test_dataset_path = '../data/test.csv'
metadata_path = '../data/preprocessing_metadata.json'
class_label = 'Class'
random_seed = 30

import warnings
warnings.filterwarnings('ignore')


In [2]:
# Imports
import pandas as pd
import numpy as np
import json
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score,
    precision_recall_curve, roc_curve, average_precision_score
)

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

try:
    from xgboost import XGBClassifier
    XGB_AVAILABLE = True
except Exception:
    XGB_AVAILABLE = False

# For type hints
from typing import Dict


In [None]:
print('Loading Dataset...')
train_df = pd.read_csv(train_dataset_path)
test_df = pd.read_csv(test_dataset_path)
print(train_df.shape, test_df.shape)

# Identify features/target
feature_cols = [c for c in train_df.columns if c != class_label]
X_train = train_df[feature_cols].copy()
y_train = train_df[class_label].astype(int).values
X_test = test_df[feature_cols].copy()
y_test = test_df[class_label].astype(int).values

print(f"Train: {X_train.shape}, Test: {X_test.shape}")
print('Class balance (train):', np.bincount(y_train))
print('Class balance (test):', np.bincount(y_test))


Loading Dataset2...
(409412, 31) (28481, 31)
Train: (409412, 30), Test: (28481, 30)
Class balance (train): [255883 153529]
Class balance (test): [28432    49]


In [None]:
# Utility: evaluate model
from typing import Dict


def format_classification_report_latex(y_true, y_pred, class_names=("Non-Fraud", "Fraud"), digits: int = 4) -> str:
    """Return a LaTeX table for the classification report matching the requested template."""
    rep = classification_report(
        y_true,
        y_pred,
        target_names=list(class_names),
        output_dict=True,
        zero_division=0,
        digits=digits,
    )

    def fmt(x: float) -> str:
        return f"{x:.{digits}f}"

    non_fraud = rep[class_names[0]]
    fraud = rep[class_names[1]]
    accuracy_val = rep["accuracy"]
    macro_avg = rep["macro avg"]
    weighted_avg = rep["weighted avg"]
    total_support = int(non_fraud["support"] + fraud["support"])

    lines = []
    lines.append(r"\begin{table}[h!]")
    lines.append(r"\centering")
    lines.append(r"\begin{tabular}{lcccc}")
    lines.append(r"\hline")
    lines.append(r"\textbf{Class} & \textbf{Precision} & \textbf{Recall} & \textbf{F1-Score} & \textbf{Support} \\")
    lines.append(r"\hline")
    lines.append(f"Non-Fraud      & {fmt(non_fraud['precision'])} & {fmt(non_fraud['recall'])} & {fmt(non_fraud['f1-score'])} & {int(non_fraud['support'])} \\\\")
    lines.append(f"Fraud          & {fmt(fraud['precision'])} & {fmt(fraud['recall'])} & {fmt(fraud['f1-score'])} & {int(fraud['support'])} \\\\")
    lines.append(r"\hline")
    lines.append(f"Accuracy       &        &        & {fmt(accuracy_val)} & {total_support} \\\\")
    lines.append(f"Macro Avg      & {fmt(macro_avg['precision'])} & {fmt(macro_avg['recall'])} & {fmt(macro_avg['f1-score'])} & {total_support} \\\\")
    lines.append(f"Weighted Avg   & {fmt(weighted_avg['precision'])} & {fmt(weighted_avg['recall'])} & {fmt(weighted_avg['f1-score'])} & {total_support} \\\\")
    lines.append(r"\hline")
    lines.append(r"\end{tabular}")
    lines.append(r"\caption{Classification Report}")
    lines.append(r"\label{tab:classification_report}")
    lines.append(r"\end{table}")

    return "\n".join(lines)


def evaluate_classifier(model, X_test, y_test, name: str) -> Dict:
    y_pred = model.predict(X_test)
    if hasattr(model, 'predict_proba'):
        y_pred_proba = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        # For LinearSVC
        scores = model.decision_function(X_test)
        # Convert scores to [0,1] via min-max for AUC/PR (ranking is what matters)
        scores = (scores - scores.min()) / (scores.max() - scores.min() + 1e-8)
        y_pred_proba = scores
    else:
        y_pred_proba = (y_pred == 1).astype(float)
    
    metrics = {
        'model': name,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'f1': f1_score(y_test, y_pred, zero_division=0),
        'roc_auc': roc_auc_score(y_test, y_pred_proba),
        'pr_auc': precision_recall_curve(y_test, y_pred_proba),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    }
    
    print(f"\n=== {name} ===")
    print(f"Accuracy: {metrics['accuracy']:.4f} | Precision: {metrics['precision']:.4f} | Recall: {metrics['recall']:.4f} | F1: {metrics['f1']:.4f}")
    print(f"ROC-AUC: {metrics['roc_auc']:.4f} | PR-AUC: {metrics['pr_auc']:.4f}")
    print("Confusion Matrix:\n", metrics['confusion_matrix'])

    # Print LaTeX-formatted classification report and save alongside results for easy copy/paste
    latex_report = format_classification_report_latex(y_test, y_pred, class_names=("Non-Fraud", "Fraud"), digits=4)
    print("\nLaTeX Classification Report:\n")
    print(latex_report)

    # Also save to the results directory
    # out_dir = Path('../results')
    # out_dir.mkdir(parents=True, exist_ok=True)
    # with open(out_dir / f"dataset2_{name.lower()}_classification_report.tex", 'w') as f:
    #     f.write(latex_report + "\n")
    
    return metrics


In [5]:
# Define models
models = []

# Logistic Regression (balanced)
models.append(('LogisticRegression', LogisticRegression(max_iter=100000, class_weight='balanced', n_jobs=None)))

# # Decision Tree (balanced)
models.append(('DecisionTree', DecisionTreeClassifier(class_weight='balanced', random_state=random_seed)))

# Random Forest (balanced)
models.append(('RandomForest', RandomForestClassifier(n_estimators=100, max_depth=None, n_jobs=-1, class_weight='balanced_subsample', random_state=random_seed)))

# # Gradient Boosting
models.append(('GradientBoosting', GradientBoostingClassifier(random_state=random_seed)))

# # Linear SVM (use LinearSVC, which does not output proba)
models.append(('LinearSVM', LinearSVC(class_weight='balanced', random_state=random_seed)))

# MLP (neural network)
models.append(('MLP', MLPClassifier(hidden_layer_sizes=(128, 64), activation='relu', solver='adam', alpha=1e-4,
                                    batch_size=2048, learning_rate_init=1e-3, max_iter=50, random_state=random_seed)))

# XGBoost (optional)
if XGB_AVAILABLE:
    models.append(('XGBoost', XGBClassifier(
        n_estimators=500, max_depth=6, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8,
        reg_lambda=1.0, n_jobs=-1, random_state=random_seed, objective='binary:logistic', eval_metric='auc'
    )))

print(f"Defined {len(models)} models")


Defined 6 models


In [6]:
# Train and evaluate
results = []

for name, model in models:
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    metrics = evaluate_classifier(model, X_test, y_test, name)
    results.append(metrics)

# Create summary DataFrame
results_df = pd.DataFrame([{k: v for k, v in r.items() if k not in ['confusion_matrix']} for r in results])
results_df = results_df.sort_values(by='pr_auc', ascending=False)
print('\n=== Summary (sorted by PR-AUC) ===')
print(results_df[['model','accuracy','precision','recall','f1','roc_auc','pr_auc']])



Training LogisticRegression...

=== LogisticRegression ===
Accuracy: 0.9735 | Precision: 0.0566 | Recall: 0.9184 | F1: 0.1066
ROC-AUC: 0.9576 | PR-AUC: 0.7161
Confusion Matrix:
 [[27682   750]
 [    4    45]]

LaTeX Classification Report:

\begin{table}[h!]
\centering
\begin{tabular}{lcccc}
\hline
\textbf{Class} & \textbf{Precision} & \textbf{Recall} & \textbf{F1-Score} & \textbf{Support} \\
\hline
Non-Fraud      & 0.9999 & 0.9736 & 0.9866 & 28432 \\
Fraud          & 0.0566 & 0.9184 & 0.1066 & 49 \\
\hline
Accuracy       &        &        & 0.9735 & 28481 \\
Macro Avg      & 0.5282 & 0.9460 & 0.5466 & 28481 \\
Weighted Avg   & 0.9982 & 0.9735 & 0.9851 & 28481 \\
\hline
\end{tabular}
\caption{Classification Report}
\label{tab:classification_report}
\end{table}

Training DecisionTree...

=== DecisionTree ===
Accuracy: 0.9973 | Precision: 0.3592 | Recall: 0.7551 | F1: 0.4868
ROC-AUC: 0.8764 | PR-AUC: 0.2717
Confusion Matrix:
 [[28366    66]
 [   12    37]]

LaTeX Classification Report:

