# Model Training - Bank Fraud Detection

This notebook trains and compares several fraud-detection models. It covers:
- Hyperparameter tuning
- Class imbalance handling
- Recall optimization
- Model comparison


In [1]:
# Standard library
import sys
import warnings

# Third-party
import numpy as np
import pandas as pd

# Put the parent directory on the import path so the `src` package resolves.
# The path is relative to the kernel's working directory, so this presumably
# expects the notebook to be run from `notebooks/` -- TODO confirm.
sys.path.append('..')

# Project modules (must come after the sys.path tweak above).
# NOTE(review): the recorded output of this cell is an ImportError for
# `FraudDataPreprocessor`; confirm the class name actually exported by
# src/data/data_preprocessor.py.
from src.data.data_loader import DataLoader
from src.data.data_preprocessor import FraudDataPreprocessor
from src.models.model_trainer import FraudModelTrainer

# Installs a blanket "ignore" filter: every warning category is silenced from
# this point on, including deprecation and convergence warnings.
warnings.filterwarnings('ignore')


ImportError: cannot import name 'FraudDataPreprocessor' from 'src.data.data_preprocessor' (c:\Users\Administrator\OneDrive\Desktop\fraud-detection\notebooks\..\src\data\data_preprocessor.py)

In [5]:
# Load the processed train/validation splits.
#
# The four CSVs share one directory and one target column; both are named once
# here so a change of location or label only has to be made in a single place.
# The directory is relative to the kernel's working directory.
PROCESSED_DIR = '../data/processed'
TARGET_COL = 'is_fraud'

X_train = pd.read_csv(f'{PROCESSED_DIR}/X_train.csv')
X_val = pd.read_csv(f'{PROCESSED_DIR}/X_val.csv')
y_train = pd.read_csv(f'{PROCESSED_DIR}/y_train.csv')[TARGET_COL]
y_val = pd.read_csv(f'{PROCESSED_DIR}/y_val.csv')[TARGET_COL]

# Features and labels are read from separate files, so nothing guarantees they
# are row-aligned. Fail here with a clear message rather than later inside
# preprocessing or model fitting.
assert len(X_train) == len(y_train), (
    f"Row mismatch in training split: X_train={len(X_train)}, y_train={len(y_train)}"
)
assert len(X_val) == len(y_val), (
    f"Row mismatch in validation split: X_val={len(X_val)}, y_val={len(y_val)}"
)

print(f"Train: {X_train.shape}, Val: {X_val.shape}")


FileNotFoundError: [Errno 2] No such file or directory: '../data/processed/X_train.csv'

In [None]:
# Preprocess data
# Constructor options are collected in one mapping so they can be read (and
# tuned) at a glance before the preprocessor is built.
preprocessor_options = {
    "handle_imbalance": True,
    "balance_method": "smote",
    "random_state": 42,
}
preprocessor = FraudDataPreprocessor(**preprocessor_options)

# Fit on the training split only, with imbalance handling switched on.
X_train_proc, y_train_proc = preprocessor.fit_transform(X_train, y_train)

# Flip the flag off on the already-fitted preprocessor before touching the
# validation split, so validation data is not rebalanced.
# NOTE(review): this relies on `transform` reading the attribute at call
# time -- confirm against FraudDataPreprocessor.
preprocessor.handle_imbalance = False  # Don't balance validation
X_val_proc, y_val_proc = preprocessor.transform(X_val, y_val)

print(f"Processed shapes - Train: {X_train_proc.shape}, Val: {X_val_proc.shape}")


In [None]:
# Train models
# Candidate model identifiers passed to the trainer.
models_to_train = ["logistic_regression", "random_forest", "xgboost", "lightgbm"]

# The trainer is configured with a fixed seed and recall as its scoring metric.
trainer = FraudModelTrainer(random_state=42, scoring="recall")

# `cv` and `n_iter` are presumably the number of cross-validation folds and
# the number of hyperparameter-search iterations per model -- TODO confirm
# against FraudModelTrainer.train_all_models.
# NOTE(review): the training data was produced with handle_imbalance=True; if
# that resamples before cross-validation, CV scores may be optimistic -- verify.
results = trainer.train_all_models(
    X_train_proc, y_train_proc,
    models_to_train=models_to_train, cv=3, n_iter=10,
)


In [None]:
# Select best model
# Rank the trained models on the held-out validation split using recall and
# unpack the winner's name and fitted estimator.
best_name, best_model = trainer.select_best_model(
    X_val_proc,
    y_val_proc,
    metric="recall",
)

print(f"\nBest Model: {best_name}")
# NOTE(review): no save call is visible in this notebook; the message below is
# only accurate if the trainer persists the model itself -- confirm, and check
# that "models/" is resolved relative to the intended directory.
print(f"Best model saved to: models/{best_name}_model.pkl")
