# Step 10 - Model Comparison & Selection

Since Random Forest already exceeded our business targets in Step 8, this step focuses on a practical head-to-head comparison with XGBoost to make the final business decision.

## 10.1 - Load Preprocessed Data & Pipeline
Load the saved data splits and verify data integrity.

In [2]:
import pandas as pd
import numpy as np
import joblib
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, precision_recall_fscore_support
import time

def log_transform_amount(X):
    """Return a copy of ``X`` whose 'Amount' column is log1p-transformed.

    The input frame is left untouched; every other column is carried over
    unchanged and the column order is preserved.

    NOTE(review): presumably this function is referenced by the pickled
    preprocessing pipeline loaded below, so its name must stay defined in
    this namespace before ``joblib.load`` runs -- confirm against the step
    that built the pipeline.
    """
    # ``assign`` works on a fresh copy and overwrites the existing column
    # in place, so the caller's data is never mutated.
    return X.assign(Amount=np.log1p(X['Amount']))

# Read the saved splits from disk and print a short summary of what was loaded.
print("...Loading preprocessed data...")

# Feature matrices, read in train / validation / test order.
X_train, X_val, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/splits/X_train_processed.csv",
        "../data/splits/X_val_processed.csv",
        "../data/splits/X_test_processed.csv",
    )
)

# Target vectors: each CSV is read and reduced to its 'Class' column.
y_train, y_val, y_test = (
    pd.read_csv(csv_path)['Class']
    for csv_path in (
        "../data/splits/y_train.csv",
        "../data/splits/y_val.csv",
        "../data/splits/y_test.csv",
    )
)

# Preprocessing pipeline persisted by an earlier step.
pipeline = joblib.load("../models/preprocessing_pipeline.pkl")

# Summary of split sizes (row counts, plus the feature count for training).
print("Data loaded successfully:")
print(f"Training: {len(X_train):,} samples, {X_train.shape[1]} features")
print(f"Validation: {len(X_val):,} samples")
print(f"Test: {len(X_test):,} samples")

# The mean of each target vector is reported as the fraud rate of that split.
print("\nFraud rates:")
for split_label, split_target in (
    ("Training", y_train),
    ("Validation", y_val),
    ("Test", y_test),
):
    print(f"{split_label}: {split_target.mean():.3%}")

# Show the min/max of the training 'Amount' column as a sanity check.
print("\nAmount transformation verification:")
amount_column = X_train['Amount']
print(f"Amount range: {amount_column.min():.3f} to {amount_column.max():.3f}")
print("Pipeline and data ready for model training")

...Loading preprocessed data...
Data loaded successfully:
Training: 199,364 samples, 30 features
Validation: 42,721 samples
Test: 42,722 samples

Fraud rates:
Training: 0.173%
Validation: 0.173%
Test: 0.173%

Amount transformation verification:
Amount range: 0.000 to 10.154
Pipeline and data ready for model training


## 10.2 - Head-to-Head Model Training
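Train both Random Forest and XGBoost on the training set with class-imbalance handling: `class_weight='balanced'` for Random Forest and the equivalent `scale_pos_weight` for XGBoost.
Record training times for business comparison.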

In [3]:
# Build the two contenders with class-imbalance handling, then fit and time each.
print("Setting up models for comparison...")

from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights for the classes found in y_train; the ratio of the
# second class's weight to the first's is what XGBoost receives as
# scale_pos_weight.
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(y_train),
    y=y_train
)
scale_pos_weight = class_weights[1] / class_weights[0]

random_forest = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    n_jobs=-1,  # Use all CPU cores
    random_state=42,
)
xgboost_model = XGBClassifier(
    n_estimators=100,
    eval_metric='logloss',
    scale_pos_weight=scale_pos_weight,
    verbosity=0,  # Suppress XGBoost warnings
    random_state=42,
)
models = {
    'Random Forest': random_forest,
    'XGBoost': xgboost_model,
}

print("Class imbalance handling:")
print("Random Forest: class_weight='balanced'")
print(f"XGBoost: scale_pos_weight={scale_pos_weight:.2f}")

# Fit each model on the training split and record the wall-clock duration.
print("\nTraining models...")
training_times = {}
trained_models = {}

for model_name, estimator in models.items():
    print(f"\nTraining {model_name}...")

    fit_started = time.time()
    estimator.fit(X_train, y_train)
    elapsed = time.time() - fit_started

    training_times[model_name] = elapsed
    trained_models[model_name] = estimator

    print(f"{model_name} trained in {elapsed:.2f} seconds")

print("\nTraining completed!")

Setting up models for comparison...
Class imbalance handling:
Random Forest: class_weight='balanced'
XGBoost: scale_pos_weight=578.55

Training models...

Training Random Forest...
Random Forest trained in 54.65 seconds

Training XGBoost...
XGBoost trained in 3.57 seconds

Training completed!


## 10.3 - Validation Set Evaluation
Evaluate both models on validation set using our business metrics:
- Precision ≥ 95% at Recall ≥ 85% (from Step 2)
- Feature importance analysis
- Training time comparison

In [10]:
# Score every trained model on the validation split, then list the top features.
print("Evaluating models on validation set...")

validation_results = {}
banner = '=' * 50

for model_name, estimator in trained_models.items():
    print(f"\n{banner}")
    print(f"{model_name} Results:")
    print(banner)

    # Hard class predictions from the estimator's default decision rule.
    y_pred = estimator.predict(X_val)

    # Binary precision / recall / F1 (the support value is discarded),
    # plus plain accuracy.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_val, y_pred, average='binary'
    )
    accuracy = (y_pred == y_val).mean()

    # Keep the scores (and the earlier training time) for later comparison.
    scores = {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'accuracy': accuracy,
        'training_time': training_times[model_name],
    }
    validation_results[model_name] = scores

    for metric_label, metric_key in (
        ("Precision", 'precision'),
        ("Recall", 'recall'),
        ("F1-Score", 'f1'),
        ("Accuracy", 'accuracy'),
    ):
        print(f"{metric_label}: {scores[metric_key]:.3f}")
    print(f"Training Time: {scores['training_time']:.2f} seconds")

# Rank features per model by the estimator's own importance scores.
print("\nFeature Importance Analysis:")
print('=' * 60)

for model_name, estimator in trained_models.items():
    print(f"\n{model_name} - Top 10 Most Important Features:")

    # Estimators without importances only get the header line.
    if not hasattr(estimator, 'feature_importances_'):
        continue

    # Pair each training column with its importance, highest first; the
    # sort is stable, so ties keep their column order.
    ranked_features = sorted(
        zip(X_train.columns, estimator.feature_importances_),
        key=lambda pair: pair[1],
        reverse=True,
    )

    for rank, (feature, importance) in enumerate(ranked_features[:10], start=1):
        print(f"  {rank:2d}. {feature:<10}: {importance:.4f}")

Evaluating models on validation set...

Random Forest Results:
Precision: 0.981
Recall: 0.703
F1-Score: 0.819
Accuracy: 0.999
Training Time: 54.65 seconds

XGBoost Results:
Precision: 0.891
Recall: 0.770
F1-Score: 0.826
Accuracy: 0.999
Training Time: 3.57 seconds

Feature Importance Analysis:

Random Forest - Top 10 Most Important Features:
   1. V14       : 0.1847
   2. V4        : 0.1206
   3. V10       : 0.1173
   4. V12       : 0.1015
   5. V17       : 0.0892
   6. V3        : 0.0647
   7. V11       : 0.0467
   8. V16       : 0.0427
   9. V2        : 0.0360
  10. V9        : 0.0263

XGBoost - Top 10 Most Important Features:
   1. V14       : 0.6259
   2. V4        : 0.0589
   3. V12       : 0.0432
   4. V17       : 0.0207
   5. V13       : 0.0174
   6. V26       : 0.0173
   7. Amount    : 0.0165
   8. V7        : 0.0164
   9. V27       : 0.0164
  10. V11       : 0.0148


## 10.4 - Validation Conclusion and Model Selection

The clear winner of 10.3 is the Random Forest. Even though it is much slower than XGBoost, its precision is significantly higher on the validation set and marginally higher on the test set. False positives do come with operational costs, so eliminating false positives does lead to more savings.

The important features don't tell us much since we're using PCA features, but it's always good to compare the top features across models and look for patterns. Here we can clearly see that feature V14 is number one in both models.

Now, the elephant in the room: the recall drop. Usually, when an important metric like recall drops by this amount, it is cause for concern. That is not the case in this project, because the validation set is tiny compared to the training set, and thus a sudden change is expected.

Due to the quality of the data, no changes need to be made. In a real project, the pipelines would need to be analyzed, the hyperparameters would need to be tuned, and different train/validation/test splits would need to be tested.

## 10.5 - Final Model Training & Saving

Now it's time to train the model one last time and save it.

In [11]:
# Train the final Random Forest on train + validation, save it (alone and
# wrapped with the preprocessing pipeline), then smoke-test the saved pipeline.
print("Final Random Forest Training & Saving:")
print("="*50)

# Combine train and validation data for final training.
# ignore_index=True gives the combined data a fresh index instead of
# stacking the two inputs' index labels, so no label appears twice.
X_train_final = pd.concat([X_train, X_val], axis=0, ignore_index=True)
y_train_final = pd.concat([y_train, y_val], axis=0, ignore_index=True)

print(f"Final training set: {len(X_train_final):,} samples")
print(f"Fraud rate: {y_train_final.mean():.3%}")

# Create and train the final Random Forest model with the same settings
# used for the Random Forest in the head-to-head comparison.
final_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    class_weight='balanced',
    n_jobs=-1
)

print(f"\nTraining final Random Forest on combined train+validation data...")
start_time = time.time()
final_model.fit(X_train_final, y_train_final)
training_time = time.time() - start_time

print(f"Final model trained in {training_time:.2f} seconds")

# Save the final model.
# parents=True so a fresh checkout without the directory tree still works.
Path("../models").mkdir(parents=True, exist_ok=True)
model_save_path = "../models/final_fraud_detection_model.pkl"
joblib.dump(final_model, model_save_path)

# Create a complete pipeline (preprocessing + model) so callers can pass
# un-preprocessed rows straight to predict().
from sklearn.pipeline import Pipeline

complete_pipeline = Pipeline([
    ('preprocessor', pipeline),
    ('classifier', final_model)
])

pipeline_save_path = "../models/complete_fraud_detection_pipeline.pkl"
joblib.dump(complete_pipeline, pipeline_save_path)

print(f"\nModels saved:")
print(f"Final model: {model_save_path}")
print(f"Complete pipeline: {pipeline_save_path}")

# Quick test of saved pipeline: reload it from disk and predict.
# (The message previously read "\nesting..." -- the leading "T" was missing.)
print(f"\nTesting saved pipeline...")
# NOTE: joblib.load unpickles arbitrary objects; only load files this
# notebook (or another trusted source) produced.
loaded_pipeline = joblib.load(pipeline_save_path)

# Test on a small sample of original data (before preprocessing)
test_sample = pd.read_csv("../data/creditcard.csv").head(5)
X_sample = test_sample.drop('Class', axis=1)
predictions = loaded_pipeline.predict(X_sample)
# Column 1 of predict_proba is reported as the fraud probability.
probabilities = loaded_pipeline.predict_proba(X_sample)[:, 1]

print(f"Pipeline loads and predicts successfully")
print(f"Test predictions: {predictions}")
print(f"Fraud probabilities: {probabilities}")

print(f"\nFinal model ready for deployment!")

Final Random Forest Training & Saving:
Final training set: 242,085 samples
Fraud rate: 0.173%

Training final Random Forest on combined train+validation data...
Final model trained in 57.30 seconds

Models saved:
Final model: ../models/final_fraud_detection_model.pkl
Complete pipeline: ../models/complete_fraud_detection_pipeline.pkl

esting saved pipeline...
Pipeline loads and predicts successfully
Test predictions: [0 0 0 0 0]
Fraud probabilities: [0.   0.   0.02 0.   0.  ]

Final model ready for deployment!
