# Model Experiments Notebook

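This notebook demonstrates model training, hyperparameter tuning, and side-by-side comparison of Random Forest, neural network, and ensemble models for financial derivative pricing (the target is the option `call_price`).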

In [None]:
import sys
import os
sys.path.append('../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import time
import warnings
warnings.filterwarnings('ignore')

# Import our custom modules
from data.data_generator import FinancialDataGenerator
from data.feature_engineer import FinancialFeatureEngineer
from models.random_forest import RandomForestPricingModel
from models.neural_network import SwaptionPricingNN
from models.ensemble import WeightedEnsemble
from utils.visualization import FinancialVisualizer

# Set up plotting style
# NOTE(review): the 'seaborn-v0_8' style name presumably needs matplotlib >= 3.6
# (older releases called it 'seaborn') — confirm against the pinned environment.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
%matplotlib inline

## 1. Data Preparation

In [None]:
# Generate and prepare data
print("Preparing data for model experiments...")

# Option data from a seeded generator
generator = FinancialDataGenerator(seed=42)
option_data = generator.generate_option_prices(n_samples=50000)

# Feature engineering on top of the generated frame
feature_engineer = FinancialFeatureEngineer()
option_features = feature_engineer.create_option_features(option_data)

# Columns fed to every model in this notebook
feature_cols = [
    'spot_price',
    'strike_price',
    'time_to_expiry',
    'risk_free_rate',
    'volatility',
    'moneyness',
    'log_moneyness',
    'vol_time',
    'vol_sqrt_time',
]

# Feature matrix and regression target
X = option_features[feature_cols]
y = option_features['call_price']

# Two-stage split with identical settings: first carve out the test set,
# then carve the validation set out of what remains.
split_options = {'test_size': 0.2, 'random_state': 42}
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, **split_options)

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")
print(f"Features: {list(X.columns)}")

## 2. Model Training and Evaluation Functions

In [None]:
def evaluate_model(model, X_test, y_test, model_name):
    """Score a fitted model on a held-out set and time its inference.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)``.
    X_test : array-like or DataFrame
        Features passed unchanged to ``model.predict``.
    y_test : array-like
        True target values; flattened to 1-D before scoring.
    model_name : str
        Label stored under the ``'model'`` key of the result.

    Returns
    -------
    dict
        Keys: ``'model'``, ``'mse'``, ``'rmse'``, ``'mae'``, ``'r2'``,
        ``'mape'`` (percent, computed over non-zero targets only; NaN when
        every target is zero), ``'inference_time'`` (seconds) and
        ``'predictions'`` (1-D float ndarray).
    """
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start_time = time.perf_counter()
    raw_pred = model.predict(X_test)
    inference_time = time.perf_counter() - start_time

    # Flatten both sides: some models return predictions shaped (n, 1), which
    # would otherwise broadcast against a 1-D target in the element-wise
    # arithmetic below.
    y_true = np.asarray(y_test, dtype=float).ravel()
    y_pred = np.asarray(raw_pred, dtype=float).ravel()

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Calculate MAPE. A percentage error is undefined where the true value is
    # zero, so those rows are skipped instead of yielding inf/nan.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = float('nan')

    results = {
        'model': model_name,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'mape': mape,
        'inference_time': inference_time,
        'predictions': y_pred
    }

    return results

def plot_predictions(y_true, y_pred, model_name, sample_size=1000):
    """Plot predicted-vs-actual and residual scatter panels for a random sample.

    Parameters
    ----------
    y_true : array-like
        True target values.
    y_pred : array-like
        Predictions aligned positionally with ``y_true``; may be shaped
        ``(n,)`` or ``(n, 1)``.
    model_name : str
        Used in both panel titles.
    sample_size : int, default 1000
        Maximum number of points drawn (sampled without replacement).

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` differ in length after flattening.
    """
    # Work on flat positional arrays so that a pandas index on either input,
    # or a column-vector prediction, cannot misalign the pairs.
    y_true_arr = np.asarray(y_true).ravel()
    y_pred_arr = np.asarray(y_pred).ravel()
    if len(y_true_arr) != len(y_pred_arr):
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {len(y_true_arr)} and {len(y_pred_arr)}"
        )
    if len(y_true_arr) == 0:
        # Nothing to draw; min()/max() below are undefined on empty input.
        return

    # Sample for plotting
    indices = np.random.choice(len(y_true_arr), min(sample_size, len(y_true_arr)), replace=False)
    y_true_sample = y_true_arr[indices]
    y_pred_sample = y_pred_arr[indices]

    plt.figure(figsize=(12, 5))

    # Scatter plot with the y = x reference line
    plt.subplot(1, 2, 1)
    plt.scatter(y_true_sample, y_pred_sample, alpha=0.6, s=1)
    plt.plot([y_true_sample.min(), y_true_sample.max()],
             [y_true_sample.min(), y_true_sample.max()],
             'r--', linewidth=2)
    plt.xlabel('Actual Price')
    plt.ylabel('Predicted Price')
    plt.title(f'{model_name}: Predicted vs Actual')
    plt.grid(True, alpha=0.3)

    # Residual plot (prediction minus actual)
    plt.subplot(1, 2, 2)
    residuals = y_pred_sample - y_true_sample
    plt.scatter(y_true_sample, residuals, alpha=0.6, s=1)
    plt.axhline(y=0, color='r', linestyle='--', linewidth=2)
    plt.xlabel('Actual Price')
    plt.ylabel('Residual')
    plt.title(f'{model_name}: Residual Plot')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

def cross_validate_model(model_class, X, y, params=None, cv=5):
    """Run shuffled K-fold cross-validation and summarise per-fold RMSE.

    A fresh ``model_class(**params)`` is built for every fold, fitted through
    its ``train`` method and scored through its ``predict`` method. ``X`` and
    ``y`` are sliced with ``.iloc``.

    Returns a dict with ``'mean_cv_score'``, ``'std_cv_score'`` and the raw
    ``'cv_scores'`` list (one RMSE per fold).
    """
    model_kwargs = {} if params is None else params
    splitter = KFold(n_splits=cv, shuffle=True, random_state=42)

    def _fold_rmse(fit_rows, holdout_rows):
        # Fit on the fold's training rows, then score on its held-out rows.
        estimator = model_class(**model_kwargs)
        estimator.train(X.iloc[fit_rows], y.iloc[fit_rows])
        holdout_pred = estimator.predict(X.iloc[holdout_rows])
        return np.sqrt(mean_squared_error(y.iloc[holdout_rows], holdout_pred))

    fold_rmses = [
        _fold_rmse(fit_rows, holdout_rows)
        for fit_rows, holdout_rows in splitter.split(X)
    ]

    return {
        'mean_cv_score': np.mean(fold_rmses),
        'std_cv_score': np.std(fold_rmses),
        'cv_scores': fold_rmses
    }

## 3. Random Forest Model Experiments

In [None]:
# Train Random Forest model
print("Training Random Forest model...")
rf_config = {'n_estimators': 100, 'max_depth': 20, 'random_state': 42}
rf_model = RandomForestPricingModel(**rf_config)

# Fit on the training split
rf_model.train(X_train, y_train)

# Score on the validation split and report the headline metrics in one write
rf_val_results = evaluate_model(rf_model, X_val, y_val, "Random Forest (Val)")
rf_report_lines = [
    f"Random Forest Validation Results:",
    f"  RMSE: ${rf_val_results['rmse']:.4f}",
    f"  MAE: ${rf_val_results['mae']:.4f}",
    f"  R²: {rf_val_results['r2']:.4f}",
    f"  MAPE: {rf_val_results['mape']:.2f}%",
]
print("\n".join(rf_report_lines))

# Feature importance, largest first; only reported when the model returns a
# non-empty mapping
rf_importance = rf_model.get_feature_importance()
if rf_importance:
    print(f"\nTop 5 important features:")
    sorted_importance = sorted(
        rf_importance.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_features = sorted_importance[:5]
    for feature, importance in top_features:
        print(f"  {feature}: {importance:.4f}")

In [None]:
# Hyperparameter tuning for Random Forest
print("\nPerforming hyperparameter tuning for Random Forest...")

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Draw ONE seeded subsample (smaller sample for speed) and take the SAME row
# positions from features and target. Sampling X and y independently would
# pair each feature row with an unrelated price, and re-drawing per candidate
# would make the candidates' scores incomparable.
tuning_size = min(5000, len(X_train))
tuning_rows = np.random.default_rng(42).choice(len(X_train), size=tuning_size, replace=False)
X_tune = X_train.iloc[tuning_rows]
y_tune = y_train.iloc[tuning_rows]

# Simple exhaustive grid search (in practice, use RandomizedSearchCV).
# Candidates are enumerated in the same order as four nested loops over the grid.
param_candidates = [
    {
        'n_estimators': n_est,
        'max_depth': max_d,
        'min_samples_split': min_split,
        'min_samples_leaf': min_leaf
    }
    for n_est in param_grid['n_estimators']
    for max_d in param_grid['max_depth']
    for min_split in param_grid['min_samples_split']
    for min_leaf in param_grid['min_samples_leaf']
]

best_score = float('inf')
best_params = {}

for candidate in param_candidates:
    # Quick cross-validation; cross_validate_model builds the model itself,
    # so no instance is created here.
    cv_results = cross_validate_model(
        RandomForestPricingModel,
        X_tune, y_tune,
        {**candidate, 'random_state': 42},
        cv=3
    )

    # Lower mean RMSE is better; ties keep the earlier candidate.
    if cv_results['mean_cv_score'] < best_score:
        best_score = cv_results['mean_cv_score']
        best_params = dict(candidate)

print(f"Best parameters: {best_params}")
print(f"Best CV score: ${best_score:.4f}")

# Train best model on the full training split
best_rf_model = RandomForestPricingModel(**best_params, random_state=42)
best_rf_model.train(X_train, y_train)

## 4. Neural Network Model Experiments

In [None]:
# Prepare data for neural network (scaling required)
# The scaler is fitted on the training split only and then applied to all
# three splits, keeping each frame's columns and index.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    pd.DataFrame(
        scaler.transform(split_frame),
        columns=split_frame.columns,
        index=split_frame.index
    )
    for split_frame in (X_train, X_val, X_test)
)

# Train Neural Network model
print("Training Neural Network model...")
nn_architecture = {
    'input_dim': X_train.shape[1],
    'hidden_dims': [128, 64, 32],
    'dropout_rate': 0.2,
    'l2_reg': 1e-4,
}
nn_model = SwaptionPricingNN(**nn_architecture)

# Fit on the scaled training features
nn_fit_options = {
    'validation_split': 0.2,
    'epochs': 50,  # Reduced for demo
    'batch_size': 64,
    'learning_rate': 1e-3,
}
nn_model.train(X_train_scaled, y_train, **nn_fit_options)

# Evaluate on the scaled validation split and report in one write
nn_val_results = evaluate_model(nn_model, X_val_scaled, y_val, "Neural Network (Val)")
nn_report_lines = [
    f"Neural Network Validation Results:",
    f"  RMSE: ${nn_val_results['rmse']:.4f}",
    f"  MAE: ${nn_val_results['mae']:.4f}",
    f"  R²: {nn_val_results['r2']:.4f}",
    f"  MAPE: {nn_val_results['mape']:.2f}%",
]
print("\n".join(nn_report_lines))

In [None]:
# Plot training history
# Only drawn when the model exposes a non-empty `history` attribute.
# NOTE(review): `.history.history` looks like a Keras History object — confirm.
if getattr(nn_model, 'history', None):
    history = nn_model.history.history

    plt.figure(figsize=(12, 5))

    # One entry per panel:
    # (train key, validation key, train label, validation label, y-label, title)
    history_panels = [
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'Loss', 'Neural Network Training History'),
        ('mae', 'val_mae', 'Training MAE', 'Validation MAE',
         'MAE', 'Neural Network MAE History'),
    ]

    for panel_pos, panel in enumerate(history_panels, start=1):
        train_key, val_key, train_label, val_label, y_label, panel_title = panel
        plt.subplot(1, 2, panel_pos)
        plt.plot(history[train_key], label=train_label)
        plt.plot(history[val_key], label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(panel_title)
        plt.legend()
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 5. Ensemble Model Experiments

In [None]:
# Create ensemble with Random Forest models
print("Training Ensemble model...")

# Base learners differ in size, depth and seed: (n_estimators, max_depth, random_state)
base_model_specs = [
    (50, 10, 42),
    (100, 15, 43),
    (150, 20, 44),
]
base_models = [
    RandomForestPricingModel(n_estimators=n_trees, max_depth=depth, random_state=seed)
    for n_trees, depth, seed in base_model_specs
]

# Train ensemble on the training split
ensemble_model = WeightedEnsemble(base_models, weight_regularization=0.1)
ensemble_model.train(X_train, y_train)

# Evaluate on the validation split and report in one write
ensemble_val_results = evaluate_model(ensemble_model, X_val, y_val, "Ensemble (Val)")
ensemble_report_lines = [
    f"Ensemble Validation Results:",
    f"  RMSE: ${ensemble_val_results['rmse']:.4f}",
    f"  MAE: ${ensemble_val_results['mae']:.4f}",
    f"  R²: {ensemble_val_results['r2']:.4f}",
    f"  MAPE: {ensemble_val_results['mape']:.2f}%",
    f"  Learned weights: {ensemble_model.weights}",
]
print("\n".join(ensemble_report_lines))

## 6. Model Comparison

In [None]:
# Compare all models on test set
print("Comparing models on test set...")

# Each entry is (model, display name, test features); the neural network is
# the only one scored on the scaled features.
models_to_compare = [
    (rf_model, "Random Forest", X_test),
    (best_rf_model, "Tuned Random Forest", X_test),
    (nn_model, "Neural Network", X_test_scaled),
    (ensemble_model, "Ensemble", X_test),
]

comparison_results = []
for model, name, X_test_data in models_to_compare:
    results = evaluate_model(model, X_test_data, y_test, name)
    comparison_results.append(results)

    # Emit the per-model report as a single write
    test_report_lines = [
        f"\n{name} Test Results:",
        f"  RMSE: ${results['rmse']:.4f}",
        f"  MAE: ${results['mae']:.4f}",
        f"  R²: {results['r2']:.4f}",
        f"  MAPE: {results['mape']:.2f}%",
        f"  Inference time: {results['inference_time']:.4f}s",
    ]
    print("\n".join(test_report_lines))

In [None]:
# Create comparison plots
# Pull the plotted series out of the per-model result dicts
model_names = [result['model'] for result in comparison_results]
rmse_scores = [result['rmse'] for result in comparison_results]
r2_scores = [result['r2'] for result in comparison_results]
inference_times = [result['inference_time'] for result in comparison_results]

fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Model Comparison Results', fontsize=16)

# The three bar panels share one recipe and differ only in data, colour,
# titles and value-label format:
# (axis, values, extra bar kwargs, title, y-label, label formatter)
bar_panels = [
    # RMSE comparison (default bar colour)
    (axes[0, 0], rmse_scores, {},
     'RMSE Comparison', 'RMSE ($)',
     lambda height: f'${height:.4f}'),
    # R² comparison
    (axes[0, 1], r2_scores, {'color': 'green'},
     'R² Score Comparison', 'R² Score',
     lambda height: f'{height:.4f}'),
    # Inference time comparison
    (axes[1, 0], inference_times, {'color': 'orange'},
     'Inference Time Comparison', 'Time (seconds)',
     lambda height: f'{height:.4f}s'),
]

for panel_ax, panel_values, bar_kwargs, panel_title, panel_ylabel, format_label in bar_panels:
    bars = panel_ax.bar(model_names, panel_values, alpha=0.7, **bar_kwargs)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(panel_ylabel)
    panel_ax.tick_params(axis='x', rotation=45)
    # Write each bar's value just above its top edge
    for bar in bars:
        height = bar.get_height()
        panel_ax.text(bar.get_x() + bar.get_width()/2., height + 0.001,
                      format_label(height), ha='center', va='bottom')

# Performance vs Speed scatter, one labelled point per model
tradeoff_ax = axes[1, 1]
tradeoff_ax.scatter(inference_times, rmse_scores, s=100, alpha=0.7)
for name, seconds, rmse_value in zip(model_names, inference_times, rmse_scores):
    tradeoff_ax.annotate(name, (seconds, rmse_value),
                         xytext=(5, 5), textcoords='offset points')
tradeoff_ax.set_xlabel('Inference Time (seconds)')
tradeoff_ax.set_ylabel('RMSE ($)')
tradeoff_ax.set_title('Performance vs Speed Trade-off')
tradeoff_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Plot predictions for best model
# "Best" means the lowest test-set RMSE among the compared models.
best_model_idx = np.argmin(rmse_scores)
best_entry = models_to_compare[best_model_idx]
best_model, best_X_test = best_entry[0], best_entry[2]
best_model_name = model_names[best_model_idx]
best_predictions = comparison_results[best_model_idx]['predictions']

print(f"\nPlotting predictions for best model: {best_model_name}")
plot_predictions(y_test, best_predictions, best_model_name)

## 7. Cross-Validation Analysis

## 8. Model Interpretability

## 9. Model Experiments Summary