# Model Training and Evaluation

This notebook covers the machine learning model training process for the AI-enhanced 60/40 portfolio strategy.

## Objectives:
1. Engineer features from economic indicators
2. Train decision tree models for each asset
3. Evaluate model performance
4. Analyze feature importance
5. Validate with cross-validation

In [None]:
# Import libraries
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from sklearn.metrics import r2_score
from sklearn.tree import plot_tree

# Silence warnings before the project modules are imported (same ordering as before)
warnings.filterwarnings('ignore')

# Import custom modules
from data_acquisition import DataAcquisition
from feature_engineering import FeatureEngineer
from ml_model import PortfolioMLModel

# Plot appearance. Order matters: the style sheet is applied first, then the
# seaborn palette is set on top of it.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# pandas display: show every column, four digits of precision
for option_name, option_value in (('display.max_columns', None),
                                  ('display.precision', 4)):
    pd.set_option(option_name, option_value)

print("Libraries imported successfully!")

## 1. Load Data and Configuration

In [None]:
# Load configuration.
# The encoding is given explicitly so that parsing config.yaml does not depend
# on the platform's default locale encoding.
with open('config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fetch data: get_full_dataset() returns three objects, unpacked here as
# prices, returns and indicators.
data_acq = DataAcquisition(config)
prices, returns, indicators = data_acq.get_full_dataset()

# Fail early with a readable message instead of an IndexError on prices.index[0].
if len(prices) == 0:
    raise ValueError(
        "DataAcquisition.get_full_dataset() returned no price data; "
        "check the data settings in config.yaml"
    )

print(f"Data loaded: {len(prices)} periods from {prices.index[0]} to {prices.index[-1]}")

## 2. Feature Engineering

In [None]:
# Build the feature pipeline from the shared configuration
feature_eng = FeatureEngineer(config)

# Stage 1: derive the raw feature set from the indicators
print("Engineering features...")
features_raw = feature_eng.engineer_all_features(indicators)
raw_shape = features_raw.shape
print(f"Raw features created: {raw_shape}")

# Stage 2: convert the raw features into the frame used for training
features = feature_eng.prepare_features_for_training(features_raw)
n_feature_columns = len(features.columns)
print(f"Prepared features: {features.shape}")
print(f"Feature columns: {n_feature_columns}")

In [None]:
# Preview the first rows of the prepared feature frame
print("Sample of engineered features:")
display(features.head())

# Numbered listing of every feature column (numbering starts at 1)
print("\nFeature columns:")
for position, column_name in enumerate(features.columns, start=1):
    print(f"{position:3d}. {column_name}")

In [None]:
# Correlation heatmap for a sample of the features: at most the first 10
# columns whose name contains 'VIX', 'Spread' or 'Rate'.
name_keywords = ('VIX', 'Spread', 'Rate')
matching_columns = [
    name for name in features.columns
    if any(keyword in name for keyword in name_keywords)
]
sample_features = features[matching_columns[:10]]

fig, ax = plt.subplots(figsize=(12, 10))
corr = sample_features.corr()

heatmap_options = dict(
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={'shrink': 0.8},
)
sns.heatmap(corr, ax=ax, **heatmap_options)

ax.set_title('Sample Feature Correlation Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 3. Prepare Training Data

In [None]:
# Model wrapper built from the shared configuration
ml_model = PortfolioMLModel(config)

# Targets are built from the returns frame with lookback=1.
# NOTE(review): the original comment describes these as next-period returns;
# confirm against PortfolioMLModel.create_target_variables.
targets = ml_model.create_target_variables(returns, lookback=1)

target_assets = list(targets.columns)
print(f"Targets created: {targets.shape}")
print(f"\nTarget assets: {target_assets}")

In [None]:
# Train/test split; the held-out fraction is read from
# config['model']['validation']['test_size'].
test_fraction = config['model']['validation']['test_size']
X_train, X_test, y_train, y_test = ml_model.prepare_train_test_data(
    features, targets, test_size=test_fraction
)

n_train, n_test = len(X_train), len(X_test)
n_total = n_train + n_test

# Shape and index range covered by each partition
print(f"Training set: {X_train.shape}")
print(f"  From {X_train.index[0]} to {X_train.index[-1]}")
print(f"\nTest set: {X_test.shape}")
print(f"  From {X_test.index[0]} to {X_test.index[-1]}")

# Row counts and their share of the combined sample
print(f"\nTrain/Test split: {n_train}/{n_test} ({n_train/n_total:.1%}/{n_test/n_total:.1%})")

## 4. Train Models

In [None]:
# Fit the models on the training partition; the result is a mapping whose
# values are the fitted estimators.
models = ml_model.train_all_models(X_train, y_train)

print(f"\nTrained {len(models)} models")

# Report the estimator class using the first fitted model as a representative
first_model = list(models.values())[0]
print(f"Model type: {type(first_model).__name__}")
print(f"Model parameters: {ml_model.model_params}")


In [None]:
# Tabulate the recorded training history; the frame is transposed before
# display so that its original columns become rows.
print("Training performance:")
history_frame = pd.DataFrame(ml_model.training_history)
training_df = history_frame.transpose()
display(training_df)

## 5. Evaluate Models

In [None]:
# Evaluate on test set
# Scores the trained models against the held-out partition (X_test / y_test).
# NOTE(review): the result is later indexed by 'rmse', 'mae', 'r2' and
# 'directional_accuracy', so it is presumably a DataFrame with those columns
# and one row per asset — confirm against PortfolioMLModel.evaluate_all_models.
evaluation_results = ml_model.evaluate_all_models(X_test, y_test)

print("\nDetailed evaluation results:")
display(evaluation_results)

In [None]:
# Visualize evaluation metrics: one bar chart per metric, one bar per asset.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

metrics_to_plot = ['rmse', 'mae', 'r2', 'directional_accuracy']
titles = ['RMSE', 'MAE', 'R² Score', 'Directional Accuracy']

# axes.flat walks the 2x2 grid row by row, matching the metric order above.
for ax, metric, title in zip(axes.flat, metrics_to_plot, titles):
    values = evaluation_results[metric]
    positions = range(len(values))
    colors = plt.cm.viridis(np.linspace(0, 1, len(values)))

    bars = ax.bar(positions, values, color=colors, edgecolor='black')
    ax.set_xticks(positions)
    ax.set_xticklabels(values.index, rotation=45, ha='right')
    ax.set_ylabel(title, fontsize=11)
    ax.set_title(f'{title} by Asset', fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    # Zero baseline so negative scores (R² can fall below zero) are easy to spot.
    ax.axhline(y=0, color='black', linewidth=0.5)

    # bar_label puts each label just outside the end of its bar: above positive
    # bars and below negative ones, so labels never overlap the bar itself.
    ax.bar_label(bars, labels=[f'{val:.3f}' for val in values],
                 padding=2, fontsize=9)

plt.tight_layout()
plt.show()

## 6. Feature Importance Analysis

In [None]:
# Text listing of the 15 leading importance entries for every asset
print("Top 15 features by importance:\n")

for asset, importance in ml_model.feature_importance.items():
    leading_rows = importance.head(15)
    print(f"\n{asset}:")
    print(leading_rows)

In [None]:
# Visualize feature importance for all assets: one horizontal bar chart per asset.
top_n = 15   # features shown per asset
n_cols = 2   # panels per row

n_assets = len(ml_model.feature_importance)

# Size the grid from the number of assets so none is silently dropped
# (ceil division; at least one row so plt.subplots always gets a valid grid).
n_rows = max(1, -(-n_assets // n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows), squeeze=False)
axes = axes.ravel()

for ax, (asset, importance) in zip(axes, ml_model.feature_importance.items()):
    top_features = importance.head(top_n)
    positions = range(len(top_features))

    ax.barh(positions, top_features.values, color='steelblue', edgecolor='black')
    ax.set_yticks(positions)
    ax.set_yticklabels(top_features.index, fontsize=9)
    ax.set_xlabel('Importance', fontsize=10)
    ax.set_title(f'Top {top_n} Features - {asset}', fontsize=11, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='x')
    # First entry of the head() slice goes at the top of the chart
    ax.invert_yaxis()

# Hide grid cells that have no asset (e.g. an odd number of assets)
for unused_ax in axes[n_assets:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Combine the per-asset importances into one frame (one column per asset),
# average across the columns and rank from highest to lowest.
all_importance = pd.DataFrame(ml_model.feature_importance)
avg_importance = all_importance.mean(axis=1).sort_values(ascending=False)
top_avg = avg_importance.head(20)

print("Average feature importance across all assets:")
print(top_avg)

# Horizontal bar chart of the 20 highest averages, largest at the top
fig, ax = plt.subplots(figsize=(12, 8))
bar_positions = range(len(top_avg))
ax.barh(bar_positions, top_avg.values, color='coral', edgecolor='black')
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_avg.index, fontsize=10)
ax.set_xlabel('Average Importance', fontsize=12)
ax.set_title('Top 20 Features - Average Across All Assets', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')
ax.invert_yaxis()
plt.tight_layout()
plt.show()

## 7. Prediction Analysis

In [None]:
# Make predictions on test set
# NOTE(review): later cells index the result by asset name and plot it against
# predictions.index, so this is presumably a DataFrame with one column per
# asset — confirm against PortfolioMLModel.predict_returns.
predictions = ml_model.predict_returns(X_test)

# Show the overall shape, then the first 10 rows as a sanity check
print("Predictions shape:", predictions.shape)
print("\nSample predictions:")
display(predictions.head(10))

In [None]:
# Compare predictions vs actual returns: one scatter panel per asset.
# r2_score comes from the notebook's import cell.
assets_to_plot = list(y_test.columns)
n_cols = 2

# Size the grid from the number of assets so none is silently dropped
# (ceil division; at least one row so plt.subplots always gets a valid grid).
n_rows = max(1, -(-len(assets_to_plot) // n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
axes = axes.ravel()

for ax, asset in zip(axes, assets_to_plot):
    actual = y_test[asset]
    predicted = predictions[asset]

    # Scatter plot
    ax.scatter(actual, predicted, alpha=0.6, s=30)

    # Diagonal reference line (perfect prediction) spanning both value ranges
    min_val = min(actual.min(), predicted.min())
    max_val = max(actual.max(), predicted.max())
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2,
            label='Perfect Prediction')

    # R² of the predictions against the actual values, shown in the title
    r2 = r2_score(actual, predicted)

    ax.set_xlabel('Actual Returns', fontsize=11)
    ax.set_ylabel('Predicted Returns', fontsize=11)
    ax.set_title(f'{asset} - Predictions vs Actual (R²={r2:.3f})',
                 fontsize=12, fontweight='bold')
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)

# Hide grid cells that have no asset (e.g. an odd number of assets)
for unused_ax in axes[len(assets_to_plot):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Time series of predictions vs actuals: one stacked panel per asset.
n_panels = len(y_test.columns)

# squeeze=False always returns a 2-D array of Axes, so indexing also works
# when there is only one asset (plt.subplots(1, 1) otherwise returns a bare
# Axes). Height scales with the number of panels (3 inches each).
fig, axes = plt.subplots(n_panels, 1, figsize=(14, 3 * n_panels), squeeze=False)
axes = axes.ravel()

for ax, asset in zip(axes, y_test.columns):
    ax.plot(y_test.index, y_test[asset], label='Actual', linewidth=2, alpha=0.7)
    ax.plot(predictions.index, predictions[asset], label='Predicted',
            linewidth=2, alpha=0.7, linestyle='--')
    # Zero line separates positive from negative returns
    ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    ax.set_ylabel('Returns', fontsize=10)
    ax.set_title(f'{asset} - Predictions Over Time', fontsize=11, fontweight='bold')
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)

# Only the bottom panel carries the x-axis label
axes[-1].set_xlabel('Date', fontsize=11)
plt.tight_layout()
plt.show()

## 8. Cross-Validation

In [None]:
# Cross-validate on the training partition only; the fold count is read from
# config['model']['validation']['cv_folds'].
# NOTE(review): the original comment calls this time-series cross-validation;
# the splitter is not visible here — confirm in PortfolioMLModel.cross_validate.
print("Performing cross-validation...")
n_folds = config['model']['validation']['cv_folds']
cv_scores = ml_model.cross_validate(X_train, y_train, cv=n_folds)

# Transpose so the outer keys of cv_scores become the rows of the table
print("\nCross-validation results:")
cv_df = pd.DataFrame(cv_scores).transpose()
display(cv_df)

In [None]:
# Bar chart of each asset's 'mean_mse' from cv_scores, with 'std_mse' as error bars
fig, ax = plt.subplots(figsize=(10, 6))

assets = list(cv_scores.keys())
means = list(map(lambda name: cv_scores[name]['mean_mse'], assets))
stds = list(map(lambda name: cv_scores[name]['std_mse'], assets))

x_pos = np.arange(len(assets))
bar_style = dict(capsize=5, color='skyblue', edgecolor='black', alpha=0.8)
ax.bar(x_pos, means, yerr=stds, **bar_style)

ax.set_xticks(x_pos)
ax.set_xticklabels(assets, rotation=45, ha='right')
ax.set_ylabel('Mean Squared Error', fontsize=12)
ax.set_title('Cross-Validation Scores (with std dev)', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

## 9. Model Visualization (Sample)

In [None]:
# Visualize decision tree structure for one asset (limited depth for readability).
display_depth = 3  # levels drawn; the fitted tree itself is not modified

# Use the first trained model as the example
sample_asset = next(iter(models))
sample_model = models[sample_asset]

fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(sample_model,
          # Pass a plain list of names rather than a pandas Index:
          # NOTE(review): some scikit-learn releases are believed to reject an
          # Index for feature_names — confirm; a list is the safe form either way.
          feature_names=list(X_train.columns),
          filled=True,
          rounded=True,
          fontsize=8,
          max_depth=display_depth,
          ax=ax)
ax.set_title(f'Decision Tree Structure - {sample_asset} (Depth Limited to {display_depth})',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# Report the size of the full (untruncated) tree
print(f"Note: Full tree has depth {sample_model.get_depth()}")
print(f"Number of leaves: {sample_model.get_n_leaves()}")

## 10. Key Findings

### Model Performance:
- Decision tree models show varying performance across assets
- Directional accuracy is particularly important for portfolio allocation
- Feature importance reveals which economic indicators drive predictions

### Important Features:
- VIX-based features (the VIX level and features derived from it) consistently rank high
- Yield spread changes provide valuable signals
- Interaction features capture complex market dynamics

### Next Steps:
1. Use these models to generate portfolio allocations
2. Backtest the complete strategy
3. Compare against traditional benchmarks

In [None]:
# Persist the trained models to the directory given by config['output']['models_dir']
models_dir = config['output']['models_dir']
ml_model.save_models(models_dir)
print(f"Models saved to {models_dir}/")
