# Model Validation Framework Demo

This notebook demonstrates the walk-forward validation and trading metrics framework.

## Contents
1. Setup and Data Loading
2. Walk-Forward Validation (including overfitting detection)
3. Performance Metrics Calculation
4. Visualization and Analysis
5. Conclusion

In [None]:
# Imports
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Add project paths
sys.path.insert(0, '../../worktrees/backtesting/src')
sys.path.insert(0, '../../worktrees/neural-network/src')

from validation.walk_forward import WalkForwardValidator, WalkForwardConfig
from metrics.trading_metrics import TradingMetricsCalculator, PerformanceMetrics

# Configure matplotlib
# Apply a shared style sheet and colour palette so every figure below looks consistent.
# NOTE(review): the 'seaborn-v0_8-*' style names appear to exist only in recent
# matplotlib releases — confirm against the pinned matplotlib version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
# IPython line magic (not Python syntax): selects the inline plotting backend.
%matplotlib inline

# Reaching this line means every import and path insertion above succeeded.
print('✅ Imports successful')

## 1. Setup and Data Loading

Generate simulated daily price data (a stand-in for real historical data) and prepare features for model training.

In [None]:
# Build a reproducible synthetic dataset (swap this cell for real data loading)
np.random.seed(42)

# Daily timestamps from 2023-01-01 through 2024-01-01, both ends included
dates = pd.date_range(start='2023-01-01', end='2024-01-01', freq='D')
n_samples = len(dates)

# Price path = linear upward trend plus Gaussian noise
trend = np.linspace(50000, 60000, n_samples)
noise = 2000 * np.random.randn(n_samples)
prices = trend + noise

# Simple day-over-day returns; the first day has no predecessor, so it gets 0
returns = np.concatenate(([0.0], np.diff(prices) / prices[:-1]))

# Placeholder features: independent standard-normal draws, generated in this order
# so the random stream is consumed deterministically.
#   feature_1 -> momentum indicator
#   feature_2 -> volatility indicator
#   feature_3 -> volume indicator
feature_columns = {
    name: np.random.randn(n_samples)
    for name in ('feature_1', 'feature_2', 'feature_3')
}

# Assemble the frame; 'target' is 1 when that day's return is positive, else 0
data = pd.DataFrame(
    {
        'price': prices,
        'returns': returns,
        **feature_columns,
        'target': (returns > 0).astype(int),
    },
    index=dates,
)

print(f'Data shape: {data.shape}')
print(f'Date range: {data.index[0]} to {data.index[-1]}')
data.head()

In [None]:
# Two stacked panels: simulated price on top, daily returns underneath
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 8))
title_style = dict(fontsize=14, fontweight='bold')

# Top panel: price level
ax1.plot(data.index, data['price'], linewidth=2)
ax1.set_title('BTC-USD Price (Simulated)', **title_style)
ax1.set_ylabel('Price ($)')

# Bottom panel: daily returns with a dashed zero reference line
ax2.plot(data.index, data['returns'], linewidth=1, alpha=0.7)
ax2.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax2.set_title('Daily Returns', **title_style)
ax2.set_ylabel('Returns')
ax2.set_xlabel('Date')

# Same faint grid on both panels
for panel in (ax1, ax2):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 2. Walk-Forward Validation

Perform rolling window validation to test model robustness.

In [None]:
# Walk-forward settings, collected in one place so they are easy to tune
window_settings = dict(
    train_window_days=90,  # 3 months training
    test_window_days=30,   # 1 month testing
    step_days=14,          # Step forward 2 weeks
    min_train_samples=50,
    refit_frequency=1,     # Retrain every window
)
config = WalkForwardConfig(**window_settings)

validator = WalkForwardValidator(config)

# Echo the key window sizes back so the run is self-describing
print('✅ Validator configured')
summary_lines = (
    f'   Train window: {config.train_window_days} days',
    f'   Test window: {config.test_window_days} days',
    f'   Step size: {config.step_days} days',
)
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Define model and training functions
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def model_factory():
    """Build a new, unfitted RandomForestClassifier with fixed hyperparameters.

    Returns:
        A fresh classifier instance; every call returns an independent object.
    """
    hyperparams = {
        'n_estimators': 100,
        'max_depth': 10,
        'random_state': 42,  # fixed seed so repeated runs build identical forests
        'n_jobs': -1,        # scikit-learn convention: use all available cores
    }
    return RandomForestClassifier(**hyperparams)

def train_fn(model, train_data):
    """Fit ``model`` on the three feature columns against the ``target`` column.

    Args:
        model: Estimator exposing ``fit(X, y)``.
        train_data: Frame containing ``feature_1``..``feature_3`` and ``target``.

    Returns:
        The same ``model`` object that was passed in, after fitting.
    """
    feature_names = ['feature_1', 'feature_2', 'feature_3']
    model.fit(train_data[feature_names], train_data['target'])
    return model

def predict_fn(model, test_data):
    """Return ``model.predict`` evaluated on the three feature columns.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        test_data: Frame containing ``feature_1``..``feature_3``.

    Returns:
        Whatever ``model.predict`` returns for the selected feature columns.
    """
    feature_names = ['feature_1', 'feature_2', 'feature_3']
    features = test_data[feature_names]
    return model.predict(features)

def metric_fn(predictions, actuals):
    """Score predictions against actual labels.

    Args:
        predictions: Predicted class labels.
        actuals: True class labels, aligned with ``predictions``.

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall`` and ``f1_score``.
    """
    # zero_division=0 is passed to precision/recall/F1 exactly as before
    zero_safe = {'zero_division': 0}
    scores = {'accuracy': accuracy_score(actuals, predictions)}
    scores['precision'] = precision_score(actuals, predictions, **zero_safe)
    scores['recall'] = recall_score(actuals, predictions, **zero_safe)
    scores['f1_score'] = f1_score(actuals, predictions, **zero_safe)
    return scores

print('✅ Model functions defined')

In [None]:
# Execute the walk-forward loop over the full dataset
print('Running walk-forward validation...')
print('This may take 1-2 minutes...\n')

validation_kwargs = dict(
    data=data,
    model_factory=model_factory,
    train_fn=train_fn,
    predict_fn=predict_fn,
    metric_fn=metric_fn,
    expanding_window=False,  # Use fixed window
    gap_days=1,              # 1-day gap to prevent leakage
)
results = validator.validate(**validation_kwargs)

print(f'\n✅ Validation complete: {len(results)} windows tested')

In [None]:
# Summarise test metrics across all validation windows
aggregated = validator.aggregate_metrics()

print('=== Aggregated Test Metrics ===')
for metric in aggregated:
    value = aggregated[metric]
    # Left-align the metric name in 25 columns, show the value to 4 decimals
    print(f'{metric:25s}: {value:.4f}')

In [None]:
# Stability and overfitting diagnostics from the validator
stability = validator.get_stability_score('accuracy')
overfitting = validator.detect_overfitting(threshold=0.15)

print(f'\n📊 Model Stability Score: {stability:.3f} (higher is better)')
print(f'🔍 Overfitting Detected: {overfitting}')

if not overfitting:
    print('✅ Model shows good generalization')
else:
    # Print the warning followed by the list of suggested remedies
    print('⚠️  WARNING: Model may be overfitting. Consider:')
    for suggestion in (
        '   - Reducing model complexity',
        '   - Adding regularization',
        '   - Increasing training data',
    ):
        print(suggestion)

## 3. Performance Metrics Calculation

Calculate comprehensive trading metrics.

In [None]:
# Generate trading signals from predictions
# Collect every out-of-sample prediction together with the calendar day it refers to.
all_predictions = []
all_dates = []

for result in results:
    for i, pred in enumerate(result.predictions):
        # Assumes one prediction per consecutive calendar day starting at test_start
        # (true for the daily data built above) — revisit for non-daily data.
        date = result.test_start + timedelta(days=i)
        all_dates.append(date)
        all_predictions.append(pred)

predictions_df = pd.DataFrame({
    'date': all_dates,
    'prediction': all_predictions
}).set_index('date')

# Test windows can overlap when step_days < test_window_days (14 vs 30 in the
# configuration cell), so one date may receive several predictions. Left as-is,
# the join below would repeat return rows, shift(1) would pair positions with
# the wrong rows, and returns would be double-counted in every metric.
# Keep one prediction per date: the last one appended, i.e. from the latest
# window in `results` (assumes `results` is in chronological order — confirm).
predictions_df = predictions_df[~predictions_df.index.duplicated(keep='last')].sort_index()

# Merge with actual returns; the inner join keeps only dates that have a prediction
strategy_data = data[['returns']].join(predictions_df, how='inner')
assert strategy_data.index.is_unique, 'expected exactly one row per trading day'

# Yesterday's signal drives today's position, so no same-day information is used
strategy_data['position'] = strategy_data['prediction'].shift(1)  # Trade on next day
strategy_data['strategy_returns'] = strategy_data['position'] * strategy_data['returns']

# Drop NaN (the first row has no prior-day signal)
strategy_data = strategy_data.dropna()

print(f'Strategy data: {len(strategy_data)} days')
strategy_data.head()

In [None]:
# Compute headline metrics for buy-and-hold and for the model-driven strategy
calc = TradingMetricsCalculator(risk_free_rate=0.02)

# Buy & Hold first, then Strategy
bh_metrics = calc.calculate_all_metrics(strategy_data['returns'])
strategy_metrics = calc.calculate_all_metrics(strategy_data['strategy_returns'])

# Row label -> attribute on the metrics object, in display order
metric_fields = {
    'Total Return': 'total_return',
    'Annual Return': 'annual_return',
    'Sharpe Ratio': 'sharpe_ratio',
    'Sortino Ratio': 'sortino_ratio',
    'Max Drawdown': 'max_drawdown',
}

# One column per approach, one row per metric
comparison = pd.DataFrame(
    {
        column: [getattr(metrics, attr) for attr in metric_fields.values()]
        for column, metrics in (('Buy & Hold', bh_metrics), ('Strategy', strategy_metrics))
    },
    index=list(metric_fields),
)

print('=== Performance Comparison ===')
print(comparison)

## 4. Visualization and Analysis

In [None]:
# Compound daily returns into equity curves that start from 1.0
bh_equity = strategy_data['returns'].add(1).cumprod()
strategy_equity = strategy_data['strategy_returns'].add(1).cumprod()

fig, ax = plt.subplots(figsize=(14, 6))

# Draw both curves with identical line styling; order fixes the legend order
for curve_label, curve in (('Buy & Hold', bh_equity), ('Strategy', strategy_equity)):
    ax.plot(curve.index, curve, label=curve_label, linewidth=2, alpha=0.8)

ax.set_title('Equity Curve Comparison', fontsize=16, fontweight='bold')
ax.set(xlabel='Date', ylabel='Equity (Initial = 1.0)')
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Final value of each curve
print(f'Final Buy & Hold Equity: {bh_equity.iloc[-1]:.3f}')
print(f'Final Strategy Equity: {strategy_equity.iloc[-1]:.3f}')

In [None]:
# Plot walk-forward results
# Delegates to the validator's own plotting helper, selecting the 'accuracy' metric.
# NOTE(review): presumably charts per-window accuracy from the validation run
# above — confirm against WalkForwardValidator.plot_results.
validator.plot_results(metric_name='accuracy')

In [None]:
# Persist the walk-forward results to CSV, creating the output folder if needed
output_dir = '../../results/validation'
results_csv = f'{output_dir}/walk_forward_results.csv'

os.makedirs(output_dir, exist_ok=True)
validator.export_results(results_csv)

print(f'✅ Results exported to {output_dir}/walk_forward_results.csv')

## 5. Conclusion

This notebook demonstrated:
- Walk-forward validation for robustness testing
- Comprehensive trading metrics calculation
- Overfitting detection
- Performance visualization

Use this framework to validate all trading models before deployment!