# 06 — Model Evaluation & Backtesting
Systematic evaluation of the baseline forecasting models using walk-forward validation across multiple forecast horizons.

In [None]:
# Standard-library modules used to put the project on the import path.
import sys, os
# Prepend the parent of the current working directory to sys.path so the
# `src.*` imports below can resolve.
# NOTE(review): presumably the notebook is launched from a subdirectory of
# the project root — confirm; the path depends on os.getcwd() at run time.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings

# Project-local helpers: data loading, baseline forecasters, and the
# evaluation / plotting utilities called by the cells below.
from src.data_loader import fetch_multiple_stocks
from src.baselines import NaiveForecast, RandomWalkForecast, SMAForecast, ARIMAForecast
from src.evaluation import backtest_model, compare_models, walk_forward_split, compute_all_metrics, plot_model_comparison_heatmap, plot_metrics_bar_chart, statistical_test

# Suppress every warning for the rest of the session.
# NOTE(review): this hides all warning categories, including ones that could
# point at real problems; consider narrowing the filter.
warnings.filterwarnings('ignore')

## 1. Load Data

In [None]:
# Universe of tickers to load. Each entry must be a real exchange symbol:
# Tesla is 'TSLA' (listed once; 'TESLA' is not a ticker), and Berkshire
# Hathaway class B is written 'BRK-B' in Yahoo Finance notation.
# NOTE(review): confirm that fetch_multiple_stocks expects Yahoo-style
# symbols; other data vendors spell the share class as 'BRK.B'.
SELECTED_TICKERS = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA',
    'META', 'TSLA', 'BRK-B', 'JPM', 'JNJ',
    'XOM', 'WMT', 'PG', 'MA', 'V',
    'HD', 'DIS', 'PYPL', 'NFLX',
    'ADBE', 'CRM', 'INTC', 'AMD', 'CSCO',
    'IBM', 'BA', 'GE', 'CAT', 'MMM',
]

# Load two years of history for every ticker, then pick one ticker's closing
# prices as the sample series used by the single-stock cells below.
stock_data = fetch_multiple_stocks(SELECTED_TICKERS, period="2y")
sample_ticker = "AAPL"
series = stock_data[sample_ticker]["Close"]

print(f"Stock data shape: {series.shape}")

## 2. Walk-Forward Validation Splits

In [None]:
# Build the walk-forward windows for the sample series and report the count.
splits = walk_forward_split(series, n_windows=5)
print(f"Number of splits: {len(splits)}")

# For each window, show the first and last index label of its train and test parts.
for i, window in enumerate(splits):
    train_idx, test_idx = window
    train_dates, test_dates = (series.index[idx] for idx in (train_idx, test_idx))
    print(f"Split {i}: Train [{train_dates[0]} to {train_dates[-1]}], Test [{test_dates[0]} to {test_dates[-1]}]")

## 3. Backtest All Models

In [None]:
# Models to evaluate: display name -> (forecaster class, constructor kwargs).
model_configs = {
    "Naive": (NaiveForecast, {}),
    "Random Walk": (RandomWalkForecast, {}),
    "SMA(20)": (SMAForecast, {"window": 20}),
    "ARIMA": (ARIMAForecast, {}),
}

# The windows depend only on `series`, so build them once and reuse them for
# every model instead of recomputing them on each loop iteration. This also
# makes it explicit that all models are scored on the same windows.
backtest_splits = walk_forward_split(series, n_windows=5)

# Backtest output per model, keyed by the display name above.
backtest_results = {}

for model_name, (ModelClass, kwargs) in model_configs.items():
    model = ModelClass(**kwargs)
    result = backtest_model(model, series, backtest_splits)
    backtest_results[model_name] = result
    # Report `.shape` when the result exposes one, otherwise fall back to len().
    print(f"{model_name}: result shape {result.shape if hasattr(result, 'shape') else len(result)}")

## 4. Compare Models

In [None]:
# Combine the per-model backtest results into one comparison table and
# print its `to_string()` rendering.
summary = compare_models(backtest_results)
summary_text = summary.to_string()
print(summary_text)

## 5. Visualize Model Comparison

In [None]:
# One heatmap per metric column, each saved under its own file name.
heatmap_targets = (
    ("RMSE_mean", "06_model_comparison_rmse.png"),
    ("MAPE_mean", "06_model_comparison_mape.png"),
)

# A failure on any heatmap aborts the remaining ones and is reported once.
try:
    for metric_name, png_name in heatmap_targets:
        plot_model_comparison_heatmap(summary, metric=metric_name, save=True, filename=png_name)
except Exception as e:
    print(f"Error creating heatmaps: {e}")

## 6. Metrics Bar Charts

In [None]:
# Draw and save one metrics bar chart per horizon; a horizon whose chart
# cannot be produced is reported and skipped so the others still render.
horizons = ("1d", "1w", "1m")

for horizon in horizons:
    chart_file = f"06_metrics_{horizon}.png"
    try:
        plot_metrics_bar_chart(summary, horizon=horizon, save=True, filename=chart_file)
    except Exception as e:
        print(f"Skipping {horizon}: {e}")

## 7. Statistical Significance Tests

In [None]:
# Compare every simpler model against ARIMA.
reference_model = 'ARIMA'
model_pairs = [
    (challenger, reference_model)
    for challenger in ('Naive', 'Random Walk', 'SMA(20)')
]
stat_results = []

for model1, model2 in model_pairs:
    try:
        result = statistical_test(backtest_results[model1], backtest_results[model2])
        # Tag the test output with the two models it compares.
        result['Model 1'] = model1
        result['Model 2'] = model2
    except Exception as e:
        print(f"Error comparing {model1} vs {model2}: {e}")
    else:
        stat_results.append(result)

# Only build and print the table when at least one comparison succeeded.
if stat_results:
    stat_df = pd.DataFrame(stat_results)
    print(stat_df.to_string())

## 8. Multi-Stock Evaluation

In [None]:
def _rmse_or_nan(result):
    """Return ``result['RMSE']`` when *result* is a dict containing it, else NaN.

    NOTE(review): any non-dict result yields NaN. If ``backtest_model``
    returns something else (e.g. an object with ``.shape``), every RMSE in
    the table below will be NaN — confirm the return type.
    """
    if isinstance(result, dict):
        return result.get('RMSE', np.nan)
    return np.nan


multi_ticker_results = []
eval_tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA']

for ticker in eval_tickers:
    # Use a dedicated name instead of rebinding the notebook-level `series`;
    # otherwise re-running an earlier cell after this one would silently
    # operate on the last ticker processed here.
    ticker_series = stock_data[ticker]["Close"]

    # Both models are scored on the same windows, so build them once per ticker.
    ticker_splits = walk_forward_split(ticker_series, n_windows=5)

    naive_result = backtest_model(NaiveForecast(), ticker_series, ticker_splits)
    arima_result = backtest_model(ARIMAForecast(), ticker_series, ticker_splits)

    multi_ticker_results.append({
        'ticker': ticker,
        'naive_rmse': _rmse_or_nan(naive_result),
        'arima_rmse': _rmse_or_nan(arima_result),
    })

# Only build and print the table when at least one ticker was evaluated.
if multi_ticker_results:
    multi_ticker_df = pd.DataFrame(multi_ticker_results)
    print(multi_ticker_df.to_string())

## 9. Save Evaluation Results

In [None]:
# Create the output directory if needed so the parquet writes do not fail
# on a fresh checkout where it does not exist yet.
os.makedirs("../data/processed", exist_ok=True)

# Persist the model comparison table.
summary.to_parquet("../data/processed/evaluation_summary.parquet")
# The multi-ticker table only exists when the multi-stock cell produced rows.
if multi_ticker_results:
    multi_ticker_df.to_parquet("../data/processed/multi_ticker_evaluation.parquet")

print("Evaluation results saved.")