# Walk-Forward Validation

## Overview
Test insurance strategies with rolling walk-forward windows to detect overfitting, measure consistency, and rank strategies by **out-of-sample** performance.

- **Prerequisites**: [optimization/01_optimization_overview](../optimization/01_optimization_overview.ipynb)
- **Estimated runtime**: 2-4 minutes
- **Audience**: [Developer]

In [None]:
"""Google Colab setup: mount Drive and install package dependencies.

Run this cell first. If prompted to restart the runtime, do so, then re-run all cells.
This cell is a no-op when running locally.
"""
import sys, os
# `google.colab` is looked up in sys.modules rather than imported, so this
# whole branch is skipped wherever that module has not been loaded.
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive')

    # NOTE(review): presumably the Drive folder that holds this notebook --
    # adjust the path if your copy lives elsewhere.
    NOTEBOOK_DIR = '/content/drive/My Drive/Colab Notebooks/ei_notebooks/advanced'

    # Work from that folder and make it importable (appended only once).
    os.chdir(NOTEBOOK_DIR)
    if NOTEBOOK_DIR not in sys.path:
        sys.path.append(NOTEBOOK_DIR)

    # IPython shell escape, not plain Python: quiet install with stderr merged
    # into stdout, showing only the last three lines of pip output.
    !pip install ergodic-insurance -q 2>&1 | tail -3
    print('\nSetup complete. If you see numpy/scipy import errors below,')
    print('restart the runtime (Runtime > Restart runtime) and re-run all cells.')

## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

from ergodic_insurance.config import ManufacturerConfig
from ergodic_insurance.manufacturer import WidgetManufacturer
from ergodic_insurance.strategy_backtester import (
    NoInsuranceStrategy, ConservativeFixedStrategy,
    AggressiveFixedStrategy, OptimizedStaticStrategy,
    AdaptiveStrategy,
)
from ergodic_insurance.validation_metrics import MetricCalculator, PerformanceTargets
from ergodic_insurance.walk_forward_validator import WalkForwardValidator

# Seed NumPy's global random state for reproducibility. This only affects
# code that draws from np.random's global generator.
SEED = 42
np.random.seed(seed=SEED)

# Plot defaults shared by every figure in this notebook.
sns.set_style(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})

## 1. Define Strategies and Targets

In [None]:
# Build each candidate strategy under its own name, then collect them in the
# same order they are evaluated and reported.
no_insurance = NoInsuranceStrategy()
conservative_fixed = ConservativeFixedStrategy(
    primary_limit=5_000_000,
    excess_limit=20_000_000,
    higher_limit=25_000_000,
    deductible=50_000,
)
aggressive_fixed = AggressiveFixedStrategy(
    primary_limit=2_000_000,
    excess_limit=5_000_000,
    deductible=250_000,
)
optimized_static = OptimizedStaticStrategy(target_roe=0.15, max_ruin_prob=0.01)
adaptive = AdaptiveStrategy(
    base_deductible=100_000,
    base_primary=3_000_000,
    base_excess=10_000_000,
    adaptation_window=3,
    adjustment_factor=0.2,
)
strategies = [
    no_insurance,
    conservative_fixed,
    aggressive_fixed,
    optimized_static,
    adaptive,
]

# Thresholds handed to the validator as its performance targets.
targets = PerformanceTargets(
    min_roe=0.10,
    max_ruin_probability=0.05,
    min_sharpe_ratio=0.8,
    max_drawdown=0.30,
    min_growth_rate=0.05,
)

# One summary line per strategy.
for strategy in strategies:
    print(f"  - {strategy.name}: {strategy.get_description()}")

## 2. Configure and Run Walk-Forward Validation

In [None]:
# Rolling-window settings for the walk-forward validator.
# NOTE(review): test_ratio=0.3 of window_size=3 is less than one whole unit --
# confirm how WalkForwardValidator rounds the train/test split.
validator_settings = dict(
    window_size=3,
    step_size=1,
    test_ratio=0.3,
    performance_targets=targets,
)
validator = WalkForwardValidator(**validator_settings)

total_years = 10
windows = validator.generate_windows(total_years)

# Report how many windows were generated and show the first three.
print(f"Windows: {len(windows)}")
preview = windows[:3]
for window in preview:
    print(f"  {window}")

In [None]:
# Company parameters used to build the manufacturer passed to the validator.
company_params = dict(
    initial_assets=10_000_000,
    asset_turnover_ratio=1.0,
    base_operating_margin=0.08,
    tax_rate=0.25,
    retention_ratio=0.95,
)
mfg_config = ManufacturerConfig(**company_params)
manufacturer = WidgetManufacturer(mfg_config)

# Run every strategy through the walk-forward procedure.
print("Running walk-forward validation...")
run_settings = dict(
    strategies=strategies,
    n_years=total_years,
    n_simulations=100,
    manufacturer=manufacturer,
)
result = validator.validate_strategies(**run_settings)

best_strategy = result.best_strategy
print(f"Best strategy: {best_strategy}")

## 3. Strategy Rankings

In [None]:
# Print the ranking table, with ROE and ruin probability shown as percentages.
rankings = result.strategy_rankings
if rankings.empty:
    print("No rankings available.")
else:
    cols = [
        "strategy",
        "avg_roe",
        "avg_ruin_prob",
        "avg_sharpe",
        "overfitting_score",
        "consistency_score",
        "composite_score",
    ]
    df = rankings[cols].copy()
    as_percent = "{:.2%}".format
    for pct_col in ("avg_roe", "avg_ruin_prob"):
        df[pct_col] = df[pct_col].map(as_percent)
    print(df.to_string(index=False))

## 4. Overfitting and Consistency

In [None]:
def _overfitting_tag(value):
    """Return "Low" below 0.2, "Moderate" below 0.4, otherwise "High"."""
    if value < 0.2:
        return "Low"
    if value < 0.4:
        return "Moderate"
    return "High"


def _consistency_tag(value):
    """Return "High" above 0.8, "Moderate" above 0.6, otherwise "Low"."""
    if value > 0.8:
        return "High"
    if value > 0.6:
        return "Moderate"
    return "Low"


# One aligned line per strategy: name, score to three decimals, then its tag.
print("Overfitting scores:")
for strategy_name, value in result.overfitting_analysis.items():
    print(f"  {strategy_name:25s} {value:.3f}  ({_overfitting_tag(value)})")

print("\nConsistency scores:")
for strategy_name, value in result.consistency_scores.items():
    print(f"  {strategy_name:25s} {value:.3f}  ({_consistency_tag(value)})")

In [None]:
# Side-by-side bar charts: overfitting on the left, consistency on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_overfit, ax_consistency = axes

# Left panel: vertical bars coloured by the 0.2 / 0.4 overfitting thresholds.
overfit_labels = list(result.overfitting_analysis)
overfit_values = list(result.overfitting_analysis.values())
overfit_colours = []
for value in overfit_values:
    if value < 0.2:
        overfit_colours.append("green")
    elif value < 0.4:
        overfit_colours.append("orange")
    else:
        overfit_colours.append("red")
ax_overfit.bar(overfit_labels, overfit_values, color=overfit_colours)
for threshold, line_colour in ((0.2, "orange"), (0.4, "red")):
    ax_overfit.axhline(threshold, ls="--", color=line_colour, alpha=0.5)
ax_overfit.set_title("Overfitting Scores")
ax_overfit.set_ylabel("Score")
plt.setp(ax_overfit.get_xticklabels(), rotation=45, ha="right")

# Right panel: horizontal bars coloured by the 0.6 / 0.8 consistency thresholds.
consistency_labels = list(result.consistency_scores)
consistency_values = list(result.consistency_scores.values())
consistency_colours = []
for value in consistency_values:
    if value > 0.8:
        consistency_colours.append("green")
    elif value > 0.6:
        consistency_colours.append("orange")
    else:
        consistency_colours.append("red")
ax_consistency.barh(consistency_labels, consistency_values, color=consistency_colours)
for threshold, line_colour in ((0.6, "orange"), (0.8, "green")):
    ax_consistency.axvline(threshold, ls="--", color=line_colour, alpha=0.5)
ax_consistency.set_title("Consistency Scores")
ax_consistency.set_xlabel("Score")
ax_consistency.set_xlim(0, 1)

plt.tight_layout()
plt.show()

## 5. Performance Across Windows

In [None]:
# Plot each strategy's out-of-sample metrics against the window they came from.
window_results = result.window_results

# Collect strategy names from every window (first-seen order) so that an empty
# result list does not raise and a strategy missing from the first window is
# not silently dropped.
strategy_names = list(dict.fromkeys(
    name for wr in window_results for name in wr.strategy_performances
))

# Panel title -> attribute read from each strategy's out-of-sample metrics.
metric_attrs = {
    "Out-Sample ROE": "roe",
    "Ruin Probability": "ruin_probability",
    "Sharpe Ratio": "sharpe_ratio",
    "Growth Rate": "growth_rate",
}
metric_keys = list(metric_attrs)
metric_data = {k: {s: [] for s in strategy_names} for k in metric_keys}
# Window index of every recorded point, kept per strategy so a strategy that
# lacks metrics in some window is still plotted at the correct x position.
window_index = {s: [] for s in strategy_names}

for w_idx, wr in enumerate(window_results):
    for s in strategy_names:
        perf = wr.strategy_performances.get(s)
        if not perf or not perf.out_sample_metrics:
            continue
        m = perf.out_sample_metrics
        window_index[s].append(w_idx)
        for mname, attr in metric_attrs.items():
            metric_data[mname][s].append(getattr(m, attr))

if not strategy_names:
    print("No window results to plot.")
else:
    fig, axes = plt.subplots(2, 2, figsize=(14, 9))
    for ax, (mname, mvals) in zip(axes.flat, metric_data.items()):
        for s in strategy_names:
            if mvals[s]:
                ax.plot(window_index[s], mvals[s], "o-", lw=2, label=s)
        ax.set_title(mname, fontweight="bold")
        ax.set_xlabel("Window")
        # Only draw a legend when at least one labelled line exists.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

    plt.suptitle("Strategy Performance Across Windows", fontweight="bold")
    plt.tight_layout()
    plt.show()

## Key Takeaways

- Walk-forward validation helps detect **overfitting** by evaluating strategies on data they were not tuned on.
- **Consistency scores** highlight strategies that perform reliably across market regimes.
- Composite ranking balances ROE, risk, Sharpe, overfitting, and consistency into a single score.
- The adaptive strategy often balances performance with robustness.

## Next Steps

- [advanced/01_hjb_optimal_control](01_hjb_optimal_control.ipynb) -- HJB-based optimal insurance control
- [advanced/03_advanced_convergence](03_advanced_convergence.ipynb) -- Monte Carlo convergence diagnostics
- [optimization/03_pareto_analysis](../optimization/03_pareto_analysis.ipynb) -- multi-objective trade-offs