# Time Series Models and Statistical Arbitrage Backtesting

This notebook demonstrates the implementation and backtesting of various time series models (ARIMA, GARCH, VAR) and statistical arbitrage strategies (Pairs Trading) using a custom backtesting engine. We typically split data into 7 years training, 1 year validation, and 2 years testing.

In [1]:
import sys
import os
from pathlib import Path
sys.path.append(os.path.abspath('../'))
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import coint
from arch import arch_model
from tqdm import tqdm

from src.backtester.engine import run_backtest, BacktestConfig, BacktestResult
from src.backtester.bokeh_plots import build_interactive_portfolio_layout
from bokeh.io import output_notebook, show

import warnings
warnings.filterwarnings('ignore')

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 7)
output_notebook()

def plot_equity(result, title="Strategy Performance"):
    """Helper to plot equity curve using Bokeh."""
    import pandas as pd
    from bokeh.io import show
    try:
        global_test_data = globals().get('test_data')
        if global_test_data is not None:
             proxy_close = global_test_data.mean(axis=1)
             close_prices = global_test_data
        else:
             proxy_close = result.equity
             close_prices = None
    except:
        proxy_close = result.equity
        close_prices = None
        
    market_proxy = pd.DataFrame({
        'Open': proxy_close,
        'High': proxy_close,
        'Low': proxy_close,
        'Close': proxy_close
    }, index=result.equity.index)
    
    layout = build_interactive_portfolio_layout(
        market_ohlcv=market_proxy,
        equity=result.equity,
        returns=result.returns,
        weights=result.weights,
        turnover=result.turnover,
        costs=result.costs,
        close_prices=close_prices,
        title=title
    )
    show(layout)


## 1. Data Loading and Splitting

In [2]:
# Load Data
data_path = Path("../dataset/cleaned")
# Load ALL Data
all_files = sorted(list(data_path.glob("Asset_*.csv")))

price_data = {}
print(f"Loading {len(all_files)} assets...")

# Load all assets into a dictionary
from tqdm import tqdm
for file in tqdm(all_files):
    asset_name = file.stem
    try:
        df = pd.read_csv(file, parse_dates=['Date'], index_col='Date')
        # Ensure unique index
        df = df[~df.index.duplicated(keep='first')]
        price_data[asset_name] = df['Close']
    except Exception as e:
        print(f"Error loading {file}: {e}")

# Combine into a single DataFrame
prices = pd.DataFrame(price_data).dropna()
print(f"Loaded prices shape: {prices.shape}")

# Split Data
# 7 Years Train, 1 Year Val, ~2 Years Test
train_end = '2023-01-25'
val_end = '2024-01-25'

train_data = prices.loc[:train_end]
val_data = prices.loc[train_end:val_end]
test_data = prices.loc[val_end:]

print(f"Train Range: {train_data.index.min()} to {train_data.index.max()}")
print(f"Val Range: {val_data.index.min()} to {val_data.index.max()}")
print(f"Test Range: {test_data.index.min()} to {test_data.index.max()}")


Loading 100 assets...


100%|██████████| 100/100 [00:01<00:00, 96.72it/s]

Loaded prices shape: (2511, 100)
Train Range: 2016-01-25 00:00:00 to 2023-01-25 00:00:00
Val Range: 2023-01-25 00:00:00 to 2024-01-25 00:00:00
Test Range: 2024-01-25 00:00:00 to 2026-01-16 00:00:00





## 2. ARIMA Strategy Backtest

Strategy: Fit an ARIMA model on a rolling window. Forecast the next return.
- If forecast return > 0, go Long.
- If forecast return < 0, go Short (or Neutral).

We will apply this to **ALL Assets** on the **Test Set**.

In [3]:
def arima_strategy(prices_series, order=(5,1,0), window=252, refit_step=20):
    # Note: Rolling ARIMA is computationally expensive. 
    # For demonstration, we will re-fit every 'window' days or simplified logic.
    # Here, to make it runnable, we might use a simplified approach or just Expanding Window walk-forward.
    
    signals = pd.Series(index=prices_series.index, data=0.0)
    
    # Using a walk-forward validation on the test set
    history = list(prices_series.iloc[:window])
    
    # Iterate through the rest
    # Optimization: Refit every N days to save time, or use simple AR model
    # refit_step set via parameter
    model_fit = None
    
    predictions = []
    
    print("Generating ARIMA signals... this may take a moment.")
    for t in tqdm(range(window, len(prices_series))):
        if t % refit_step == 0:
            # Refit
            model = ARIMA(history, order=order)
            model_fit = model.fit()
        else:
            # Append new observation (approximate update for AR models)
            # Statsmodels append is tricky, usually requires re-creation for full correctness
            # For speed in this demo, let's just use the old fit to forecast multiple steps ahead or just refit less often
            # Actually, let's just refit every step but limit history size to 'window' to speed up
            pass
            
        if t % refit_step == 0: # Only update signal on refit days for speed, hold otherwise
            output = model_fit.forecast()
            yhat = output[0]
            # Signal: 1 if yhat > current_price (expect rise), -1 if yhat < current
            curr_price = history[-1]
            if yhat > curr_price:
                signals.iloc[t] = 1.0
            else:
                signals.iloc[t] = -1.0
        else:
            signals.iloc[t] = signals.iloc[t-1]
            
        history.append(prices_series.iloc[t])
        
    return signals


# Run on ALL Test Data
# Construct Weights DataFrame initialized to 0
arima_weights = pd.DataFrame(index=test_data.index, columns=test_data.columns, data=0.0)

print(f"Running ARIMA Strategy on {len(test_data.columns)} assets...")

# Loop through each asset
for asset in tqdm(test_data.columns):
    try:
        # Run strategy for this asset
        # We increase refit_step to 60 (approx 3 months) to speed up execution on 100 assets
        # Reduce window to 60 for faster warmup
        sig = arima_strategy(test_data[asset], order=(1,0,0), window=60, refit_step=60) 
        arima_weights[asset] = sig
    except Exception as e:
        print(f"ARIMA failed for {asset}: {e}")

# Run Backtest
config = BacktestConfig(
    initial_equity=100_000,
    transaction_cost_bps=5,
    rebalance="D"
)

res_arima = run_backtest(test_data, arima_weights, config)
plot_equity(res_arima, f"ARIMA Strategy - Portfolio of {len(test_data.columns)} Assets")


Running ARIMA Strategy on 100 assets...


  0%|          | 0/100 [00:00<?, ?it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:01<00:00, 364.70it/s]
  1%|          | 1/100 [00:01<01:59,  1.20s/it]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 809.04it/s]
  2%|▏         | 2/100 [00:01<01:19,  1.23it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 502.06it/s]
  3%|▎         | 3/100 [00:02<01:21,  1.18it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 555.21it/s]
  4%|▍         | 4/100 [00:03<01:19,  1.21it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 627.89it/s]
  5%|▌         | 5/100 [00:04<01:14,  1.28it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1034.41it/s]
  6%|▌         | 6/100 [00:04<01:01,  1.52it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 572.23it/s]
  7%|▋         | 7/100 [00:05<01:04,  1.44it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 564.17it/s]
  8%|▊         | 8/100 [00:06<01:06,  1.39it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 590.84it/s]
  9%|▉         | 9/100 [00:06<01:06,  1.37it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 694.74it/s]
 10%|█         | 10/100 [00:07<01:02,  1.43it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 907.43it/s]
 11%|█         | 11/100 [00:07<00:56,  1.58it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 717.28it/s]
 12%|█▏        | 12/100 [00:08<00:55,  1.59it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 591.40it/s]
 13%|█▎        | 13/100 [00:09<00:57,  1.51it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 640.98it/s]
 14%|█▍        | 14/100 [00:10<00:57,  1.49it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 636.43it/s]
 15%|█▌        | 15/100 [00:10<00:57,  1.48it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 878.29it/s]
 16%|█▌        | 16/100 [00:11<00:52,  1.60it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 616.67it/s]
 17%|█▋        | 17/100 [00:11<00:54,  1.54it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 636.07it/s]
 18%|█▊        | 18/100 [00:12<00:54,  1.51it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 899.08it/s]
 19%|█▉        | 19/100 [00:13<00:49,  1.64it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 728.60it/s]
 20%|██        | 20/100 [00:13<00:48,  1.64it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1008.39it/s]
 21%|██        | 21/100 [00:14<00:44,  1.79it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 759.95it/s]
 22%|██▏       | 22/100 [00:14<00:44,  1.77it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 564.84it/s]
 23%|██▎       | 23/100 [00:15<00:48,  1.59it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 648.34it/s]
 24%|██▍       | 24/100 [00:16<00:49,  1.55it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 581.13it/s]
 25%|██▌       | 25/100 [00:16<00:51,  1.47it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 594.84it/s]
 26%|██▌       | 26/100 [00:17<00:51,  1.43it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1089.60it/s]
 27%|██▋       | 27/100 [00:18<00:44,  1.63it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:01<00:00, 430.52it/s]
 28%|██▊       | 28/100 [00:19<00:52,  1.36it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 445.11it/s]
 29%|██▉       | 29/100 [00:20<00:57,  1.23it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 765.58it/s]
 30%|███       | 30/100 [00:20<00:51,  1.35it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 697.94it/s]
 31%|███       | 31/100 [00:21<00:48,  1.41it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 645.72it/s]
 32%|███▏      | 32/100 [00:21<00:47,  1.43it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:01<00:00, 401.19it/s]
 33%|███▎      | 33/100 [00:23<00:54,  1.22it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 497.25it/s]
 34%|███▍      | 34/100 [00:23<00:55,  1.19it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 584.41it/s]
 35%|███▌      | 35/100 [00:24<00:52,  1.23it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 864.00it/s]
 36%|███▌      | 36/100 [00:25<00:46,  1.38it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1010.51it/s]
 37%|███▋      | 37/100 [00:25<00:40,  1.57it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 821.50it/s]
 38%|███▊      | 38/100 [00:26<00:37,  1.65it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 884.94it/s]
 39%|███▉      | 39/100 [00:26<00:35,  1.74it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 747.05it/s]
 40%|████      | 40/100 [00:27<00:34,  1.73it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 781.41it/s]
 41%|████      | 41/100 [00:27<00:33,  1.74it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 742.47it/s]
 42%|████▏     | 42/100 [00:28<00:33,  1.72it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 785.52it/s]
 43%|████▎     | 43/100 [00:29<00:32,  1.74it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 658.00it/s]
 44%|████▍     | 44/100 [00:29<00:33,  1.66it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 692.39it/s]
 45%|████▌     | 45/100 [00:30<00:33,  1.63it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 603.02it/s]
 46%|████▌     | 46/100 [00:31<00:35,  1.54it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 809.08it/s]
 47%|████▋     | 47/100 [00:31<00:32,  1.62it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 630.79it/s]
 48%|████▊     | 48/100 [00:32<00:33,  1.56it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 872.83it/s]
 49%|████▉     | 49/100 [00:32<00:30,  1.66it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 974.36it/s]
 50%|█████     | 50/100 [00:33<00:27,  1.80it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1191.52it/s]
 51%|█████     | 51/100 [00:33<00:24,  2.00it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 859.87it/s]
 52%|█████▏    | 52/100 [00:34<00:24,  1.98it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 728.09it/s]
 53%|█████▎    | 53/100 [00:34<00:25,  1.87it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 875.85it/s]
 54%|█████▍    | 54/100 [00:35<00:24,  1.90it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 793.32it/s]
 55%|█████▌    | 55/100 [00:35<00:24,  1.87it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 847.62it/s]
 56%|█████▌    | 56/100 [00:36<00:23,  1.89it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 828.49it/s]
 57%|█████▋    | 57/100 [00:36<00:22,  1.89it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 830.67it/s]
 58%|█████▊    | 58/100 [00:37<00:22,  1.89it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 910.85it/s]
 59%|█████▉    | 59/100 [00:37<00:21,  1.94it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1165.29it/s]
 60%|██████    | 60/100 [00:38<00:19,  2.10it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1047.09it/s]
 61%|██████    | 61/100 [00:38<00:17,  2.18it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 722.59it/s]
 62%|██████▏   | 62/100 [00:39<00:19,  1.98it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1144.08it/s]
 63%|██████▎   | 63/100 [00:39<00:17,  2.13it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 799.46it/s]
 64%|██████▍   | 64/100 [00:40<00:17,  2.03it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1674.69it/s]
 65%|██████▌   | 65/100 [00:40<00:14,  2.35it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 796.60it/s]
 66%|██████▌   | 66/100 [00:41<00:15,  2.16it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 822.80it/s]
 67%|██████▋   | 67/100 [00:41<00:16,  2.06it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 656.67it/s]
 68%|██████▊   | 68/100 [00:42<00:17,  1.85it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1234.97it/s]
 69%|██████▉   | 69/100 [00:42<00:15,  2.06it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 740.70it/s]
 70%|███████   | 70/100 [00:43<00:15,  1.93it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 664.10it/s]
 71%|███████   | 71/100 [00:43<00:16,  1.78it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 829.07it/s]
 72%|███████▏  | 72/100 [00:44<00:15,  1.81it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 751.08it/s]
 73%|███████▎  | 73/100 [00:44<00:15,  1.78it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1163.92it/s]
 74%|███████▍  | 74/100 [00:45<00:13,  1.97it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 764.39it/s]
 75%|███████▌  | 75/100 [00:45<00:13,  1.89it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 610.60it/s]
 76%|███████▌  | 76/100 [00:46<00:14,  1.71it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 552.29it/s]
 77%|███████▋  | 77/100 [00:47<00:14,  1.54it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 578.96it/s]
 78%|███████▊  | 78/100 [00:48<00:15,  1.47it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 481.66it/s]
 79%|███████▉  | 79/100 [00:49<00:15,  1.33it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 960.01it/s]
 80%|████████  | 80/100 [00:49<00:13,  1.51it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 668.87it/s]
 81%|████████  | 81/100 [00:50<00:12,  1.51it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 971.69it/s]
 82%|████████▏ | 82/100 [00:50<00:10,  1.67it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 630.79it/s]
 83%|████████▎ | 83/100 [00:51<00:10,  1.59it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 721.26it/s]
 84%|████████▍ | 84/100 [00:52<00:09,  1.60it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 624.48it/s]
 85%|████████▌ | 85/100 [00:52<00:09,  1.54it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 948.16it/s]
 86%|████████▌ | 86/100 [00:53<00:08,  1.69it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1037.17it/s]
 87%|████████▋ | 87/100 [00:53<00:07,  1.84it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 663.02it/s]
 88%|████████▊ | 88/100 [00:54<00:06,  1.73it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 653.95it/s]
 89%|████████▉ | 89/100 [00:54<00:06,  1.65it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 572.78it/s]
 90%|█████████ | 90/100 [00:55<00:06,  1.53it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 721.51it/s]
 91%|█████████ | 91/100 [00:56<00:05,  1.56it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 1086.56it/s]
 92%|█████████▏| 92/100 [00:56<00:04,  1.75it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 806.84it/s]
 93%|█████████▎| 93/100 [00:57<00:03,  1.78it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 705.29it/s]
 94%|█████████▍| 94/100 [00:57<00:03,  1.72it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 787.87it/s]
 95%|█████████▌| 95/100 [00:58<00:02,  1.74it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 895.42it/s]
 96%|█████████▌| 96/100 [00:58<00:02,  1.82it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 771.91it/s]
 97%|█████████▋| 97/100 [00:59<00:01,  1.80it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 650.41it/s]
 98%|█████████▊| 98/100 [01:00<00:01,  1.69it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 953.47it/s]
 99%|█████████▉| 99/100 [01:00<00:00,  1.81it/s]

Generating ARIMA signals... this may take a moment.


100%|██████████| 437/437 [00:00<00:00, 607.73it/s]
100%|██████████| 100/100 [01:01<00:00,  1.63it/s]


## 3. GARCH Volatility Strategy

Strategy: Use GARCH to forecast volatility.
- If Volatility is High -> Reduce Exposure (or Short).
- If Volatility is Low -> Increase Exposure (Long).
- Alternatively, Volatility Targeting: Size position = Target Vol / Forecast Vol.

Here we implement a simple **Volatility Targeting** approach on ALL assets.

In [4]:
def garch_vol_target(prices_series, target_vol=0.01, window=252):
    returns = prices_series.pct_change().dropna() * 100 # GARCH handles percent better usually
    
    # Rolling forecast
    # Use a fixed rolling window for model fitting
    
    weights = pd.Series(index=prices_series.index, data=0.0)
    
    # We need a burn-in period
    print("Generating GARCH signals...")
    
    # Optimize: Refit weekly (every 5 days) to save time
    refit_freq = 5
    
    for t in tqdm(range(window, len(returns))):
        if t % refit_freq == 0:
            curr_rets = returns.iloc[t-window:t]
            try:
                # GARCH(1,1)
                am = arch_model(curr_rets, vol='Garch', p=1, q=1)
                res = am.fit(disp='off')
                forecast = res.forecast(horizon=1)
                vol_forecast = np.sqrt(forecast.variance.values[-1, 0]) / 100.0 # Convert back to decimal
                
                if vol_forecast > 1e-4:
                    w = target_vol / vol_forecast
                    # Cap leverage at 1.5
                    w = min(w, 1.5)
                    weights.iloc[t + 1] = w # Apply to next day (returns index shifted by 1 vs prices)
                
            except:
                weights.iloc[t+1] = 0.0
        else:
            # Carry forward previous weight
            if t+1 < len(weights):
                weights.iloc[t+1] = weights.iloc[t]
                
    # Align indices (returns index starts day 1, weights should shift to match prices)
    return weights

# Run on Test Data
garch_w_series = garch_vol_target(test_data['Asset_001'], window=100)

garch_weights = pd.DataFrame(index=test_data.index, columns=test_data.columns, data=0.0)
garch_weights['Asset_001'] = garch_w_series

res_garch = run_backtest(test_data, garch_weights, config)
plot_equity(res_garch, "GARCH Volatility Targeting - Asset 001")

Generating GARCH signals...


100%|██████████| 396/396 [00:01<00:00, 332.70it/s]


## 4. Vector Autoregression (VAR) Strategy

Strategy: Use lag values of both Asset 001 and Asset 002 to predict next move of Asset 001.
- If VAR predicts Asset 001 Up, Buy.
- If VAR predicts Asset 001 Down, Sell.

In [5]:
def var_strategy(price_df, lags=2, window=252):
    rets = price_df.pct_change().dropna()
    signals = pd.DataFrame(index=price_df.index, columns=price_df.columns, data=0.0)
    
    print("Generating VAR signals...")
    refit_freq = 20
    
    # Rolling window
    for t in tqdm(range(window, len(rets))):
        if t % refit_freq == 0:
            train_slice = rets.iloc[t-window:t]
            try:
                model = VAR(train_slice)
                results = model.fit(lags)
                
                lag_order = results.k_ar
                # Forecast next step
                forecast = results.forecast(train_slice.values[-lag_order:], 1)
                
                # If forecasted return > 0 -> 1, else -1
                sig = np.where(forecast[0] > 0, 1.0, -1.0)
                
                # Signal effective for t+1 (price index equivalent might be t + something)
                # rets index t is return from t-1 to t. Forecast is for t+1.
                # We apply this weight to be active at close of t (start of t+1 risk)
                
                # Map back to price index
                # rets.index[t] corresponds to price date t. 
                # We want to set signal for t+1 based on info up to t
                current_date = rets.index[t]
                
                signals.loc[current_date:] = sig # Forward fill until next update
            except:
               pass
    
    return signals

var_sigs = var_strategy(test_data[['Asset_001', 'Asset_002']], window=100)

# Construct Weights
var_weights = var_sigs.copy()
var_weights['Asset_002'] = 0.0 # Let's just trade Asset 1 based on VAR info including Asset 2

res_var = run_backtest(test_data[var_weights.columns], var_weights, config)
plot_equity(res_var, "VAR Strategy on Asset 001 (using Asset 002 info)")

Generating VAR signals...


100%|██████████| 396/396 [00:00<00:00, 8564.48it/s]


## 5. Statistical Arbitrage (Pairs Trading)

Strategy: 
1. Calculate Spread = Asset 1 - HedgeRatio * Asset 2.
   - HedgeRatio calculated usually on Training Data via Cointegration (OLS).
2. Calculate Z-Score of Spread.
3. Entry:
   - Z > 2: Short Spread (Short A1, Long A2)
   - Z < -2: Long Spread (Long A1, Short A2)
4. Exit:
   - Z crosses 0.
   
We use the **Train Data** to determine if they are cointegrated and find the hedge ratio.

In [6]:
import statsmodels.api as sm

# 1. Find Hedge Ratio on Train Data
pair_to_trade = ('Asset_001', 'Asset_002')
S1_train = train_data[pair_to_trade[0]]
S2_train = train_data[pair_to_trade[1]]

S1_train = sm.add_constant(S1_train)
results = sm.OLS(S2_train, S1_train).fit()
S1_train = S1_train['Asset_001']

b = results.params['Asset_001']
print(f"Hedge Ratio (Beta): {b:.4f}")

# Check Cointegration on Train
spread_train = S2_train - b * S1_train
score, pvalue, _ = coint(S1_train, S2_train)
print(f"Cointegration p-value on Train: {pvalue:.4f}")

if pvalue > 0.1:
    print("Warning: Pairs might not be cointegrated adequately.")

# 2. Strategy on Test Data
S1_test = test_data[pair_to_trade[0]]
S2_test = test_data[pair_to_trade[1]]

spread = S2_test - b * S1_test
spread_mean = spread_train.mean() # Use Train statistics to avoid lookahead bias
spread_std = spread_train.std()

zscore = (spread - spread_mean) / spread_std

# Generate Signals
# Long Spread: Buy S2, Sell S1 (since Spread = S2 - b*S1)
# Short Spread: Sell S2, Buy S1

entry_threshold = 2.0
exit_threshold = 0.0

# Vectorized signal generation for pair
long_spread = zscore < -entry_threshold
short_spread = zscore > entry_threshold
exit_pos = abs(zscore) < 0.5 # Exit zone near mean

pos_s2 = pd.Series(data=0.0, index=zscore.index)
pos_s1 = pd.Series(data=0.0, index=zscore.index)

# State tracking loop for correct entry/exit logic
current_pos = 0 # 0, 1 (Long Spread), -1 (Short Spread)

for t in range(len(zscore)):
    z = zscore.iloc[t]
    if current_pos == 0:
        if z < -entry_threshold:
            current_pos = 1 # Long Spread (Buy S2, Sell S1)
        elif z > entry_threshold:
            current_pos = -1 # Short Spread (Sell S2, Buy S1)
    elif current_pos == 1:
        if z >= 0:
            current_pos = 0 # Exit
    elif current_pos == -1:
        if z <= 0:
            current_pos = 0 # Exit
            
    # Assign Weights
    # Spread = S2 - b*S1
    # Long Spread -> +1 unit S2, -b units S1
    if current_pos == 1:
        pos_s2.iloc[t] = 1.0
        pos_s1.iloc[t] = -b
    elif current_pos == -1:
        pos_s2.iloc[t] = -1.0
        pos_s1.iloc[t] = b
        
# Construct Weights DF
pair_weights = pd.DataFrame(index=test_data.index)
pair_weights[pair_to_trade[0]] = pos_s1
pair_weights[pair_to_trade[1]] = pos_s2

res_pair = run_backtest(test_data[pair_weights.columns], pair_weights, config)
plot_equity(res_pair, "Statistical Arbitrage (Pairs Trading)")

Hedge Ratio (Beta): 1.8480
Cointegration p-value on Train: 0.3253


## 6. Comparison of Results

We compare the equity curves of all strategies on the test set.

In [7]:

# Compare All
from bokeh.plotting import figure
from bokeh.palettes import Category10_4

plt.figure(figsize=(12, 6)) # Keep mpl for static/thumbnail if needed, but show bokeh below

p_comp = figure(
    x_axis_type="datetime", 
    height=400, 
    width=1100, 
    title="Strategy Comparison - Equity Curves",
    tools="pan,wheel_zoom,box_zoom,reset,save"
)

colors = itertools.cycle(Category10_4)

if 'res_arima' in locals(): 
    p_comp.line(res_arima.equity.index, res_arima.equity, legend_label='ARIMA (Portfolio)', color=next(colors), line_width=2)
if 'res_garch' in locals(): 
    p_comp.line(res_garch.equity.index, res_garch.equity, legend_label='GARCH (Portfolio)', color=next(colors), line_width=2)
if 'res_var' in locals(): 
    p_comp.line(res_var.equity.index, res_var.equity, legend_label='VAR (Subset)', color=next(colors), line_width=2)
if 'res_pair' in locals(): 
    p_comp.line(res_pair.equity.index, res_pair.equity, legend_label=f'Pairs ({pair_to_trade[0]}-{pair_to_trade[1]})', color=next(colors), line_width=2)

# Benchmark
benchmark = test_data.mean(axis=1).pct_change().fillna(0) + 1
benchmark_equity = config.initial_equity * benchmark.cumprod()
p_comp.line(benchmark_equity.index, benchmark_equity, legend_label='Benchmark (Equal Weight)', color='black', line_dash='dashed', line_width=2)

p_comp.legend.click_policy = "hide"
show(p_comp)


<Figure size 1200x600 with 0 Axes>