# Notebook C: RL (SAC) + Regime Detection + Backtest
**Run on Colab Pro+ H100** | Part 3 of 3 parallel sessions
- Trains SAC agent for portfolio allocation
- Runs regime detection (KMeans + HMM)
- Runs full backtest with HTML report

In [1]:
# === ENVIRONMENT SETUP ===
# Clones the project into /content, installs it in editable mode, mounts
# Google Drive, creates the shared output folders, and reports the GPU.
import subprocess, sys, os

# Reset working directory (prevents getcwd bug on re-run)
os.chdir("/content")

# Clean and re-clone for fresh code
if os.path.exists('/content/quant-lab'):
    print("Removing existing quant-lab directory...")
    # check=True: stop here if the removal fails, rather than letting it
    # surface later as a confusing "git clone" failure.
    subprocess.run(['rm', '-rf', '/content/quant-lab'], check=True)

print("Cloning repository...")
result = subprocess.run(
    ['git', 'clone', 'https://github.com/Mohit1053/quant-lab.git', '/content/quant-lab'],
    capture_output=True, text=True
)
if result.returncode != 0:
    print(f"Clone failed: {result.stderr}")
    raise RuntimeError("Git clone failed")
print("Clone successful.")

# Install the freshly cloned package into the interpreter running this kernel.
os.chdir('/content/quant-lab')
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', '-e', '.'], check=True)
print("Package installed.")

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# Shared Drive folder used to exchange artifacts between the parallel notebooks.
from pathlib import Path
DRIVE_DIR = Path('/content/drive/MyDrive/quant_lab')
for d in ['data/raw', 'data/cleaned', 'data/features', 'outputs/models/rl/sac',
          'outputs/models/transformer', 'outputs/regimes', 'outputs/backtests']:
    (DRIVE_DIR / d).mkdir(parents=True, exist_ok=True)

import torch
if torch.cuda.is_available():
    gpu = torch.cuda.get_device_name(0)
    mem = torch.cuda.get_device_properties(0).total_memory / 1e9  # bytes -> GB
    print(f"GPU: {gpu} ({mem:.1f} GB) | BF16: {torch.cuda.is_bf16_supported()}")
else:
    print("WARNING: No GPU!")

Removing existing quant-lab directory...
Cloning repository...
Clone successful.
Package installed.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
GPU: NVIDIA RTX PRO 6000 Blackwell Server Edition (102.0 GB) | BF16: True


In [4]:
# === FIX NUMPY/SCIPY VERSIONS (Colab ships newer incompatible versions) ===
!pip uninstall -y numpy pandas scipy scikit-learn
!pip install --no-cache-dir numpy==1.26.4 pandas==2.2.2 scipy==1.11.4 scikit-learn==1.4.2

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Found existing installation: scipy 1.16.3
Uninstalling scipy-1.16.3:
  Successfully uninstalled scipy-1.16.3
Found existing installation: scikit-learn 1.6.1
Uninstalling scikit-learn-1.6.1:
  Successfully uninstalled scikit-learn-1.6.1
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas==2.2.2
  Downloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting scipy==1.11.4
  Downloading scipy-1.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# === LOAD DATA FROM DRIVE (cached by Notebook A) ===
# Waits for the feature parquet to appear on Drive, falls back to computing it
# locally on timeout, then loads the features into `df`.
import shutil, time

drive_features = DRIVE_DIR / 'data/features/nifty50_features.parquet'
local_features = Path('data/features/nifty50_features.parquet')

POLL_INTERVAL_S = 10  # seconds between checks of the Drive folder
MAX_POLLS = 60        # 60 polls x 10 s = 10 minutes before giving up

if not drive_features.exists():
    print("Waiting for Notebook A to cache data to Drive...")
    for i in range(MAX_POLLS):
        # Sleep first, then check: the file was just found missing, and this
        # ordering guarantees a final check after the last sleep (otherwise a
        # file appearing during the last interval would still hit the timeout).
        time.sleep(POLL_INTERVAL_S)
        if drive_features.exists():
            break
        if i % 6 == 0:
            # (i + 1) intervals have fully elapsed at this point.
            print(f"  Still waiting... ({(i + 1) * POLL_INTERVAL_S}s)")
    else:
        # Loop finished without `break`: Notebook A never delivered the file.
        print("Timeout! Downloading ourselves...")
        subprocess.run([sys.executable, 'scripts/ingest_data.py'], check=True)
        subprocess.run([sys.executable, 'scripts/compute_features.py'], check=True)

if drive_features.exists():
    for d in ['data/features', 'data/cleaned', 'data/raw']:
        Path(d).mkdir(parents=True, exist_ok=True)
    shutil.copy(drive_features, local_features)
    # Cleaned/raw files are optional here; copy whichever ones exist on Drive.
    for name in ['nifty50_cleaned.parquet', 'nifty50_raw.parquet']:
        subdir = "cleaned" if "cleaned" in name else "raw"
        src = DRIVE_DIR / f'data/{subdir}/{name}'
        if src.exists():
            shutil.copy(src, f'data/{subdir}/{name}')
    print("Data loaded from Drive!")

import pandas as pd
df = pd.read_parquet(local_features)
print(f"Features: {df.shape[0]} rows, {df['ticker'].nunique()} tickers")

Data loaded from Drive!
Features: 177187 rows, 49 tickers


## RL Portfolio Allocation (SAC)
Soft Actor-Critic with automatic entropy tuning
- Off-policy, sample-efficient algorithm
- 500K timesteps (SAC is ~5x slower per step than PPO due to replay buffer + critic updates)
- Forced to CPU (SB3 MlpPolicy is faster on CPU than GPU)

In [3]:
# === RL SAC TRAINING ===
import time
import numpy as np
from quant_lab.utils.seed import set_global_seed
from quant_lab.utils.device import get_device
from quant_lab.rl.environments.portfolio_env import PortfolioEnvConfig
from quant_lab.rl.environments.reward import RewardConfig
from quant_lab.rl.training import train_rl, RLTrainingConfig

# Seed through the project helper before any stochastic work starts.
set_global_seed(42)

# Identifier and raw price/volume columns; every other column is used as a feature.
base_cols = {
    'date', 'ticker',
    'open', 'high', 'low', 'close',
    'volume', 'adj_close',
}
feature_df = df  # from cell above
feat_cols = []
for column_name in feature_df.columns:
    if column_name in base_cols:
        continue
    feat_cols.append(column_name)

def build_feature_tensor(df, feat_cols, start, end):
    """Pivot a long (date, ticker) frame into dense arrays for the RL environment.

    Args:
        df: Long-format frame with at least ``date`` and ``ticker`` columns, the
            columns named in ``feat_cols``, and either ``log_return_1d`` or
            ``adj_close`` (used to derive the return when it is missing).
            The caller's frame is not modified.
        feat_cols: Names of the feature columns, in output order.
        start: Exclusive lower date bound (anything pandas can compare to a datetime).
        end: Inclusive upper date bound.

    Returns:
        ``(features, returns)`` where ``features`` is float32 with shape
        ``(n_dates, n_tickers, len(feat_cols))`` and ``returns`` is float32 with
        shape ``(n_dates, n_tickers)``. Dates and tickers are in sorted order.
        Cells with no row stay 0, and NaN values are replaced by 0.
    """
    import pandas as pd

    # Parse dates on a separate Series so the caller's frame is left untouched.
    all_dates = pd.to_datetime(df['date'])
    in_window = (all_dates > start) & (all_dates <= end) & df['ticker'].notna()
    # .assign returns a fresh frame, so adding columns below cannot write into
    # (or warn about) a view of the caller's data.
    window = df.loc[in_window].assign(date=all_dates[in_window])

    if 'log_return_1d' not in window.columns:
        # Fallback: derive log returns per ticker from adjusted close.
        # Relies on rows being date-ordered within each ticker.
        window['log_return_1d'] = window.groupby('ticker')['adj_close'].transform(
            lambda s: np.log(s / s.shift(1))
        )

    # Integer position of every row along the sorted date and ticker axes.
    date_idx, dates = pd.factorize(window['date'], sort=True)
    ticker_idx, tickers = pd.factorize(window['ticker'], sort=True)

    features = np.zeros((len(dates), len(tickers), len(feat_cols)), dtype=np.float32)
    returns = np.zeros((len(dates), len(tickers)), dtype=np.float32)

    # One vectorised scatter instead of a Python loop over every row.
    # For duplicate (date, ticker) rows the last one wins, as before.
    features[date_idx, ticker_idx, :] = window[feat_cols].to_numpy(
        dtype=np.float32, na_value=np.nan
    )
    ret = window['log_return_1d'].to_numpy(dtype=np.float64, na_value=np.nan)
    returns[date_idx, ticker_idx] = np.where(np.isnan(ret), 0.0, ret)

    return np.nan_to_num(features, nan=0.0), returns

# Build dense (date, ticker, feature) tensors for the train and validation windows.
# The start bound is exclusive and the end bound inclusive, so the windows do not overlap.
print("Building feature tensors...")
train_features, train_returns = build_feature_tensor(feature_df, feat_cols, '1900-01-01', '2021-12-31')
val_features, val_returns = build_feature_tensor(feature_df, feat_cols, '2021-12-31', '2023-06-30')
print(f"Train: {train_features.shape}, Val: {val_features.shape}")

env_config = PortfolioEnvConfig(initial_cash=1_000_000, max_weight=0.20, rebalance_frequency=5)
reward_config = RewardConfig(lambda_mdd=0.5, lambda_turnover=0.01, commission_bps=10.0, slippage_bps=5.0, spread_bps=5.0)

# SAC: 500K steps (not 2M — SAC is much slower per step than PPO)
# Force CPU — SB3 MlpPolicy is faster on CPU than GPU
training_config = RLTrainingConfig(
    algorithm='sac',
    total_timesteps=500_000,
    eval_freq=50_000,
    n_eval_episodes=5,
    checkpoint_dir='outputs/models/rl/sac',
)

print("Starting SAC training (500K timesteps, CPU)...")
start = time.time()
result = train_rl(
    train_features=train_features, train_returns=train_returns,
    val_features=val_features, val_returns=val_returns,
    config=training_config, env_config=env_config, reward_config=reward_config,
    device='cpu',  # MlpPolicy is faster on CPU
)
elapsed = time.time() - start

print(f"\nSAC training done in {elapsed/60:.1f} min")
for k, v in result['train_metrics'].items():
    print(f"  Train {k}: {v:.4f}")
if 'val_metrics' in result:
    for k, v in result['val_metrics'].items():
        print(f"  Val   {k}: {v:.4f}")

# Save to Drive
# shutil.copy raises on directories, so sub-directories in the checkpoint
# folder are copied recursively; a long training run should not end in a
# crash during the upload step.
sac_drive_dir = DRIVE_DIR / 'outputs/models/rl/sac'
for f in Path('outputs/models/rl/sac').glob('*'):
    if f.is_dir():
        shutil.copytree(f, sac_drive_dir / f.name, dirs_exist_ok=True)
    else:
        shutil.copy(f, sac_drive_dir / f.name)
print("SAC agent saved to Drive!")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Building feature tensors...


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Train: (2962, 49, 15), Val: (370, 49, 15)
Starting SAC training (500K timesteps, CPU)...


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


2026-02-20 06:35:00 [info     ] sac_agent_created              lr=0.0003 policy=MlpPolicy
2026-02-20 06:35:00 [info     ] rl_training_start              algorithm=sac num_assets=49 num_steps=2962 total_timesteps=500000
2026-02-20 06:35:00 [info     ] sac_training_start             total_timesteps=500000
2026-02-20 07:39:02 [info     ] sac_training_complete
2026-02-20 07:39:04 [info     ] rl_train_eval                  mean_final_value=10116246.734055225 mean_reward=-58.49674657308177 std_final_value=0.0 std_reward=0.0
2026-02-20 07:39:04 [info     ] rl_val_eval                    mean_final_value=1152215.6988326719 mean_reward=-9.886782493378817 std_final_value=0.0 std_reward=0.0
2026-02-20 07:39:05 [info     ] sac_saved                      path=outputs/models/rl/sac/sac_agent
2026-02-20 07:39:05 [info     ] rl_training_complete

SAC training done in 64.1 min
  Train mean_reward: -58.4967
  Train std_reward: 0.0000
  Train mean_final_value: 10116246.7341
  Train std_final_value: 0.000

## Regime Detection
Market regime identification using KMeans clustering + Gaussian HMM

In [4]:
# === REGIME DETECTION ===
# Builds market-wide return/volatility series from the cleaned prices, fits a
# KMeans and an HMM regime detector on them, and stores the HMM labels.
import time
from quant_lab.regime.detector import RegimeDetector, DetectorConfig
from quant_lab.regime.hmm import HMMConfig


def _count_regimes(regime_labels):
    """Return the number of distinct non-negative labels."""
    return len(set(regime_labels[regime_labels >= 0]))


# Compute market-level returns and volatility
clean_df = pd.read_parquet('data/cleaned/nifty50_cleaned.parquet')
clean_df = clean_df.sort_values(['ticker', 'date'])
# Per-ticker simple daily return of the adjusted close
clean_df['return'] = clean_df.groupby('ticker')['adj_close'].transform(
    lambda prices: prices.pct_change()
)
# Per-ticker 21-row rolling std of returns (needs at least 5 observations)
clean_df['volatility'] = clean_df.groupby('ticker')['return'].transform(
    lambda rets: rets.rolling(21, min_periods=5).std()
)
clean_df = clean_df.dropna(subset=['return', 'volatility'])

# Cross-sectional average per date
market_df = (
    clean_df.groupby('date')[['return', 'volatility']]
    .mean()
    .rename(columns={'return': 'mean_return', 'volatility': 'mean_volatility'})
    .reset_index()
    .sort_values('date')
)

returns = market_df['mean_return'].values
volatility = market_df['mean_volatility'].values
dates = market_df['date'].values

# 1. KMeans regime detection
print("Running KMeans regime detection...")
feature_matrix = np.column_stack([returns, volatility])
kmeans_config = DetectorConfig(method='kmeans', n_regimes=4)
kmeans_detector = RegimeDetector(kmeans_config)
kmeans_result = kmeans_detector.fit(
    embeddings=feature_matrix,
    returns=returns,
    volatility=volatility,
)
print(f"KMeans found {_count_regimes(kmeans_result['labels'])} regimes")
print(kmeans_result['summary'])

# 2. HMM regime detection
print("\nRunning HMM regime detection...")
hmm_config_obj = HMMConfig(n_regimes=4, covariance_type='diag')
hmm_detector_config = DetectorConfig(method='hmm', n_regimes=4, hmm_config=hmm_config_obj)
hmm_detector = RegimeDetector(hmm_detector_config)
hmm_result = hmm_detector.fit(returns=returns, volatility=volatility)
print(f"HMM found {_count_regimes(hmm_result['labels'])} regimes")
print(hmm_result['summary'])

# Save regime labels (use HMM as primary)
regime_dir = Path('outputs/regimes')
regime_dir.mkdir(parents=True, exist_ok=True)

labels = hmm_result['labels']
regime_df_out = pd.DataFrame({
    'date': dates[:len(labels)],
    'regime_label': labels,
})
regime_df_out.to_parquet(regime_dir / 'regime_labels.parquet', index=False)

summary = hmm_result['summary']
if len(summary) > 0:
    summary.to_parquet(regime_dir / 'regime_summary.parquet', index=False)

# Save to Drive
drive_regime_dir = DRIVE_DIR / 'outputs/regimes'
for f in regime_dir.glob('*.parquet'):
    shutil.copy(f, drive_regime_dir / f.name)
print("Regime labels saved to Drive!")

Running KMeans regime detection...
2026-02-20 07:39:05 [info     ] regime_detection_complete      label_map={0: 'Bear', 1: 'High-Vol Bull', 2: 'Transition', 3: 'Transition'} method=kmeans n_regimes=4


  $max \{ core_k(a), core_k(b), 1/\alpha d(a,b) \}$.
  return datetime.utcnow().replace(tzinfo=utc)


KMeans found 4 regimes
   cluster_id          label mean_return mean_volatility frequency  \
0           0           Bear     -0.0115          0.0192     22.6%   
1           1  High-Vol Bull      0.0101          0.0206     25.2%   
2           2     Transition      0.0016          0.0155     51.4%   
3           3     Transition      0.0091          0.0550      0.8%   

  avg_duration  
0     1.5 days  
1     2.0 days  
2     4.0 days  
3    15.0 days  

Running HMM regime detection...
2026-02-20 07:39:05 [info     ] regime_detection_complete      label_map={0: 'Transition', 1: 'High-Vol Bull', 2: 'Transition', 3: 'Crisis'} method=hmm n_regimes=4
HMM found 4 regimes
   cluster_id          label mean_return mean_volatility frequency  \
0           0     Transition      0.0008          0.0213     31.3%   
1           1  High-Vol Bull      0.0044          0.0458      1.4%   
2           2     Transition      0.0009          0.0158     67.3%   
3           3         Crisis     -0.0946    

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Regime labels saved to Drive!


## Full Backtest + HTML Report
Backtest with regime-conditional performance breakdown
- Loads Transformer model from Notebook A (via Drive)
- Falls back to Ridge baseline if Transformer not available
- Generates interactive Plotly HTML report

In [5]:
# === BACKTEST ===
# CRITICAL: Transformer needs sequence-based inference via DataModule,
# not flat 2D arrays. This uses the same dual-path approach as run_backtest.py.
import time
import torch
from quant_lab.data.datasets import TemporalSplit, create_flat_datasets
from quant_lab.data.datamodule import QuantDataModule, DataModuleConfig
from quant_lab.features.feature_store import FeatureStore
from quant_lab.models.linear_baseline import RidgeBaseline
from quant_lab.backtest.engine import BacktestEngine, BacktestConfig
from quant_lab.backtest.execution import ExecutionModel

# Check if Transformer model is available from Notebook A (via Drive)
transformer_path = DRIVE_DIR / 'outputs/models/transformer/final_model.pt'
model = None
model_type = 'ridge_adhoc'

if transformer_path.exists():
    print("Loading Transformer model from Drive...")
    from quant_lab.models.transformer.model import TransformerForecaster
    Path('outputs/models/transformer').mkdir(parents=True, exist_ok=True)
    shutil.copy(transformer_path, 'outputs/models/transformer/final_model.pt')
    model = TransformerForecaster.load(Path('outputs/models/transformer/final_model.pt'))
    model_type = 'transformer'
    print(f"Loaded Transformer ({model.count_parameters():,} params)")
else:
    print("Transformer not ready yet. Will train Ridge baseline...")

# Prepare data
target_col = 'log_return_1d'
base_cols_set = {'date', 'ticker', 'open', 'high', 'low', 'close', 'volume', 'adj_close'}
feat_cols_bt = [c for c in feature_df.columns if c not in base_cols_set]

split = TemporalSplit(train_end='2021-12-31', val_end='2023-06-30')
datasets = create_flat_datasets(feature_df, feat_cols_bt, split, target_col=target_col)
X_train, y_train, meta_train = datasets['train']
X_test, y_test, meta_test = datasets['test']

# Generate signals — dual path depending on model type
if model_type == 'transformer':
    # Transformer needs 3D sequence input via DataModule + predict_returns()
    print("Generating signals via Transformer (sequence-based inference)...")
    dm = QuantDataModule(
        feature_df, feat_cols_bt, split,
        DataModuleConfig(
            sequence_length=63,
            target_col=target_col,
            batch_size=256,
            num_workers=2,
        ),
    )
    dm.setup()
    test_loader = dm.test_dataloader()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    all_preds = []
    with torch.no_grad():
        for x, _targets in test_loader:
            x = x.to(device)
            preds = model.predict_returns(x)
            all_preds.append(preds.cpu().numpy())

    import numpy as np
    test_preds = np.concatenate(all_preds)

    # Align with meta_test (DataModule may have fewer samples due to sequence_length)
    # NOTE(review): taking the LAST len(test_preds) rows is only correct if the test
    # loader yields samples in the same row order as meta_test and the dropped
    # warm-up rows are the first rows of that order. If the DataModule drops the
    # warm-up window per ticker, predictions get attached to the wrong
    # (date, ticker) rows — confirm against QuantDataModule / run_backtest.py.
    meta_test = meta_test.iloc[-len(test_preds):]
    y_test = y_test[-len(test_preds):]
    print(f"Transformer predictions: {len(test_preds)} samples")
else:
    # Ridge baseline — works with flat 2D input
    model = RidgeBaseline(alpha=1.0)
    model.fit(X_train, y_train)
    model_type = 'ridge_adhoc'
    test_preds = model.predict(X_test)
    print(f"Ridge predictions: {len(test_preds)} samples")

signals_df = meta_test.copy()
signals_df['signal'] = test_preds

# Restrict prices to the dates that actually carry a signal.
test_dates = meta_test['date'].unique()
test_prices = feature_df[feature_df['date'].isin(test_dates)][['date', 'ticker', 'adj_close']].copy()

# Backtest
execution_model = ExecutionModel(commission_bps=10.0, slippage_bps=5.0, spread_bps=5.0, execution_delay_bars=1)
backtest_config = BacktestConfig(
    initial_capital=1_000_000, rebalance_frequency=5,
    max_position_size=0.20, top_n=5, risk_free_rate=0.05,
)
engine = BacktestEngine(execution_model=execution_model, config=backtest_config)
bt_result = engine.run(prices=test_prices, signals=signals_df)

# Print metrics
print(f"\n{'='*60}")
print(f"BACKTEST RESULTS ({model_type.upper()})")
print(f"{'='*60}")
for metric, value in bt_result.metrics.items():
    # Ratio-like metrics are shown as percentages, everything else as plain floats.
    if 'return' in metric or 'cagr' in metric or 'drawdown' in metric:
        print(f"  {metric:25s}: {value:>10.2%}")
    else:
        print(f"  {metric:25s}: {value:>10.4f}")

# Generate HTML report
try:
    from quant_lab.backtest.report import BacktestReport, ReportConfig
    regime_labels = None
    regime_summary = None
    regime_labels_path = Path('outputs/regimes/regime_labels.parquet')
    if regime_labels_path.exists():
        # The saved labels span the whole price history, while the equity curve
        # only covers the test window. Match them by date; slicing the first N
        # labels would pair the oldest regimes with the test-period dates.
        try:
            regime_df = pd.read_parquet(regime_labels_path)
            regime_by_date = (
                regime_df.assign(date=pd.to_datetime(regime_df['date']))
                .drop_duplicates('date', keep='last')
                .set_index('date')['regime_label']
                .sort_index()
            )
            curve_dates = pd.DatetimeIndex(pd.to_datetime(bt_result.equity_curve.index))
            # Dates without a label take the nearest earlier label, or the
            # nearest later one at the very start.
            aligned = regime_by_date.reindex(curve_dates).ffill().bfill()
            if aligned.isna().any():
                print("No regime labels overlap the backtest dates; omitting regime overlay.")
            else:
                regime_labels = aligned.astype(regime_by_date.dtype).to_numpy()
        except Exception as align_err:
            # A report without the regime overlay beats one with wrong regimes.
            print(f"Could not align regime labels to backtest dates ({align_err}); omitting regime overlay.")
            regime_labels = None
    if (Path('outputs/regimes/regime_summary.parquet')).exists():
        regime_summary = pd.read_parquet('outputs/regimes/regime_summary.parquet')

    report_config = ReportConfig(title=f'Backtest Report - {model_type.title()}', output_dir='outputs/backtests')
    report = BacktestReport(report_config)
    report_path = report.generate(
        portfolio_values=bt_result.equity_curve.values,
        dates=bt_result.equity_curve.index,
        metrics=bt_result.metrics,
        weights_history=bt_result.weights_history.values,
        regime_labels=regime_labels,
        regime_summary=regime_summary,
    )
    print(f"\nReport saved: {report_path}")

    # Save to Drive
    for f in Path('outputs/backtests').glob('*'):
        shutil.copy(f, DRIVE_DIR / 'outputs/backtests' / f.name)
    print("Report saved to Drive!")
except Exception as e:
    # Reporting is best-effort: the metrics above are already printed.
    import traceback
    print(f"Report generation failed: {e}")
    traceback.print_exc()

Loading Transformer model from Drive...
Loaded Transformer (4,744,966 params)
2026-02-20 07:39:06 [info     ] flat_split_train               samples=131813
2026-02-20 07:39:06 [info     ] flat_split_val                 samples=18130
2026-02-20 07:39:06 [info     ] flat_split_test                samples=18032
Generating signals via Transformer (sequence-based inference)...
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_lengt

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2962 valid_samples=2711
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=1018 valid_samples=767
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_fea

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=1050 valid_samples=799
2026-02-20 07:39:07 [debug    ] dataset_created                num_fea

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=2961 valid_samples=2710
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 sequence_length=63 total_rows=370 valid_samples=307
2026-02-20 07:39:07 [debug    ] dataset_created                num_features=15 se

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return date

Transformer predictions: 14945 samples
2026-02-20 07:39:09 [info     ] backtest_complete              annual_turnover=63.9587 avg_daily_turnover=0.2538 cagr=0.4077 calmar=2.5972 max_drawdown=-0.1570 sharpe=1.6691 sortino=1.9457 total_return=0.6476 volatility=0.1869

BACKTEST RESULTS (TRANSFORMER)
  cagr                     :     40.77%
  sharpe                   :     1.6691
  sortino                  :     1.9457
  max_drawdown             :    -15.70%
  calmar                   :     2.5972
  total_return             :     64.76%
  volatility               :     0.1869
  avg_daily_turnover       :     0.2538
  annual_turnover          :    63.9587


  return datetime.utcnow().replace(tzinfo=utc)

datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).



2026-02-20 07:39:10 [info     ] report_generated               path=outputs/backtests/report.html

Report saved: outputs/backtests/report.html
Report saved to Drive!


In [6]:
def _format_metric(value, spec):
    """Format ``value`` with ``spec``; fall back to ``str(value)`` if it is not numeric."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        # e.g. the 'N/A' placeholder cannot take a float format spec
        return str(value)


print("=" * 60)
print("NOTEBOOK C COMPLETE")
print("=" * 60)
print(f"\nAll outputs on Drive:")
# List every file this notebook is expected to have written to Drive.
for d in ['outputs/models/rl/sac', 'outputs/regimes', 'outputs/backtests']:
    p = DRIVE_DIR / d
    if p.exists():
        for f in sorted(p.glob('*')):
            if f.is_file():
                print(f"  {f.relative_to(DRIVE_DIR)}: {f.stat().st_size/1e6:.1f} MB")
# Missing metrics print as N/A instead of raising on the float format spec.
sharpe_text = _format_metric(bt_result.metrics.get('sharpe', 'N/A'), '.4f')
cagr_text = _format_metric(bt_result.metrics.get('cagr', 'N/A'), '.2%')
print(f"\nBacktest: Sharpe={sharpe_text}, CAGR={cagr_text}")
print("=" * 60)
print("\nAll 3 notebooks complete! Check Google Drive for all outputs.")

NOTEBOOK C COMPLETE

All outputs on Drive:
  outputs/models/rl/sac/sac_agent.zip: 12.5 MB
  outputs/regimes/regime_labels.parquet: 0.0 MB
  outputs/regimes/regime_summary.parquet: 0.0 MB
  outputs/backtests/report.html: 0.2 MB

Backtest: Sharpe=1.6691, CAGR=40.77%

All 3 notebooks complete! Check Google Drive for all outputs.
