# Notebook E: Optuna Hyperparameter Sweep on NIFTY 500
**Run on Colab Pro+ H100** | Finds optimal hyperparameters for Transformer & TFT on broader universe

In [None]:
# === ENVIRONMENT SETUP ===
# Prepares the Colab runtime for the sweep: re-clones the repository into
# /content/quant-lab, installs it in editable mode together with Optuna,
# mounts Google Drive, creates the Drive folders this notebook writes to,
# and prints which GPU (if any) is available.
import os
import subprocess
import sys

# Reset working directory (prevents getcwd bug on re-run)
os.chdir("/content")

# Clean and re-clone for fresh code
if os.path.exists('/content/quant-lab'):
    print("Removing existing quant-lab directory...")
    # check=True so a failed removal stops here instead of surfacing later as
    # a "destination path already exists" clone error.
    subprocess.run(['rm', '-rf', '/content/quant-lab'], check=True)

print("Cloning repository...")
# Output is captured so stderr can be shown in the failure message below.
result = subprocess.run(
    ['git', 'clone', 'https://github.com/Mohit1053/quant-lab.git', '/content/quant-lab'],
    capture_output=True, text=True,
)
if result.returncode != 0:
    print(f"Clone failed: {result.stderr}")
    raise RuntimeError("Git clone failed")
print("Clone successful.")

# Install the freshly cloned package in editable mode using this kernel's interpreter.
os.chdir('/content/quant-lab')
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', '-e', '.'], check=True)
print("Package installed.")

# Install optuna
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'optuna'], check=True)
print("Optuna installed.")

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# Root folder on Drive used for cached data and sweep outputs; the
# sub-folders are created up front so later copies have a destination.
from pathlib import Path
DRIVE_DIR = Path('/content/drive/MyDrive/quant_lab')
for d in ['data/features', 'data/cleaned', 'outputs/sweep/transformer', 'outputs/sweep/tft']:
    (DRIVE_DIR / d).mkdir(parents=True, exist_ok=True)

# Report the first CUDA device and its total memory (in units of 1e9 bytes).
import torch
if torch.cuda.is_available():
    gpu = torch.cuda.get_device_name(0)
    mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu} ({mem:.1f} GB)")
else:
    print("WARNING: No GPU!")

In [None]:
# === FIX NUMPY/SCIPY VERSIONS (Colab ships newer incompatible versions) ===
# Uninstalls the preinstalled numpy / pandas / scipy / scikit-learn and then
# reinstalls the exact versions pinned on the second command.
# NOTE(review): the environment-setup cell has already imported torch in this
# kernel; if numpy was loaded as a side effect, the pinned builds probably only
# take effect after a runtime restart — confirm.
!pip uninstall -y numpy pandas scipy scikit-learn
!pip install --no-cache-dir numpy==1.26.4 pandas==2.2.2 scipy==1.11.4 scikit-learn==1.4.2

## Data Loading — Important Note

**NSE blocks Colab datacenter IPs**, so fetching NIFTY 500 tickers directly won't work.

**Recommended**: Upload `nifty500_features.parquet` (184MB) to Google Drive at `My Drive/quant_lab/data/features/` before running the next cell. Or run Notebook D first — it caches data to Drive.

In [None]:
# === LOAD NIFTY 500 DATA ===
# Priority: 1) Google Drive cache  2) File upload  3) NSE pipeline (may fail on Colab IPs)
# Each option runs only if the previous ones did not set `loaded`; if none
# succeeds a FileNotFoundError is raised. On success the features parquet is
# read into `df` from the local copy.
import shutil
import pandas as pd

# Drive-side cache location and the local (repo-relative) working copy.
drive_features = DRIVE_DIR / 'data/features/nifty500_features.parquet'
local_features = Path('data/features/nifty500_features.parquet')
Path('data/features').mkdir(parents=True, exist_ok=True)
Path('data/cleaned').mkdir(parents=True, exist_ok=True)

# Set to True by whichever option manages to place the features file locally.
loaded = False

# --- Option 1: Load from Google Drive (fastest, recommended for 184MB file) ---
if drive_features.exists():
    shutil.copy(drive_features, local_features)
    # The cleaned parquet is optional: it is copied only when present on Drive.
    src = DRIVE_DIR / 'data/cleaned/nifty500_cleaned.parquet'
    if src.exists():
        shutil.copy(src, 'data/cleaned/nifty500_cleaned.parquet')
    print(f"✅ NIFTY 500 data loaded from Google Drive! ({drive_features.stat().st_size/1e6:.0f} MB)")
    loaded = True

# --- Option 2: Upload from local machine (may timeout for >100MB) ---
if not loaded:
    print("⚠️  No NIFTY 500 data on Drive at:")
    print(f"    {drive_features}")
    print()
    print("  RECOMMENDED: Upload nifty500_features.parquet to Google Drive:")
    print("    1. Go to drive.google.com")
    print("    2. Navigate to My Drive/quant_lab/data/features/")
    print("    3. Upload nifty500_features.parquet (184 MB)")
    print("    4. Re-run this cell")
    print()
    print("  Or try direct upload (may timeout for large files)...")
    try:
        from google.colab import files
        uploaded = files.upload()
        # The first uploaded file whose name contains 'nifty500' and ends in
        # '.parquet' is written to the local features path (whatever its
        # original name) and mirrored to Drive; later files are ignored.
        # NOTE(review): a file such as 'nifty500_cleaned.parquet' would also
        # match this test and be stored as the features file — confirm intent.
        for fname, content in uploaded.items():
            if 'nifty500' in fname and fname.endswith('.parquet'):
                with open(str(local_features), 'wb') as f:
                    f.write(content)
                (DRIVE_DIR / 'data/features').mkdir(parents=True, exist_ok=True)
                shutil.copy(local_features, drive_features)
                print(f"✅ Uploaded {fname} ({len(content)/1e6:.1f} MB) — saved to Drive for next time!")
                loaded = True
                break
        if not loaded and uploaded:
            print("❌ No nifty500 parquet found in uploaded files.")
    except Exception as e:
        # Best-effort: any upload problem is reported and the next option is tried.
        print(f"Upload skipped/failed: {e}")

# --- Option 3: Try NSE pipeline (usually fails from Colab) ---
if not loaded:
    print("\n⏳ Attempting NSE pipeline download (may fail if NSE blocks Colab IPs)...")
    try:
        # check=True and timeout=600 (seconds) turn a failing or hanging
        # pipeline into an exception handled by the except clause below.
        subprocess.run(
            [sys.executable, 'scripts/run_pipeline.py', 'data.universe.name=nifty500'],
            check=True, timeout=600,
        )
        # Only count as loaded if the pipeline actually produced the features file.
        if local_features.exists():
            (DRIVE_DIR / 'data/features').mkdir(parents=True, exist_ok=True)
            shutil.copy(local_features, drive_features)
            print("✅ NIFTY 500 data downloaded and saved to Drive!")
            loaded = True
    except Exception as e:
        print(f"❌ NSE pipeline failed: {e}")
        print("   This is expected — NSE blocks Colab datacenter IPs.")
        print("   Upload nifty500_features.parquet to Google Drive and re-run this cell.")

# Nothing worked: stop the notebook with instructions instead of failing later.
if not loaded:
    raise FileNotFoundError(
        "Could not load NIFTY 500 data.\n"
        "Upload nifty500_features.parquet to Google Drive at:\n"
        f"  {drive_features}\n"
        "Or run Notebook D first to populate Google Drive."
    )

# Read the local copy and print its row count and number of distinct tickers.
df = pd.read_parquet(local_features)
print(f"\nNIFTY 500: {df.shape[0]:,} rows, {df['ticker'].nunique()} tickers")

## Optuna Sweep — Transformer
TPE sampler with a median pruner; runs up to 50 trials or 2 hours, whichever comes first

In [None]:
# === OPTUNA SWEEP: TRANSFORMER ===
import time
import json
import optuna
import numpy as np
import torch
from quant_lab.utils.seed import set_global_seed
from quant_lab.utils.device import get_device
from quant_lab.data.datasets import TemporalSplit
from quant_lab.data.datamodule import QuantDataModule, DataModuleConfig
from quant_lab.models.transformer.model import TransformerForecaster, TransformerConfig, MultiTaskLoss
from quant_lab.training.trainer import Trainer, TrainerConfig

# Shared setup for the sweeps: fixed seed, compute device, feature column
# list, and the temporal train/validation split boundaries.
set_global_seed(42)
device = get_device()

# Every column of df that is not an identifier or raw price/volume field is
# treated as a model input feature.
# NOTE(review): 'log_return_1d' is used as target_col by the objectives but is
# not in base_cols, so if it is a column of df it is also fed in as a feature —
# confirm the data module offsets the target so this is not leakage.
base_cols = {'date', 'ticker', 'open', 'high', 'low', 'close', 'volume', 'adj_close'}
feature_cols = [c for c in df.columns if c not in base_cols]
# presumably train <= 2021-12-31 < validation <= 2023-06-30 — verify against TemporalSplit
split = TemporalSplit(train_end='2021-12-31', val_end='2023-06-30')

def transformer_objective(trial):
    """Optuna objective: train one Transformer configuration and score it.

    Samples architecture, optimiser and data hyperparameters from ``trial``,
    builds the data module, model, loss and trainer, runs training, and
    returns the trainer's best validation loss (the study minimises it).

    No intermediate values are reported to ``trial`` from this function.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``trainer.best_val_loss`` after ``fit``, or ``float('inf')`` when the
        data module yields no train or no validation loader.
    """
    # Keep the suggest_* calls in this order: the sampler is seeded, so the
    # call order is part of what makes a sweep reproducible.
    d_model = trial.suggest_categorical('d_model', [64, 128, 256])
    nhead = trial.suggest_categorical('nhead', [4, 8])
    num_layers = trial.suggest_int('num_encoder_layers', 2, 6)
    dim_ff = trial.suggest_categorical('dim_feedforward', [256, 512, 1024])
    dropout = trial.suggest_float('dropout', 0.05, 0.3)
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    wd = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    seq_len = trial.suggest_categorical('sequence_length', [21, 42, 63])

    # The data module is rebuilt per trial because sequence length and batch
    # size are themselves being tuned.
    dm = QuantDataModule(
        df, feature_cols, split,
        DataModuleConfig(sequence_length=seq_len, target_col='log_return_1d', batch_size=batch_size, num_workers=2),
    )
    dm.setup()
    tl = dm.train_dataloader()
    vl = dm.val_dataloader()
    # Without a validation loader there is nothing to measure the objective
    # on, so skip training and give the trial the worst possible score.
    if tl is None or vl is None:
        return float('inf')

    cfg = TransformerConfig(
        num_features=dm.num_features, d_model=d_model, nhead=nhead,
        num_encoder_layers=num_layers, dim_feedforward=dim_ff, dropout=dropout,
        direction_weight=0.3, volatility_weight=0.3,
    )
    model = TransformerForecaster(cfg)
    loss_fn = MultiTaskLoss(cfg)

    # Each trial writes checkpoints to its own directory, keyed by trial number.
    tc = TrainerConfig(
        epochs=30, learning_rate=lr, weight_decay=wd,
        warmup_steps=500, patience=5, mixed_precision=True,
        checkpoint_dir=f'outputs/sweep/transformer/trial_{trial.number}',
    )
    trainer = Trainer(model, loss_fn, tc, device)
    trainer.fit(tl, vl)

    return trainer.best_val_loss

# Create the Transformer study: seeded TPE sampler, minimising the value
# returned by transformer_objective.
# NOTE(review): transformer_objective never calls trial.report() or
# trial.should_prune(), and Trainer is not handed the trial, so the
# MedianPruner configured here has no intermediate values to act on —
# confirm whether pruning is meant to be wired in.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
)

print("Starting Transformer sweep (50 trials)...")
start = time.time()
# Stops after 50 trials or 7200 s, whichever comes first.
# gc_after_trial=True runs garbage collection after every trial so the
# previous trial's model/trainer objects are reclaimed before the next one
# allocates, instead of accumulating over the sweep.
study.optimize(transformer_objective, n_trials=50, timeout=7200, gc_after_trial=True)
elapsed = time.time() - start

print(f"\nSweep done in {elapsed/60:.1f} min")
print(f"Best val loss: {study.best_value:.6f}")
print(f"Best params: {json.dumps(study.best_params, indent=2)}")

# Save results
out_dir = Path('outputs/sweep/transformer')
out_dir.mkdir(parents=True, exist_ok=True)
with open(out_dir / 'best_params.json', 'w') as f:
    json.dump(study.best_params, f, indent=2)

# Only trials that produced a value are exported (t.value is None otherwise).
trials_data = [{'number': t.number, 'value': t.value, 'params': t.params} for t in study.trials if t.value is not None]
with open(out_dir / 'all_trials.json', 'w') as f:
    json.dump(trials_data, f, indent=2)

# Save to Drive
for f_path in out_dir.glob('*.json'):
    shutil.copy(f_path, DRIVE_DIR / 'outputs/sweep/transformer' / f_path.name)
print("Transformer sweep results saved to Drive!")

## Optuna Sweep — TFT
TPE sampler with a median pruner; runs up to 50 trials or 2 hours, whichever comes first

In [None]:
# === OPTUNA SWEEP: TFT ===
from quant_lab.models.tft.model import TFTForecaster, TFTConfig

def tft_objective(trial):
    """Optuna objective: train one TFT configuration and score it.

    Samples architecture, optimiser and data hyperparameters from ``trial``,
    builds the data module, model, loss and trainer, runs training, and
    returns the trainer's best validation loss (the study minimises it).

    No intermediate values are reported to ``trial`` from this function.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``trainer.best_val_loss`` after ``fit``, or ``float('inf')`` when the
        data module yields no train or no validation loader.
    """
    # Keep the suggest_* calls in this order: the sampler is seeded, so the
    # call order is part of what makes a sweep reproducible.
    d_model = trial.suggest_categorical('d_model', [16, 32, 64])
    nhead = trial.suggest_categorical('nhead', [2, 4])
    num_layers = trial.suggest_int('num_encoder_layers', 1, 3)
    lstm_hidden = trial.suggest_categorical('lstm_hidden', [16, 32, 64])
    grn_hidden = trial.suggest_categorical('grn_hidden', [8, 16, 32])
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    lr = trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True)
    wd = trial.suggest_float('weight_decay', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    seq_len = trial.suggest_categorical('sequence_length', [21, 42, 63])

    # The data module is rebuilt per trial because sequence length and batch
    # size are themselves being tuned.
    dm = QuantDataModule(
        df, feature_cols, split,
        DataModuleConfig(sequence_length=seq_len, target_col='log_return_1d', batch_size=batch_size, num_workers=2),
    )
    dm.setup()
    tl = dm.train_dataloader()
    vl = dm.val_dataloader()
    # Without a validation loader there is nothing to measure the objective
    # on, so skip training and give the trial the worst possible score.
    if tl is None or vl is None:
        return float('inf')

    cfg = TFTConfig(
        num_features=dm.num_features, d_model=d_model, nhead=nhead,
        num_encoder_layers=num_layers, lstm_layers=1, lstm_hidden=lstm_hidden,
        grn_hidden=grn_hidden, dropout=dropout,
        direction_weight=0.3, volatility_weight=0.3,
    )
    model = TFTForecaster(cfg)
    # MultiTaskLoss is built from a TransformerConfig carrying the same
    # direction/volatility weights as the TFT config above.
    # NOTE(review): presumably MultiTaskLoss only reads the weight fields —
    # confirm whether it could take the TFTConfig directly.
    loss_cfg = TransformerConfig(num_features=dm.num_features, direction_weight=0.3, volatility_weight=0.3)
    loss_fn = MultiTaskLoss(loss_cfg)

    # Each trial writes checkpoints to its own directory, keyed by trial number.
    tc = TrainerConfig(
        epochs=30, learning_rate=lr, weight_decay=wd,
        warmup_steps=300, patience=5, mixed_precision=True,
        checkpoint_dir=f'outputs/sweep/tft/trial_{trial.number}',
    )
    trainer = Trainer(model, loss_fn, tc, device)
    trainer.fit(tl, vl)

    return trainer.best_val_loss

# Create the TFT study: seeded TPE sampler, minimising the value returned by
# tft_objective.
# NOTE(review): tft_objective never calls trial.report() or
# trial.should_prune(), and Trainer is not handed the trial, so the
# MedianPruner configured here has no intermediate values to act on —
# confirm whether pruning is meant to be wired in. Unlike the Transformer
# study, no n_warmup_steps is set here — confirm that is intentional.
tft_study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5),
)

print("Starting TFT sweep (50 trials)...")
start = time.time()
# Stops after 50 trials or 7200 s, whichever comes first.
# gc_after_trial=True runs garbage collection after every trial so the
# previous trial's model/trainer objects are reclaimed before the next one
# allocates, instead of accumulating over the sweep.
tft_study.optimize(tft_objective, n_trials=50, timeout=7200, gc_after_trial=True)
elapsed = time.time() - start

print(f"\nSweep done in {elapsed/60:.1f} min")
print(f"Best val loss: {tft_study.best_value:.6f}")
print(f"Best params: {json.dumps(tft_study.best_params, indent=2)}")

# Save
out_dir = Path('outputs/sweep/tft')
out_dir.mkdir(parents=True, exist_ok=True)
with open(out_dir / 'best_params.json', 'w') as f:
    json.dump(tft_study.best_params, f, indent=2)
# Only trials that produced a value are exported (t.value is None otherwise).
trials_data = [{'number': t.number, 'value': t.value, 'params': t.params} for t in tft_study.trials if t.value is not None]
with open(out_dir / 'all_trials.json', 'w') as f:
    json.dump(trials_data, f, indent=2)

# Mirror the JSON results to the Drive folder for this sweep.
for f_path in out_dir.glob('*.json'):
    shutil.copy(f_path, DRIVE_DIR / 'outputs/sweep/tft' / f_path.name)
print("TFT sweep results saved to Drive!")

## Summary
Best hyperparameters from both sweeps

In [None]:
# Final recap: prints each sweep's best validation loss and hyperparameters,
# followed by the Drive folder that holds the saved JSON results.
divider = "=" * 60
print(divider, "NOTEBOOK E COMPLETE — OPTUNA SWEEPS", divider, sep="\n")
print(
    f"\nTransformer best val loss: {study.best_value:.6f}",
    f"Transformer best params: {json.dumps(study.best_params, indent=2)}",
    sep="\n",
)
print(
    f"\nTFT best val loss: {tft_study.best_value:.6f}",
    f"TFT best params: {json.dumps(tft_study.best_params, indent=2)}",
    sep="\n",
)
print(f"\nAll results on Drive: {DRIVE_DIR / 'outputs/sweep'}", divider, sep="\n")