<a href="https://colab.research.google.com/github/Shadfurman/FourteenToAlpha/blob/main/trading_bot_scaffold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trading Bot: Conv-Transformer → Backtest (Scaffold)

End-to-end notebook scaffold. Fill in the TODOs only—no scope creep.

**Sections:**
- Setup & seed
- Data & features
- Split
- Model
- Train
- Inference & backtest
- Metrics & plots
- Params dump & outputs


## Setup & seed

In [2]:
# Minimal installs for Colab (comment any you don't need)
try:
    import google.colab  # type: ignore
    IN_COLAB = True
except Exception:
    IN_COLAB = False

if IN_COLAB:
    !pip -q install yfinance pandas numpy scikit-learn matplotlib torch torchvision torchaudio pyarrow --progress-bar off

import os, json, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

SEED = 42  # <-- SINGLE SOURCE OF TRUTH
def set_seed(seed=SEED):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(SEED)

# Global paths
OUTPUT_DIR = './outputs'
PARQUET_DIR = '/content/data_parquet'
DATA_IN_DIRS = [
    '/content/drive/MyDrive/stock_market_data/nasdaq/csv',
    '/content/drive/MyDrive/stock_market_data/nyse/csv',
]
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(PARQUET_DIR, exist_ok=True)

# PARAMS — edit here only
PARAMS = {
    'tickers': "ALL_PARQUET",  # will discover from parquet folder
    'start': '1970-01-01',
    'end': '2025-01-01',
    'lookback': 64,
    'features': ['pct_change', 'roll_mean_10', 'roll_vol_10', 'rsi_14'],
    'split': {
        'train_start': '2010-01-01', 'train_end': '2018-12-31',
        'val_start':   '2019-01-01', 'val_end':   '2019-12-31',
        'test_start':  '2020-01-01', 'test_end':  '2024-12-31',
    },
    'model_option': 'A',
    'epochs': 12,
    'batch_size': 256,
    'lr': 1e-3,
    'threshold_long': 0.55,
    'threshold_short': 0.45,
    'cost_bps': 5,
}
with open(os.path.join(OUTPUT_DIR, 'params_seed.json'), 'w') as f:
    json.dump({'seed': SEED, 'params': PARAMS}, f, indent=2)

# Quick environment check
device = "cuda" if torch.cuda.is_available() else "cpu"
gpu = torch.cuda.get_device_name(0) if device == "cuda" else "CPU"
print(f"Device: {device} [{gpu}]")
print('Seed & PARAMS set; outputs/params_seed.json written.')


Device: cpu [CPU]
Seed & PARAMS set; outputs/params_seed.json written.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data & features

In [5]:
import yfinance as yf

def load_ohlcv_yf(ticker: str, start: str, end: str) -> pd.DataFrame:
    """Load daily OHLCV from yfinance. TODO: cache to CSV in ./data."""
    df = yf.download(ticker, start=start, end=end, progress=False)
    if df.empty:
        raise ValueError(f'No data for {ticker}')
    df = df[['Open','High','Low','Close','Volume']].copy()
    df.index.name = 'Date'
    df['Ticker'] = ticker
    return df

def compute_rsi(series: pd.Series, period: int = 14) -> pd.Series:
    series = series.astype(float)
    delta = series.diff()
    up = delta.clip(lower=0.0)
    down = (-delta).clip(lower=0.0)
    roll_up = up.ewm(alpha=1/period, adjust=False).mean()
    roll_down = down.ewm(alpha=1/period, adjust=False).mean()
    rs = roll_up / (roll_down + 1e-8)
    return 100.0 - (100.0 / (1.0 + rs))

def make_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute pct-change, rolling mean/vol(10), RSI(14) on adjusted close if present.
    Add next-day direction target y (1 if up, else 0). Drops NaNs and sorts index.
    """
    out = df.copy()
    out = out.sort_index()

    # Prefer adjusted. If converter already replaced Close with adjusted, this is a no-op.
    price = out['AdjClose'] if 'AdjClose' in out.columns else out['Close']

    out['pct_change']   = price.pct_change()
    out['roll_mean_10'] = out['pct_change'].rolling(window=10, min_periods=10).mean()
    out['roll_vol_10']  = out['pct_change'].rolling(window=10, min_periods=10).std()
    out['rsi_14']       = compute_rsi(price, 14)

    # Target uses forward return on the same price series
    out['ret_fwd_1'] = price.pct_change().shift(-1)
    out['y'] = (out['ret_fwd_1'] > 0).astype(int)

    # Final cleanup: drop rows with any NaNs in features/target
    out = out.dropna(subset=['pct_change', 'roll_mean_10', 'roll_vol_10', 'rsi_14', 'ret_fwd_1', 'y'])
    return out

def load_all_tickers(tickers, start, end):
    frames = []
    for t in tickers:
        raw = load_ohlcv_yf(t, start, end)
        feats = make_features(raw)
        frames.append(feats)
    return pd.concat(frames).sort_index()

# TODO: Optionally visualize a sample
print('Data/feature functions defined. Fill TODOs as needed.')


Data/feature functions defined. Fill TODOs as needed.


## CSV→Parquet converter

In [None]:
# --- CSV → Parquet (one-time) ---
import glob

# Your paths
OUTPUT_DIR = './outputs'
DATA_IN_DIRS = [
    '/content/drive/MyDrive/stock_market_data/nasdaq/csv',
    '/content/drive/MyDrive/stock_market_data/nyse/csv',
]
PARQUET_DIR = '/content/data_parquet'
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(PARQUET_DIR, exist_ok=True)

# Map your CSV headers to canonical names we use elsewhere
CSV_COLS = {
    'Date': 'Date',
    'Open': 'Open',
    'High': 'High',
    'Low': 'Low',
    'Close': 'Close',                 # raw close (we'll replace with Adjusted Close for features)
    'Volume': 'Volume',
    'Adjusted Close': 'AdjClose',
}

def guess_ticker_from_path(p):
    import os
    return os.path.splitext(os.path.basename(p))[0].upper()

def load_csv_one(p):
    # Read permissively (handles odd rows/column orders)
    df = pd.read_csv(
        p,
        engine='python',           # robust parser
        on_bad_lines='skip'        # skip corrupted rows
    )

    # Normalize column names (lower -> canonical)
    orig_cols = df.columns.tolist()
    lc_map = {c.lower().strip(): c for c in df.columns}

    # Find variants
    date_col = lc_map.get('date')
    open_col = lc_map.get('open')
    high_col = lc_map.get('high')
    low_col  = lc_map.get('low')
    close_col= lc_map.get('close')
    vol_col  = lc_map.get('volume')
    adj_col  = lc_map.get('adjusted close') or lc_map.get('adj close') or lc_map.get('adjclose')

    # Hard fail if no Date
    if not date_col:
        raise ValueError(f'No Date column in {p} (had: {orig_cols})')

    # Build a minimal frame with whatever we found
    cols = {}
    cols['Date']   = date_col
    if open_col:  cols['Open']  = open_col
    if high_col:  cols['High']  = high_col
    if low_col:   cols['Low']   = low_col
    if close_col: cols['Close'] = close_col
    if vol_col:   cols['Volume']= vol_col
    if adj_col:   cols['AdjClose'] = adj_col

    df = df.rename(columns={v:k for k,v in cols.items()})[list(cols.keys())]

    # Parse day-first dates like 15-12-2010
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
    df = df.dropna(subset=['Date']).sort_values('Date').set_index('Date')

    # Prefer adjusted close for features; fall back to Close
    if 'AdjClose' in df.columns:
        df['Close_raw'] = df.get('Close', df['AdjClose'])
        df['Close'] = df['AdjClose']
    else:
        # No adjusted close available; use raw Close
        if 'Close' not in df.columns:
            raise ValueError(f'No Close/AdjClose in {p} (had: {orig_cols})')
        df['Close_raw'] = df['Close']

    # Coerce numerics (strip stray chars like $ or commas)
    for c in ['Open','High','Low','Close','Volume','AdjClose','Close_raw']:
        if c in df.columns:
            df[c] = (df[c].astype(str)
                           .str.replace(r'[^0-9\.\-eE]', '', regex=True)
                           .replace({'': None}))
            df[c] = pd.to_numeric(df[c], errors='coerce')

    # Basic sanity: drop rows missing Close or Volume
    need = ['Close']
    if 'Volume' in df.columns: need.append('Volume')
    df = df.dropna(subset=need)

    df['Ticker'] = guess_ticker_from_path(p)
    return df

def iter_csv_files():
    for d in DATA_IN_DIRS:
        for p in glob.glob(os.path.join(d, '*.csv')):
            yield p

def build_parquet_once():
    converted, skipped = 0, []
    for p in iter_csv_files():
        tkr = guess_ticker_from_path(p)
        outp = os.path.join(PARQUET_DIR, f'{tkr}.parquet')
        if os.path.exists(outp):
            continue
        try:
            df = load_csv_one(p)
            feats = make_features(df)  # uses adjusted Close internally now
            feats = feats.drop(columns=[c for c in ['AdjClose','Close_raw'] if c in feats.columns], errors='ignore')
            feats.to_parquet(outp)
            converted += 1
            if converted % 50 == 0:
                print(f'Converted {converted} tickers...')
        except Exception as e:
            skipped.append((tkr, str(e)))
            if len(skipped) <= 5:
                print(f'[WARN] Skipping {tkr}: {e}')
    print(f'Parquet conversion complete. Total tickers converted: {converted}. Skipped: {len(skipped)}')
    if skipped:
        import json
        with open(os.path.join(OUTPUT_DIR, 'skipped_files.json'), 'w') as f:
            json.dump(skipped, f, indent=2)
        print('Wrote outputs/skipped_files.json')

build_parquet_once()


Converted 50 tickers...
Converted 100 tickers...
Converted 150 tickers...
Converted 200 tickers...
Converted 250 tickers...
Converted 300 tickers...
Converted 350 tickers...
Converted 400 tickers...
Converted 450 tickers...
Converted 500 tickers...
Converted 550 tickers...
Converted 600 tickers...
Converted 650 tickers...
Converted 700 tickers...
Converted 750 tickers...
Converted 800 tickers...
Converted 850 tickers...
Converted 900 tickers...
Converted 950 tickers...
Converted 1000 tickers...
Converted 1050 tickers...
Converted 1100 tickers...
Converted 1150 tickers...
Converted 1200 tickers...
Converted 1250 tickers...
Converted 1300 tickers...
Converted 1350 tickers...
Converted 1400 tickers...
Converted 1450 tickers...
Converted 1500 tickers...
Converted 1550 tickers...
Converted 1600 tickers...
Converted 1650 tickers...
Converted 1700 tickers...
Converted 1750 tickers...
Converted 1800 tickers...
Converted 1850 tickers...
Converted 1900 tickers...
Converted 1950 tickers...
Conver

## Parquet Loader

In [4]:
# --- Parquet path index (no big concat) ---
import glob, os
from datetime import datetime

PARQUET_DIR = '/content/data_parquet'  # already set
paths = sorted(glob.glob(os.path.join(PARQUET_DIR, '*.parquet')))

# Utility to count rows in a date window for one ticker without keeping it in RAM
def count_rows_in_range(p, start, end):
    df = pd.read_parquet(p, columns=['Ticker'])  # small read
    # quick read of index range by reloading with date filter
    df = pd.read_parquet(p)  # still per-ticker, small-ish
    df = df.loc[(df.index >= pd.to_datetime(start)) & (df.index <= pd.to_datetime(end))]
    return len(df)

split_cfg = PARAMS['split']
# Build file lists for each split (only tickers with enough rows in ALL splits)
keep_paths = []
train_paths, val_paths, test_paths = [], [], []

lookback = PARAMS['lookback']
for p in paths:
    n_train = count_rows_in_range(p, split_cfg['train_start'], split_cfg['train_end'])
    n_val   = count_rows_in_range(p, split_cfg['val_start'],   split_cfg['val_end'])
    n_test  = count_rows_in_range(p, split_cfg['test_start'],  split_cfg['test_end'])
    if (n_train >= lookback + 2) and (n_val >= lookback + 2) and (n_test >= lookback + 2):
        keep_paths.append(p)

def filter_paths_by_range(paths, start, end):
    out = []
    for p in paths:
        # quick check again to avoid reusing tiny tickers
        n = count_rows_in_range(p, start, end)
        if n >= lookback + 2:
            out.append(p)
    return out

train_paths = filter_paths_by_range(keep_paths, split_cfg['train_start'], split_cfg['train_end'])
val_paths   = filter_paths_by_range(keep_paths, split_cfg['val_start'],   split_cfg['val_end'])
test_paths  = filter_paths_by_range(keep_paths, split_cfg['test_start'],  split_cfg['test_end'])

print(f"tickers kept: {len(keep_paths)}")
print(f"train_paths: {len(train_paths)}, val_paths: {len(val_paths)}, test_paths: {len(test_paths)}")

tickers kept: 2406
train_paths: 2406, val_paths: 2406, test_paths: 2406


## Build full_df for split

In [6]:
# --- Build full_df for split (features + target only) ---
import glob, os

MIN_COLS = PARAMS['features'] + ['y', 'Ticker']

def load_all_parquet_min(parquet_dir=PARQUET_DIR, columns=MIN_COLS):
    frames = []
    for p in glob.glob(os.path.join(parquet_dir, '*.parquet')):
        frames.append(pd.read_parquet(p, columns=columns))
    return pd.concat(frames).sort_index()

full_df = load_all_parquet_min()
print('full_df:', len(full_df), 'rows |', full_df['Ticker'].nunique(), 'tickers')


full_df: 15236147 rows | 2660 tickers


## Split (rolling walk-forward)

In [7]:
# --- Walk-forward split + filter short tickers ---
# assumes: full_df, PARAMS, get_splits() already defined

def time_slice(df, start, end):
    return df.loc[(df.index >= pd.to_datetime(start)) & (df.index <= pd.to_datetime(end))]

def get_splits(df, split_cfg):
    train_df = time_slice(df, split_cfg['train_start'], split_cfg['train_end'])
    val_df   = time_slice(df, split_cfg['val_start'],   split_cfg['val_end'])
    test_df  = time_slice(df, split_cfg['test_start'],  split_cfg['test_end'])
    return train_df, val_df, test_df

train_df, val_df, test_df = get_splits(full_df, PARAMS['split'])

def min_len_ok(df, lookback):
    # need at least lookback + 2 rows to form a label and 1-step shift
    return df.groupby('Ticker').size() >= (lookback + 2)

lookback = PARAMS['lookback']

ok_train = set(min_len_ok(train_df, lookback).pipe(lambda s: s[s].index))
ok_val   = set(min_len_ok(val_df,   lookback).pipe(lambda s: s[s].index))
ok_test  = set(min_len_ok(test_df,  lookback).pipe(lambda s: s[s].index))

# keep tickers present (and long enough) in ALL splits to avoid leakage/imbalance
keep = ok_train & ok_val & ok_test

def keep_tickers(df, tickers):
    return df[df['Ticker'].isin(tickers)].copy()

train_df = keep_tickers(train_df, keep)
val_df   = keep_tickers(val_df,   keep)
test_df  = keep_tickers(test_df,  keep)

for name, df in [('train', train_df), ('val', val_df), ('test', test_df)]:
    print(f"{name}: {df['Ticker'].nunique():>5} tickers | {len(df):>9} rows")


train:  2406 tickers |   5036427 rows
val:  2406 tickers |    606112 rows
test:  2406 tickers |   1778231 rows


## --- Walk-forward split + filter short tickers + build SeqDatasets


In [8]:
# --- Walk-forward split + filter short tickers + build SeqDatasets (self-contained) ---

import pandas as pd
import numpy as np

# 1) Define get_splits if not already defined
if 'get_splits' not in globals():
    def time_slice(df, start, end):
        return df.loc[(df.index >= pd.to_datetime(start)) & (df.index <= pd.to_datetime(end))]
    def get_splits(df, split_cfg):
        train_df = time_slice(df, split_cfg['train_start'], split_cfg['train_end'])
        val_df   = time_slice(df, split_cfg['val_start'],   split_cfg['val_end'])
        test_df  = time_slice(df, split_cfg['test_start'],  split_cfg['test_end'])
        return train_df, val_df, test_df

train_df, val_df, test_df = get_splits(full_df, PARAMS['split'])

# 2) Keep only tickers with enough rows in ALL splits (avoid short series)
def min_len_ok(df, lookback):
    return df.groupby('Ticker').size() >= (lookback + 2)

lookback = PARAMS['lookback']
ok_train = set(min_len_ok(train_df, lookback).pipe(lambda s: s[s].index))
ok_val   = set(min_len_ok(val_df,   lookback).pipe(lambda s: s[s].index))
ok_test  = set(min_len_ok(test_df,  lookback).pipe(lambda s: s[s].index))
keep = ok_train & ok_val & ok_test

def keep_tickers(df, tickers):
    return df[df['Ticker'].isin(tickers)].copy()

train_df = keep_tickers(train_df, keep)
val_df   = keep_tickers(val_df,   keep)
test_df  = keep_tickers(test_df,  keep)

for name, df in [('train', train_df), ('val', val_df), ('test', test_df)]:
    print(f"{name}: {df['Ticker'].nunique():>5} tickers | {len(df):>9} rows")

# --- Fix boundary leakage: drop last row per ticker within each split ---
def drop_last_per_ticker(df):
    df = df.sort_index()
    last_idx = df.groupby('Ticker').tail(1).index
    return df.drop(index=last_idx)

train_df = drop_last_per_ticker(train_df)
val_df   = drop_last_per_ticker(val_df)
test_df  = drop_last_per_ticker(test_df)

for name, df in [('train', train_df), ('val', val_df), ('test', test_df)]:
    print(f"{name}: {df['Ticker'].nunique():>5} tickers | {len(df):>9} rows (boundary-safe)")

# 3) Define SeqDataset if not already defined (matches earlier signature)
if 'SeqDataset' not in globals():
    from torch.utils.data import Dataset
    class SeqDataset(Dataset):
        def __init__(self, df: pd.DataFrame, features, lookback=64):
            self.features = features; self.lookback = lookback
            self.X, self.y = [], []
            for _, g in df.groupby('Ticker', sort=False):
                g = g.sort_index()  # ensure time order
                Xg = g[features].values.astype(np.float32)
                yg = g['y'].values.astype(np.int64)
                for i in range(lookback, len(g)):
                    self.X.append(Xg[i-lookback:i])
                    self.y.append(yg[i])
            self.X = np.array(self.X, dtype=np.float32)
            self.y = np.array(self.y, dtype=np.int64)
        def __len__(self): return len(self.y)
        def __getitem__(self, idx): return self.X[idx], self.y[idx]

# 4) Build datasets and show lengths
features = PARAMS['features']; lb = PARAMS['lookback']
train_ds = SeqDataset(train_df, features, lookback=lb)
val_ds   = SeqDataset(val_df,   features, lookback=lb)
test_ds  = SeqDataset(test_df,  features, lookback=lb)

print("SeqDataset lengths (boundary-safe):")
print(" train_ds:", len(train_ds))
print(" val_ds:  ", len(val_ds))
print(" test_ds: ", len(test_ds))


train:  2406 tickers |   5036427 rows
val:  2406 tickers |    606112 rows
test:  2406 tickers |   1778231 rows
train:  2406 tickers |   5034021 rows (boundary-safe)
val:  2406 tickers |    603706 rows (boundary-safe)
test:  2406 tickers |   1718649 rows (boundary-safe)
SeqDataset lengths (boundary-safe):
 train_ds: 4880037
 val_ds:   449722
 test_ds:  1564665


## Model (Option A/B/C stubs)

In [None]:
class SeqDataset(Dataset):
    def __init__(self, df: pd.DataFrame, features, lookback=64):
        self.features = features
        self.lookback = lookback
        # group by ticker to avoid crossing boundaries
        self.groups = [g for _, g in df.groupby('Ticker')]
        self.X, self.y = [], []
        for g in self.groups:
            Xg = g[features].values
            yg = g['y'].values
            for i in range(lookback, len(g)):
                self.X.append(Xg[i-lookback:i])
                self.y.append(yg[i])
        self.X = np.array(self.X, dtype=np.float32)
        self.y = np.array(self.y, dtype=np.int64)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class ConvTransformer(nn.Module):
    def __init__(self, in_feats, d_model=64, nhead=8, dim_ff=128, num_layers=2, num_classes=2, dropout=0.1):
        super().__init__()
        self.conv = nn.Conv1d(in_channels=in_feats, out_channels=d_model, kernel_size=3, padding=1)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_ff, dropout=dropout, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        x = x.transpose(1, 2)               # (B,F,T)
        x = torch.relu(self.conv(x))        # (B,d_model,T)
        x = x.transpose(1, 2)               # (B,T,d_model)
        x = self.encoder(x)                 # (B,T,d_model)
        x = x.mean(dim=1)                   # GAP over time
        return self.fc(x)

class CNN_GRU(nn.Module):
    """Option B: 1D-CNN + small GRU head → logits(2)."""
    def __init__(self, in_feats, hidden=32, num_classes=2):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels=in_feats, out_channels=hidden, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.gru = nn.GRU(input_size=hidden, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(hidden, num_classes)
    def forward(self, x):
        x = x.transpose(1, 2)       # (B, F, T)
        x = self.conv(x)            # (B, H, T)
        x = x.transpose(1, 2)       # (B, T, H)
        _, h = self.gru(x)          # h: (1, B, H)
        h = h.squeeze(0)
        return self.fc(h)

def build_model(option: str, in_feats: int):
    if option == 'A':
        return ConvTransformer(in_feats)
    if option == 'B':
        return CNN_GRU(in_feats)
    if option == 'C':
        return None  # handled via sklearn GradientBoostingClassifier
    raise ValueError('Unknown model option')

print('Model stubs ready (A/B in PyTorch, C via sklearn).')


## Train (per split)

In [None]:
def train_torch(model, train_ds, val_ds, epochs, lr, batch_size, device='cuda' if torch.cuda.is_available() else 'cpu'):
    model = model.to(device)
    scaler = torch.cuda.amp.GradScaler(enabled=(device=='cuda'))
    crit = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=2)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2)
    best, best_path = -1, os.path.join(OUTPUT_DIR, 'best_model.pt')

    for ep in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            with torch.cuda.amp.autocast(enabled=(device=='cuda')):
                logits = model(xb)
                loss = crit(logits, yb)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

        # val
        model.eval(); correct=total=0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                pred = logits.argmax(1)
                correct += (pred==yb).sum().item(); total += yb.numel()
        acc = correct/max(total,1)
        print(f'Epoch {ep+1}/{epochs} val_acc={acc:.3f}')
        if acc > best:
            best = acc
            torch.save(model.state_dict(), best_path)
    print('Best val_acc:', best)
    return best_path

def train_sklearn_gbdt(X_train, y_train, X_val, y_val):
    """Option C: simple GradientBoostingClassifier. TODO: Save to outputs/."""
    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)
    print('GBDT val acc:', clf.score(X_val, y_val))
    return clf

print('Training stubs ready.')


## Inference & backtest

In [None]:
def infer_proba_torch(model, ds, batch_size=256, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """Return class-1 probabilities for each sample in dataset."""
    model = model.to(device)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False)
    probs = []
    model.eval()
    with torch.no_grad():
        for xb, _ in loader:
            xb = xb.to(device)
            logits = model(xb)
            p = torch.softmax(logits, dim=1)[:,1].cpu().numpy()
            probs.append(p)
    return np.concatenate(probs)

def positions_from_probs(probs, th_long, th_short):
    """Map probabilities → positions: +1 (long) / 0 (flat) / -1 (short)."""
    pos = np.zeros_like(probs, dtype=int)
    pos[probs >= th_long] = 1
    pos[probs <= th_short] = -1
    return pos

def backtest_from_positions(returns, positions, cost_bps=5):
    """
    Compute PnL with costs per trade. Returns equity curve and trades DataFrame.
    TODO: ensure alignment (positions at t apply to ret at t+1 if using next-day execution).
    """
    positions = positions.astype(int)
    # Shift positions to apply next day
    pos_shift = np.roll(positions, 1)
    pos_shift[0] = 0
    # Trading costs on position changes
    trades = (np.abs(pos_shift[1:] - pos_shift[:-1]) > 0).astype(int)
    cost = trades * (cost_bps / 1e4)
    rets = pos_shift * returns
    rets_adj = rets.copy()
    rets_adj[1:] -= cost  # cost applied when trade occurs
    equity = (1 + rets_adj).cumprod()
    return equity, trades

print('Inference/backtest stubs ready.')


## Metrics & plots

In [None]:
def compute_metrics(equity_curve, daily_rets):
    T = len(equity_curve)
    if T <= 1:
        return {'CAGR': 0, 'MaxDD': 0, 'Sharpe': 0}
    years = T / 252
    cagr = (equity_curve[-1] ** (1/years)) - 1 if years > 0 else 0
    peaks = np.maximum.accumulate(equity_curve)
    dd = (equity_curve / peaks) - 1
    maxdd = dd.min()
    sharpe = (np.mean(daily_rets) / (np.std(daily_rets) + 1e-8)) * np.sqrt(252)
    return {'CAGR': float(cagr), 'MaxDD': float(maxdd), 'Sharpe': float(sharpe)}

def plot_equity(equity_curve, title='Equity Curve', path=os.path.join(OUTPUT_DIR, 'equity_curve.png')):
    plt.figure()
    plt.plot(equity_curve)
    plt.title(title)
    plt.xlabel('Days')
    plt.ylabel('Equity')
    plt.grid(True)
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    return path

print('Metrics/plots stubs ready.')


## Params dump & outputs

In [None]:
def save_run(seed, params, metrics: dict):
    run = {
        'timestamp': dt.datetime.utcnow().isoformat() + 'Z',
        'seed': seed,
        'params': params,
        'metrics': metrics,
    }
    with open(os.path.join(OUTPUT_DIR, 'run.json'), 'w') as f:
        json.dump(run, f, indent=2)
    print('Saved outputs/run.json')

print('Output saving stub ready.')


---
### TODO wiring plan (in this order)
1. Load data for tickers with `load_all_tickers` using `PARAMS` dates.
2. `get_splits` to obtain train/val/test DataFrames.
3. Create `SeqDataset` for train/val/test (sequence length = `PARAMS['lookback']`).
4. Build model via `build_model(PARAMS['model_option'], in_feats=len(PARAMS['features']))`.
5. Train with `train_torch` (or `train_sklearn_gbdt` if Option C).
6. Infer probabilities on **test** set with `infer_proba_torch`.
7. Convert to positions with `positions_from_probs` using thresholds from `PARAMS`.
8. Backtest with `backtest_from_positions` (costs in bps in `PARAMS`).
9. Plot and save equity curve; export CSV of trades (TODO: implement export).
10. Compute and save metrics; dump `run.json` and `params_seed.json` already created.

**No extras. Ship the notebook.**
