# Quickstart
This notebook will:
1) Load `data/x_train.pkl` and `data/x_test.pkl`
2) Train a tiny VAE-style seq2seq model on 60→10 windows from `x_train`
3) After each training epoch, evaluate on a small validation set built from `x_train`:
   - Official metrics: MSE, MAE, IC, IR, SharpeRatio, MDD, VaR, ES
   - Cross-sectional trading snapshots: CSM and LOTQ
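4) Run preview inference on a subset of `x_test` and save it as a pickle at `sample_submission/submission_example.pkl` (for an official submission, run inference on ALL windows and save to `sample_submission/submission.pkl`)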

In [12]:
import os, sys, json, time, warnings
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [13]:
# Project layout (adjust if yours differs): when the notebook is started from
# inside `src/`, the repository root is one level up; otherwise the current
# working directory is taken as the root.
_cwd = Path.cwd()
ROOT = _cwd.parent if _cwd.name == 'src' else _cwd
DATA, SRC, SUBM = (ROOT / sub for sub in ("data", "src", "sample_submission"))

# Put src/ at the front of the import path (only once) so local modules resolve.
_src_entry = str(SRC)
if _src_entry not in sys.path:
    sys.path.insert(0, _src_entry)

# The submission folder is created up front; this is a no-op when it exists.
SUBM.mkdir(parents=True, exist_ok=True)

# Seed NumPy and PyTorch from a single constant.
SEED = 1337
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
DEVICE

'cuda'

In [14]:
# Load dataset files
info_path = DATA / "dataset_info.json"
if info_path.exists():
    info = json.loads(info_path.read_text(encoding="utf-8"))
    print("dataset_info.json loaded. Keys:", list(info.keys()))
    # Preview a fixed set of keys, but only those actually present, so that a
    # trimmed-down dataset_info.json does not abort the cell with a KeyError.
    preview_keys = ['features', 'input_len', 'horizon_len', 'outputs']
    print(json.dumps({k: info[k] for k in preview_keys if k in info}, indent=2))
else:
    print("dataset_info.json not found at", info_path)

# Peek x_train / x_test
x_train_path = DATA / "x_train.pkl"
x_test_path  = DATA / "x_test.pkl"

# NOTE(security): unpickling can execute arbitrary code. Only load .pkl files
# that come from a source you trust.
x_train = pd.read_pickle(x_train_path)
x_test  = pd.read_pickle(x_test_path)

print("x_train shape:", x_train.shape, "| columns:", x_train.columns.tolist())
print("x_test  shape:", x_test.shape,  "| columns:", x_test.columns.tolist())

# Show only the first rows; the frames themselves are large.
display(x_train.head(3))
display(x_test.head(3))

dataset_info.json loaded. Keys: ['freq', 'features', 'input_len', 'horizon_len', 'test_windowing', 'shuffle_seed', 'dtypes', 'outputs', 'sha256']
{
  "features": [
    "close",
    "volume"
  ],
  "input_len": 60,
  "horizon_len": 10,
  "outputs": {
    "x_train": {
      "columns": [
        "series_id",
        "time_step",
        "close",
        "volume"
      ]
    },
    "x_test": {
      "columns": [
        "window_id",
        "time_step",
        "close",
        "volume"
      ]
    },
    "y_test": {
      "columns": [
        "window_id",
        "time_step",
        "close"
      ]
    }
  }
}
x_train shape: (18331224, 4) | columns: ['series_id', 'time_step', 'close', 'volume']
x_test  shape: (3000000, 4) | columns: ['window_id', 'time_step', 'close', 'volume']


Unnamed: 0,series_id,time_step,close,volume
0,0,0,0.137,171985.703125
1,0,1,0.13656,85451.398438
2,0,2,0.13647,121151.898438


Unnamed: 0,window_id,time_step,close,volume
0,1,0,0.1126,24976.0
1,1,1,0.1126,0.0
2,1,2,0.1125,2299.0


In [15]:
# Use the sampler logic from src/dataset.py to slice windows
from dataset import TrainWindowSampler

class WindowsDataset(Dataset):
    """
    Wrap TrainWindowSampler into a PyTorch Dataset.

    Every window yielded by ``sampler.iter_windows()`` is converted to float32
    and materialized in memory at construction time (optionally capped), so
    indexing afterwards is a plain array lookup.

    Args:
      x_train_path: path forwarded to TrainWindowSampler.
      rolling:      forwarded unchanged to TrainWindowSampler.
      step_size:    forwarded unchanged to TrainWindowSampler.
      max_samples:  maximum number of windows to keep. None keeps every window
                    the sampler yields; a value <= 0 produces an empty dataset.

    Returns (per item):
      X: (60, 2) float32 -> [close, volume]
      y: (10,)  float32 -> future close
    """
    def __init__(self, x_train_path: str, rolling: bool = True, step_size: int = 1, max_samples: int = None):
        self.sampler = TrainWindowSampler(
            x_train_path=x_train_path,
            window=70,
            input_len=60,
            horizon_len=10,
            rolling=rolling,
            step_size=step_size,
            seed=SEED,
        )
        # Materialize (optionally capped) for stable batching
        xs, ys = [], []
        # Guard BEFORE iterating: checking the cap only after the append would
        # let max_samples=0 slip one window through.
        if max_samples is None or max_samples > 0:
            for X, y in self.sampler.iter_windows():
                xs.append(X.astype(np.float32))
                ys.append(y.astype(np.float32))
                if max_samples is not None and len(xs) >= max_samples:
                    break
        # Fall back to correctly-shaped empty arrays when nothing was collected.
        self.X = np.stack(xs, axis=0) if xs else np.zeros((0,60,2), dtype=np.float32)
        self.y = np.stack(ys, axis=0) if ys else np.zeros((0,10), dtype=np.float32)

    def __len__(self):
        """Number of materialized windows."""
        return len(self.X)

    def __getitem__(self, i):
        """Return window ``i`` as a pair of tensors (X, y)."""
        return torch.from_numpy(self.X[i]), torch.from_numpy(self.y[i])

# Demo-sized cap on the number of materialized windows. Raise it for better
# quality, or set it to None to use every window.
MAX_SAMPLES = 50_000
train_ds = WindowsDataset(
    x_train_path=str(x_train_path),
    rolling=True,
    step_size=1,
    max_samples=MAX_SAMPLES,
)
(len(train_ds), train_ds.X.shape, train_ds.y.shape)

(50000, (50000, 60, 2), (50000, 10))

In [16]:
# Build a tiny validation set from x_train for on-train evaluation
# We reuse TrainWindowSampler and materialize ~N windows into memory.

def build_val_windows(x_train_path: str, max_val_windows: int = 2048):
    """
    Materialize up to ``max_val_windows`` windows from x_train for evaluation.

    Windows come from a TrainWindowSampler configured with window=70,
    input_len=60, horizon_len=10, rolling=True, step_size=50 and seed=2025.

    NOTE(review): these windows are sliced from the same x_train file used for
    training, so they may overlap the training windows. Treat the resulting
    metrics as a sanity check rather than a true hold-out estimate.

    Args:
      x_train_path:    path forwarded to TrainWindowSampler.
      max_val_windows: maximum number of windows to collect.

    Returns:
      X: (N,60,2) float32 input windows
      Y: (N,10)   float32 targets
      B: (N,)     float32 last input close of each window (column 0 of the
                  final input row), used as base_close

    Raises:
      ValueError: if no window was collected (sampler yielded nothing, or
                  ``max_val_windows`` <= 0).
    """
    sampler = TrainWindowSampler(
        x_train_path=x_train_path,
        window=70, input_len=60, horizon_len=10,
        rolling=True, step_size=50, seed=2025  # sparser step for speed
    )
    Xs, Ys = [], []
    # Guard before iterating so that a cap of 0 really collects nothing
    # (checking only after the append would always keep the first window).
    if max_val_windows > 0:
        for X, y in sampler.iter_windows():
            Xs.append(X.astype(np.float32))
            Ys.append(y.astype(np.float32))
            if len(Xs) >= max_val_windows:
                break
    if not Xs:
        raise ValueError("No validation windows produced.")
    X = np.stack(Xs, axis=0)           # (N,60,2)
    Y = np.stack(Ys, axis=0)           # (N,10)
    # Base close = close of the last input step; read it from the stacked array
    # instead of keeping a parallel Python list. copy() detaches it from X.
    B = X[:, -1, 0].copy()             # (N,)
    return X, Y, B

X_val, Y_val, B_val = build_val_windows(str(x_train_path), max_val_windows=2048)
X_val.shape, Y_val.shape, B_val.shape

((2048, 60, 2), (2048, 10), (2048,))

In [17]:
from baselines.vae_based import TinyTimeVAE, vae_loss

# Plain training run (no evaluation inside the loop).
# NOTE(review): a later cell repeats this setup, re-creates `model`/`opt` and
# trains again with per-epoch evaluation, so the model trained here is replaced.

# Hyperparameters and DataLoader
BATCH_SIZE = 512
NUM_EPOCHS = 3           # increase for better quality
LR = 1e-3
BETA = 1e-3              # KL weight
GRAD_CLIP = 1.0

# shuffle=True reorders samples every epoch; drop_last=True discards the final
# partial batch so every step sees exactly BATCH_SIZE windows.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=0)

# Model & optimizer
model = TinyTimeVAE(input_dim=2, hidden=64, latent=16, input_len=60, horizon=10).to(DEVICE)
opt = torch.optim.Adam(model.parameters(), lr=LR)

# Train
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}", leave=False)
    # Running means over the steps of this epoch (reset every epoch).
    loss_avg, recon_avg, kld_avg, n_steps = 0.0, 0.0, 0.0, 0

    for X, y in pbar:
        X = X.to(torch.float32).to(DEVICE)     # (B,60,2)
        y = y.to(torch.float32).to(DEVICE)     # (B,10)
        opt.zero_grad(set_to_none=True)
        pred, mu, logvar = model(X)
        # vae_loss comes from baselines.vae_based; presumably it returns
        # (total loss, reconstruction term, KL term) -- TODO confirm.
        loss, recon, kld = vae_loss(pred, y, mu, logvar, beta=BETA)
        loss.backward()
        if GRAD_CLIP is not None:
            # Cap the global gradient norm at GRAD_CLIP before the update.
            nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        opt.step()

        # logging: incremental mean, avg += (value - avg) / n
        # NOTE(review): `recon` and `kld` are used without .item(); this assumes
        # vae_loss returns them as plain numbers -- confirm.
        n_steps += 1
        loss_avg  += (loss.item() - loss_avg) / n_steps
        recon_avg += (recon - recon_avg) / n_steps
        kld_avg   += (kld   - kld_avg)   / n_steps
        pbar.set_postfix(loss=f"{loss_avg:.5f}", recon=f"{recon_avg:.5f}", kld=f"{kld_avg:.5f}")

    print(f"[Epoch {epoch}] loss={loss_avg:.6f} recon={recon_avg:.6f} kld={kld_avg:.6f}")

# Optionally save a checkpoint (weights only, via state_dict)
CKPT = SUBM / "tinytimevae_demo.pt"
torch.save(model.state_dict(), CKPT)
print("Saved checkpoint to:", CKPT)

Epoch 1/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 1] loss=0.003801 recon=0.003673 kld=0.127999


Epoch 2/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 2] loss=0.000267 recon=0.000202 kld=0.065022


Epoch 3/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 3] loss=0.000195 recon=0.000178 kld=0.016385
Saved checkpoint to: c:\Users\xyy31\Desktop\ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit\sample_submission\tinytimevae_demo.pt


In [18]:
# Convert a minibatch into y_true / y_pred / x_like frames compatible with metrics & trading

def batch_to_eval_frames(pred: np.ndarray, y_true: np.ndarray, base_close: np.ndarray, start_id: int = 0):
    """
    Convert a batch of predictions/targets into long-format DataFrames.

    Window ids are assigned consecutively starting at ``start_id``; rows are
    ordered by window and, within a window, by horizon step 0..H-1.

    Args:
      pred:       (N,H) predicted close
      y_true:     (N,H) true close
      base_close: (N,) last input close used as base
      start_id:   window id given to the first row of the batch

    Returns:
      y_true_df: ['window_id','time_step','close']
      y_pred_df: ['window_id','time_step','pred_close']
      x_like_df: ['window_id','time_step','close']  # only time_step=59 rows for base_close
      y_true_with_base: ['window_id','time_step','close','base_close']  # for trading.py

      Dtypes: window_id int32, time_step int8, value columns float32.
      For an empty batch (N == 0) the frames are empty but keep these columns.

    Raises:
      ValueError: if ``y_true`` is not 2-D, or ``pred`` / ``base_close`` do not
                  match its shape.
    """
    y_true = np.asarray(y_true)
    pred = np.asarray(pred)
    base_close = np.asarray(base_close).reshape(-1)

    if y_true.ndim != 2:
        raise ValueError(f"y_true must be 2-D (N,H), got shape {y_true.shape}")
    N, H = y_true.shape
    if pred.shape != (N, H):
        raise ValueError(f"pred shape {pred.shape} does not match y_true shape {(N, H)}")
    if base_close.shape[0] != N:
        raise ValueError(f"base_close has {base_close.shape[0]} entries, expected {N}")

    wids = np.arange(start_id, start_id + N, dtype=np.int32)

    # Long-format index columns: each window id repeated H times, paired with
    # the horizon steps 0..H-1 tiled N times (row-major over (window, step)).
    wid_col = np.repeat(wids, H)
    step_col = np.tile(np.arange(H).astype(np.int8), N)

    # Row-major flattening keeps the (window, step) order of the columns above.
    true_col = y_true.astype(np.float32).reshape(-1)
    pred_col = pred.astype(np.float32).reshape(-1)
    base_f32 = base_close.astype(np.float32)

    y_true_df = pd.DataFrame({'window_id': wid_col, 'time_step': step_col, 'close': true_col})
    y_pred_df = pd.DataFrame({'window_id': wid_col, 'time_step': step_col, 'pred_close': pred_col})
    # One row per window, placed at time_step 59 (the last input step).
    x_like_df = pd.DataFrame({
        'window_id': wids,
        'time_step': np.full(N, 59, dtype=np.int8),
        'close': base_f32,
    })
    y_true_with_base = pd.DataFrame({
        'window_id': wid_col,
        'time_step': step_col,
        'close': true_col,
        'base_close': np.repeat(base_f32, H),
    })
    return y_true_df, y_pred_df, x_like_df, y_true_with_base


In [19]:
# Evaluation hook: official metrics + trading (CSM, LOTQ)
from metrics import evaluate_all_metrics   # expects y_true, y_pred, x_like (for base_close)
import trading as tr                       # expects y_true with base_close inside

def evaluate_on_val(model, X_val: np.ndarray, Y_val: np.ndarray, B_val: np.ndarray, device='cpu',
                    max_batches: int = 4, batch_size: int = 256, horizon_step: int = 0,
                    seed: int = None):
    """
    Run a few mini-batches of validation to keep cost small.

    A random subset of at most ``max_batches * batch_size`` validation windows
    is pushed through the model; predictions are converted to frames with
    ``batch_to_eval_frames`` and scored with ``evaluate_all_metrics`` plus the
    ``CSM`` / ``LOTQ`` functions of the ``trading`` module.

    The model is switched to eval mode for the forward passes and put back
    into whatever mode it was in before the call.

    Args:
      model:        module whose forward returns a 3-tuple; the first element
                    is used as the (B,10) prediction.
      X_val:        (N,60,2) validation inputs.
      Y_val:        (N,10) validation targets.
      B_val:        (N,) base close per window.
      device:       device the inputs are moved to.
      max_batches:  upper bound on the number of mini-batches evaluated.
      batch_size:   windows per mini-batch.
      horizon_step: horizon index handed to the trading functions.
      seed:         if given, the subset is drawn from a dedicated generator
                    seeded with this value, so repeated calls (e.g. once per
                    epoch) score the same windows and leave the global NumPy
                    RNG untouched. If None, the global NumPy RNG is used.

    Returns:
      dict with the entries produced by ``evaluate_all_metrics`` plus
      "CSM_return" and "LOTQ_return"; an empty dict if nothing was evaluated.
    """
    N = X_val.shape[0]
    if seed is None:
        idx = np.arange(N)
        np.random.shuffle(idx)
    else:
        idx = np.random.default_rng(seed).permutation(N)

    n_batches = min(max_batches, int(np.ceil(N / batch_size)))

    preds_all, trues_all, bases_all = [], [], []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for bi in range(n_batches):
                sel = idx[bi*batch_size : (bi+1)*batch_size]
                # Cast to float32 like the training loop does, so float64
                # validation arrays do not cause a dtype mismatch in the model.
                x = torch.from_numpy(X_val[sel]).to(torch.float32).to(device)
                pred, _, _ = model(x)  # (B,10)
                preds_all.append(pred.detach().cpu().numpy())
                # Targets are never used on the device; keep them in NumPy.
                trues_all.append(Y_val[sel])
                bases_all.append(B_val[sel])
    finally:
        # Do not leave the caller's model silently stuck in eval mode.
        model.train(was_training)

    if not preds_all:
        return {}

    P = np.concatenate(preds_all, axis=0)   # (M,10)
    T = np.concatenate(trues_all, axis=0)   # (M,10)
    B = np.concatenate(bases_all, axis=0)   # (M,)

    y_true_df, y_pred_df, x_like_df, y_true_with_base = batch_to_eval_frames(P, T, B, start_id=0)

    off_stats = evaluate_all_metrics(y_true_df, y_pred_df, x_like_df)

    # Trading snapshots at a chosen horizon (e.g., h=0)
    ret_csm  = tr.CSM (y_true_with_base, y_pred_df, horizon_step=horizon_step)
    ret_lotq = tr.LOTQ(y_true_with_base, y_pred_df, horizon_step=horizon_step)

    off_stats.update({
        "CSM_return":  float(ret_csm.mean()),
        "LOTQ_return": float(ret_lotq.mean()),
    })
    return off_stats

In [20]:
# Training run with per-epoch evaluation.
# NOTE(review): this repeats the hyperparameters and setup of the earlier
# training cell and builds a fresh model/optimizer, so training restarts here.

# Hyperparameters and DataLoader
BATCH_SIZE = 512
NUM_EPOCHS = 3           # increase for better quality
LR = 1e-3
BETA = 1e-3              # KL weight
GRAD_CLIP = 1.0

# shuffle=True reorders samples every epoch; drop_last=True discards the final
# partial batch so every step sees exactly BATCH_SIZE windows.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=0)

# Model & optimizer
model = TinyTimeVAE(input_dim=2, hidden=64, latent=16, input_len=60, horizon=10).to(DEVICE)
opt = torch.optim.Adam(model.parameters(), lr=LR)

# Train with periodic evaluation
for epoch in range(1, NUM_EPOCHS + 1):
    # Re-enter training mode each epoch; evaluate_on_val below calls model.eval().
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}", leave=False)
    # Running means over the steps of this epoch (reset every epoch).
    loss_avg, recon_avg, kld_avg, n_steps = 0.0, 0.0, 0.0, 0

    for X, y in pbar:
        X = X.to(torch.float32).to(DEVICE)     # (B,60,2)
        y = y.to(torch.float32).to(DEVICE)     # (B,10)
        opt.zero_grad(set_to_none=True)
        pred, mu, logvar = model(X)
        # vae_loss comes from baselines.vae_based; presumably it returns
        # (total loss, reconstruction term, KL term) -- TODO confirm.
        loss, recon, kld = vae_loss(pred, y, mu, logvar, beta=BETA)
        loss.backward()
        if GRAD_CLIP is not None:
            # Cap the global gradient norm at GRAD_CLIP before the update.
            nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        opt.step()

        # Incremental mean: avg += (value - avg) / n
        # NOTE(review): `recon` and `kld` are used without .item(); this assumes
        # vae_loss returns them as plain numbers -- confirm.
        n_steps += 1
        loss_avg  += (loss.item() - loss_avg) / n_steps
        recon_avg += (recon - recon_avg) / n_steps
        kld_avg   += (kld   - kld_avg)   / n_steps
        pbar.set_postfix(loss=f"{loss_avg:.5f}", recon=f"{recon_avg:.5f}", kld=f"{kld_avg:.5f}")

    print(f"[Epoch {epoch}] loss={loss_avg:.6f} recon={recon_avg:.6f} kld={kld_avg:.6f}")

    # On-train evaluation (small batches for speed): at most 4 x 256 windows.
    # evaluate_on_val shuffles the validation indices on every call, so the
    # scored subset can differ from epoch to epoch.
    stats = evaluate_on_val(model, X_val, Y_val, B_val, device=DEVICE,
                            max_batches=4, batch_size=256, horizon_step=0)
    print({k: round(v, 6) for k, v in stats.items()})

# Optionally save a checkpoint (not required for submission); it is written to
# the same file name as the checkpoint of the earlier training cell.
CKPT = SUBM / "tinytimevae_demo.pt"
torch.save(model.state_dict(), CKPT)
print("Saved checkpoint to:", CKPT)

Epoch 1/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 1] loss=0.000856 recon=0.000773 kld=0.082517
{'MSE': 0.000602, 'MAE': 0.01717, 'IC': 0.034383, 'IR': 4.450314, 'SharpeRatio': 1.667421, 'MDD': 0.0, 'VaR': -0.005678, 'ES': -0.008892, 'CSM_return': -7.8e-05, 'LOTQ_return': -0.000147}


Epoch 2/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 2] loss=0.000173 recon=0.000162 kld=0.011064
{'MSE': 0.000587, 'MAE': 0.016784, 'IC': -0.00442, 'IR': -0.159909, 'SharpeRatio': 0.103426, 'MDD': 2.9e-05, 'VaR': -0.005792, 'ES': -0.008905, 'CSM_return': -0.000173, 'LOTQ_return': -5.8e-05}


Epoch 3/3:   0%|          | 0/97 [00:00<?, ?it/s]

[Epoch 3] loss=0.000154 recon=0.000151 kld=0.002188
{'MSE': 0.000558, 'MAE': 0.016134, 'IC': 0.039804, 'IR': 1.376338, 'SharpeRatio': 1.728516, 'MDD': 0.000751, 'VaR': -0.005675, 'ES': -0.009007, 'CSM_return': -0.000309, 'LOTQ_return': -8e-06}
Saved checkpoint to: c:\Users\xyy31\Desktop\ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit\sample_submission\tinytimevae_demo.pt


In [None]:
# Fast preview inference on a subset of x_test (NOT for official submission).
# For official submission, run full inference over all windows.

model.eval()
FIRST_N_WINDOWS = 500       # set to an integer (e.g., 500). Set to None to disable.
RANDOM_SEED     = 1337      # NOTE(review): not used anywhere in this cell

# Window ids in order of first appearance; the preview takes the first N.
all_wids = x_test['window_id'].drop_duplicates().astype('int32').to_numpy()
sel_wids = all_wids[:int(FIRST_N_WINDOWS)] if FIRST_N_WINDOWS is not None else all_wids # you need to run on all windows for official submission
print(f"Infer on {len(sel_wids)} / {len(all_wids)} windows")

# A well-formed window has exactly the time steps 0..59, once each.
expected_steps = np.arange(60)
skipped_wids = []

out_rows = []
it = x_test[x_test['window_id'].isin(sel_wids)].groupby('window_id', sort=False)
for wid, g in tqdm(it, total=len(sel_wids), desc="Infer (subset)"):
    g = g.sort_values('time_step')  # expect 0..59
    steps = g['time_step'].to_numpy()
    # Checking only "fewer than 60 distinct steps" would let windows with
    # duplicated or extra steps through and feed a wrong-length sequence to
    # the model, so require an exact match.
    if len(steps) != 60 or not np.array_equal(steps, expected_steps):
        # skip malformed windows in preview mode
        skipped_wids.append(int(wid))
        continue

    x_np = g[['close','volume']].to_numpy(np.float32)[None, ...]  # (1,60,2)
    with torch.no_grad():
        xt = torch.from_numpy(x_np).to(DEVICE)
        pred, _, _ = model(xt)  # (1,10)
        pred = pred.squeeze(0).detach().cpu().numpy()  # (10,)

    for h in range(10):
        out_rows.append({
            'window_id': np.int32(wid),
            'time_step': np.int8(h),
            'pred_close': float(pred[h])
        })

# Report skipped windows instead of dropping them silently.
if skipped_wids:
    print(f"Skipped {len(skipped_wids)} malformed windows (first few: {skipped_wids[:5]})")

submission_preview = pd.DataFrame(out_rows, columns=['window_id','time_step','pred_close'])
submission_preview['window_id']  = submission_preview['window_id'].astype('int32')
submission_preview['time_step']  = submission_preview['time_step'].astype('int8')
submission_preview['pred_close'] = submission_preview['pred_close'].astype('float32')

# Basic validation for the selected windows only
counts = submission_preview.groupby('window_id')['time_step'].nunique()
assert (counts == 10).all(), "Each selected window_id must have exactly 10 rows (0..9)."

# Save preview (NOT for official submission)
# For official submission, run inference on ALL windows and save to sample_submission/submission.pkl
# out_path = SUBM / "submission.pkl"
out_path = SUBM / "submission_example.pkl"
submission_preview.to_pickle(out_path)
print(f"Saved preview to {out_path}  rows={len(submission_preview)}  "
      f"windows={submission_preview['window_id'].nunique()}")
display(submission_preview.head(12))

print("NOTE: This is a PREVIEW subset. For official submission, you must run full inference on ALL windows.")

Infer on 500 / 50000 windows


Infer (subset):   0%|          | 0/500 [00:00<?, ?it/s]

Saved preview to c:\Users\xyy31\Desktop\ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit\sample_submission\submission_example.pkl  rows=5000  windows=500


Unnamed: 0,window_id,time_step,pred_close
0,1,0,0.092288
1,1,1,0.108378
2,1,2,0.111337
3,1,3,0.11207
4,1,4,0.11258
5,1,5,0.113027
6,1,6,0.113353
7,1,7,0.113555
8,1,8,0.113663
9,1,9,0.113713


NOTE: This is a PREVIEW subset. For official submission, you must run full inference on ALL windows.
