In [1]:
# ============================================================
# Core numerical libraries
# ============================================================
import numpy as np
import pandas as pd

# ============================================================
# PyTorch (MLP model)
# ============================================================
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ============================================================
# Scaling and metrics
# ============================================================
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# ============================================================
# Reproducibility
# ============================================================
torch.manual_seed(42)
np.random.seed(42)

In [2]:
# Choose GPU if available (WSL should expose GPU through NVIDIA drivers)
# However since pytorch cuda = 12.8 work with latest blackwell RTX5080
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

Using device: cuda
GPU: NVIDIA GeForce RTX 5080


In [3]:
# ============================================================
# Load CLEAN datasets (already prepared earlier)
# ------------------------------------------------------------
from pathlib import Path

DATA_DIR = Path("../data")

ASSETS = {
    "SPY": {
        "options": DATA_DIR / "options_clean_SPY.csv",
        "daily":   DATA_DIR / "spy_with_garch.csv",
        "daily_date_col": "Date",
        "daily_close_col": "Close"
    },
    "AAPL": {
        "options": DATA_DIR / "options_clean_AAPL.csv",
        "daily":   DATA_DIR / "aapl_with_garch.csv",  # you must create this in Notebook 02
        "daily_date_col": "Date",
        "daily_close_col": "Close"
    }
}

datasets = {}

for asset, cfg in ASSETS.items():
    opts = pd.read_csv(cfg["options"])
    daily = pd.read_csv(cfg["daily"], parse_dates=[cfg["daily_date_col"]])

    opts["QUOTE_DATE"]  = pd.to_datetime(opts["QUOTE_DATE"],  format="mixed", errors="coerce")
    opts["EXPIRE_DATE"] = pd.to_datetime(opts["EXPIRE_DATE"], format="mixed", errors="coerce")
    opts = opts.dropna(subset=["QUOTE_DATE", "EXPIRE_DATE"])

    daily = daily.sort_values(cfg["daily_date_col"]).reset_index(drop=True)

    datasets[asset] = {
        "opts": opts,
        "daily": daily
    }

    print(f"[{asset}] options rows:", len(opts))
    print(f"[{asset}] daily rows:", len(daily))

[SPY] options rows: 4195810
[SPY] daily rows: 3521
[AAPL] options rows: 1562105
[AAPL] daily rows: 2011


In [4]:
# ============================================================
# Merge SPY and AAPL macro features into option dataset
# ------------------------------------------------------------
# Why?
# - Options prices depend on:
#   • underlying level
#   • market returns
#   • volatility regime
# ============================================================

merged_data = {}

for asset, data_dict in datasets.items():
    opts = data_dict["opts"]
    daily = data_dict["daily"]

    daily_small = daily[["Date", "Close", "log_return", "garch_vol"]].copy()
    daily_small = daily_small.rename(columns={
        "Date": "QUOTE_DATE",
        "Close": f"{asset}_CLOSE"
    })

    opts["QUOTE_DATE"] = pd.to_datetime(opts["QUOTE_DATE"])
    daily_small["QUOTE_DATE"] = pd.to_datetime(daily_small["QUOTE_DATE"])

    merged = opts.merge(daily_small, on="QUOTE_DATE", how="left")
    merged = merged.dropna(subset=["log_return", "garch_vol", f"{asset}_CLOSE"])

    merged_data[asset] = merged

    print(f"[{asset}] merged rows:", len(merged))


[SPY] merged rows: 4169845
[AAPL] merged rows: 1552960


In [5]:
# ============================================================
# Black–Scholes pricing functions
# ------------------------------------------------------------
# Used as a structural baseline.
# ML models will learn only the residual.
# ============================================================

from scipy.stats import norm

def black_scholes_price(S, K, T, r, sigma, option_type="call"):
    """
    Parameters
    ----------
    S : spot price
    K : strike
    T : time to maturity (years)
    r : risk-free rate
    sigma : volatility
    option_type : "call" or "put"
    """

    eps = 1e-8
    T = np.maximum(T, eps)
    sigma = np.maximum(sigma, eps)

    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == "call":
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    else:
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)


In [6]:
# ============================================================
# Feature selection
# ------------------------------------------------------------
# These are standard option pricing drivers available at time t:
#
# UNDERLYING_LAST → option underlying spot
# log_return     → return shock of the underlying
# garch_vol      → volatility regime indicator
# DTE            → time to maturity (theta decay)
# MONEINESS      → intrinsic structure of the option
#
# NOTE:
# - These features are used to PREDICT future option prices
# - Target creation is done LATER (per asset and option type)
# ============================================================

feature_cols_base = [
    "UNDERLYING_LAST",
    "log_return",
    "garch_vol",
    "DTE",
    "MONEINESS"
]

# ============================================================
# Target configuration (do NOT create targets here)
# ------------------------------------------------------------
# We train SEPARATE models for:
# - Call options (C_MID)
# - Put  options (P_MID)
#
# Target is the NEXT-STEP log return of the option price,
# constructed later inside the per-asset split loop.
# ============================================================

TARGETS = {
    "CALL": "C_MID",
    "PUT":  "P_MID"
}

In [7]:
# ============================================================
# 80/10/10 split (time-based split)
# ============================================================

splits = {}

for asset, df in merged_data.items():
    df = df.sort_values("QUOTE_DATE").reset_index(drop=True)

    unique_dates = df["QUOTE_DATE"].sort_values().unique()
    n_dates = len(unique_dates)

    train_cut = int(0.8 * n_dates)
    val_cut   = int(0.9 * n_dates)

    train_dates = unique_dates[:train_cut]
    val_dates   = unique_dates[train_cut:val_cut]
    test_dates  = unique_dates[val_cut:]

    train_data = df[df["QUOTE_DATE"].isin(train_dates)]
    val_data   = df[df["QUOTE_DATE"].isin(val_dates)]
    test_data  = df[df["QUOTE_DATE"].isin(test_dates)]

    splits[asset] = {
        "train": train_data,
        "val":   val_data,
        "test":  test_data
    }

    print(f"\n[{asset}] split:")
    print("  Train:", len(train_data))
    print("  Val  :", len(val_data))
    print("  Test :", len(test_data))


[SPY] split:
  Train: 2330311
  Val  : 1024018
  Test : 815516

[AAPL] split:
  Train: 1204151
  Val  : 161572
  Test : 187237


In [8]:
# ============================================================
# Pure Black–Scholes benchmark (NO ML)
# ============================================================
# This cell evaluates how well Black–Scholes alone prices options.
# It provides the baseline that ML must beat.
# ============================================================

RISK_FREE_RATE = 0.02   # <-- DEFINE HERE (FIXES NameError)

print("\n==============================")
print(" PURE BLACK–SCHOLES BENCHMARK ")
print("==============================")

for split_name in ["val", "test"]:
    print(f"\n--- {split_name.upper()} ---")

    for asset, asset_data in splits.items():
        for opt_type, price_col in TARGETS.items():

            # ------------------------------------------------
            # Select correct split and clean prices
            # ------------------------------------------------
            df = asset_data[split_name].copy()
            df = df.loc[df[price_col] > 0].copy()

            # ------------------------------------------------
            # Option type
            # ------------------------------------------------
            option_flag = "call" if opt_type == "CALL" else "put"

            # ------------------------------------------------
            # Black–Scholes price
            # ------------------------------------------------
            bs_prices = black_scholes_price(
                S=df["UNDERLYING_LAST"].values,
                K=df["STRIKE"].values,
                T=df["DTE"].values / 365.0,
                r=RISK_FREE_RATE,
                sigma=df["garch_vol"].values,
                option_type=option_flag
            )

            true_prices = df[price_col].values

            # ------------------------------------------------
            # Metrics
            # ------------------------------------------------
            mae  = mean_absolute_error(true_prices, bs_prices)
            rmse = np.sqrt(mean_squared_error(true_prices, bs_prices))

            mae_pct  = 100 * mae / np.mean(true_prices)
            rmse_pct = 100 * rmse / np.mean(true_prices)

            print(
                f"{asset} {opt_type} | "
                f"BS MAE: {mae:.3f} ({mae_pct:.2f}%) | "
                f"RMSE: {rmse:.3f} ({rmse_pct:.2f}%)"
            )



 PURE BLACK–SCHOLES BENCHMARK 

--- VAL ---
SPY CALL | BS MAE: 5.102 (9.84%) | RMSE: 8.583 (16.56%)
SPY PUT | BS MAE: 8.841 (37.13%) | RMSE: 13.711 (57.59%)
AAPL CALL | BS MAE: 2.897 (8.59%) | RMSE: 5.650 (16.76%)
AAPL PUT | BS MAE: 3.838 (15.09%) | RMSE: 6.728 (26.45%)

--- TEST ---
SPY CALL | BS MAE: 7.069 (13.64%) | RMSE: 11.651 (22.48%)
SPY PUT | BS MAE: 6.266 (30.91%) | RMSE: 10.160 (50.11%)
AAPL CALL | BS MAE: 3.219 (13.41%) | RMSE: 6.049 (25.21%)
AAPL PUT | BS MAE: 3.335 (9.78%) | RMSE: 5.505 (16.15%)


In [9]:
# ============================================================
# Target construction: Black–Scholes residuals (CLIPPED)
# ------------------------------------------------------------
# Target = log(C_market / C_BS)
# Clipping prevents exponential blow-ups during reconstruction
# ============================================================

scaled = {}

RISK_FREE_RATE = 0.02
EPS = 1e-8
RESID_CLIP = 1.0   # <-- KEY CHANGE (exp(±1) ≈ ×2.7)

for asset, split in splits.items():
    X_cols = feature_cols_base + [f"{asset}_CLOSE"]
    scaled[asset] = {}

    for opt_type, price_col in TARGETS.items():

        df_train = split["train"].copy()
        df_val   = split["val"].copy()
        df_test  = split["test"].copy()

        option_flag = "call" if opt_type == "CALL" else "put"

        for df_ in (df_train, df_val, df_test):

            # ---- BS price at time t ----
            df_["BS_PRICE"] = black_scholes_price(
                S=df_["UNDERLYING_LAST"].values,
                K=df_["STRIKE"].values,
                T=df_["DTE"].values / 365.0,
                r=RISK_FREE_RATE,
                sigma=df_["garch_vol"].values,
                option_type=option_flag
            )

            # ---- Log residual target ----
            raw_resid = np.log(
                (df_[price_col] + EPS) /
                (df_["BS_PRICE"] + EPS)
            )

            # ---- CLIP residuals ----
            df_["TARGET"] = np.clip(raw_resid, -RESID_CLIP, RESID_CLIP)

            df_.replace([np.inf, -np.inf], np.nan, inplace=True)
            df_.dropna(inplace=True)

        scaler_X = StandardScaler()
        scaler_y = StandardScaler()

        X_train = scaler_X.fit_transform(df_train[X_cols])
        X_val   = scaler_X.transform(df_val[X_cols])
        X_test  = scaler_X.transform(df_test[X_cols])

        y_train = scaler_y.fit_transform(df_train[["TARGET"]])
        y_val   = scaler_y.transform(df_val[["TARGET"]])
        y_test  = scaler_y.transform(df_test[["TARGET"]])

        scaled[asset][opt_type] = {
            "X": (X_train, X_val, X_test),
            "y": (y_train, y_val, y_test),
            "scalers": (scaler_X, scaler_y),
            "features": X_cols,
            "price_col": price_col
        }

        print(f"[{asset} | {opt_type}] BS residuals (clipped) ready")


[SPY | CALL] BS residuals (clipped) ready
[SPY | PUT] BS residuals (clipped) ready
[AAPL | CALL] BS residuals (clipped) ready
[AAPL | PUT] BS residuals (clipped) ready


In [10]:
# ============================================================
# Torch Dataset wrapper
# ============================================================

class OptionDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [11]:
# --------------------------------------------------------
# PyTorch Dataset + DataLoader
# --------------------------------------------------------
# Notes:
# - Keep tensors on CPU here (DataLoader works best on CPU tensors)
# - Move batches to GPU inside the training loop (non_blocking=True)
# - pin_memory=True speeds up CPU->GPU transfer
# --------------------------------------------------------
from torch.utils.data import DataLoader, TensorDataset

loaders = {}

for asset, asset_data in scaled.items():
    loaders[asset] = {}

    for opt_type, data in asset_data.items():
        X_train, X_val, X_test = data["X"]
        y_train, y_val, y_test = data["y"]

        X_train_t = torch.tensor(X_train, dtype=torch.float32)
        y_train_t = torch.tensor(y_train, dtype=torch.float32)
        X_val_t   = torch.tensor(X_val,   dtype=torch.float32)
        y_val_t   = torch.tensor(y_val,   dtype=torch.float32)
        X_test_t  = torch.tensor(X_test,  dtype=torch.float32)
        y_test_t  = torch.tensor(y_test,  dtype=torch.float32)

        train_ds = TensorDataset(X_train_t, y_train_t)
        val_ds   = TensorDataset(X_val_t,   y_val_t)
        test_ds  = TensorDataset(X_test_t,  y_test_t)

        pin = (device.type == "cuda")

        loaders[asset][opt_type] = {
            "train": DataLoader(
                train_ds,
                batch_size=4096,
                shuffle=True,
                num_workers=2,
                pin_memory=pin,
                persistent_workers=True
            ),
            "val": DataLoader(
                val_ds,
                batch_size=4096,
                shuffle=False,
                num_workers=2,
                pin_memory=pin,
                persistent_workers=True
            ),
            "test": DataLoader(
                test_ds,
                batch_size=4096,
                shuffle=False,
                num_workers=2,
                pin_memory=pin,
                persistent_workers=True
            )
        }

        print(f"[{asset} | {opt_type}] loaders ready")


[SPY | CALL] loaders ready
[SPY | PUT] loaders ready
[AAPL | CALL] loaders ready
[AAPL | PUT] loaders ready


In [12]:
# ============================================================
# MLP baseline
# ------------------------------------------------------------
# This sets a performance floor.
# If Transformer can't beat this → it's not worth the complexity.
# ============================================================

class MLP(nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

In [13]:
# ============================================================
# Transformer model (time-independent feature Transformer)
# ------------------------------------------------------------
# Uses self-attention over feature dimensions, not sequences.
# Keeps dataset identical to MLP.
# ============================================================

class TransformerRegressor(nn.Module):
    def __init__(self, n_features, d_model=32, nhead=4, num_layers=2):
        super().__init__()

        self.embed = nn.Linear(1, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )
        self.fc = nn.Linear(d_model * n_features, 1)

    def forward(self, x):
        # x: (batch, n_features)
        x = x.unsqueeze(-1)                 # (B, F, 1)
        x = self.embed(x)                   # (B, F, d_model)
        x = self.encoder(x)                 # (B, F, d_model)
        x = x.flatten(start_dim=1)          # (B, F*d_model)
        return self.fc(x)


In [14]:
# ============================================================
# Training MLP models (WEIGHTED LOSS)
# ============================================================

EPOCHS = 10
results = {}

for asset, asset_data in scaled.items():
    results[asset] = {}

    for opt_type, data in asset_data.items():
        print(f"\n==============================")
        print(f" Training MLP for {asset} {opt_type}")
        print(f"==============================")

        train_loader = loaders[asset][opt_type]["train"]
        val_loader   = loaders[asset][opt_type]["val"]

        model = MLP(n_features=len(data["features"])).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        best_val_mse = float("inf")
        best_state = None
        patience = 2
        patience_ctr = 0

        for epoch in range(EPOCHS):

            # -------- TRAIN --------
            model.train()
            train_losses = []

            for Xb, yb in train_loader:
                Xb = Xb.to(device, non_blocking=True)
                yb = yb.to(device, non_blocking=True)

                optimizer.zero_grad(set_to_none=True)
                preds = model(Xb)

                # -------- WEIGHTED MSE (KEY FIX) --------
                weights = torch.clamp(torch.exp(-torch.abs(yb)), max=5.0)
                loss = (weights * (preds - yb) ** 2).mean()

                loss.backward()
                optimizer.step()

                train_losses.append(loss.item())

            # -------- VALIDATE --------
            model.eval()
            val_losses = []

            with torch.no_grad():
                for Xb, yb in val_loader:
                    Xb = Xb.to(device, non_blocking=True)
                    yb = yb.to(device, non_blocking=True)
                    preds = model(Xb)
                    val_losses.append(((preds - yb) ** 2).mean().item())

            train_mse = np.mean(train_losses)
            val_mse   = np.mean(val_losses)

            print(
                f"Epoch {epoch+1:02d} | "
                f"Train MSE: {train_mse:.6f} | "
                f"Val MSE: {val_mse:.6f}"
            )

            if val_mse < best_val_mse:
                best_val_mse = val_mse
                best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                patience_ctr = 0
            else:
                patience_ctr += 1
                if patience_ctr >= patience:
                    print("Early stopping triggered")
                    break

        model.load_state_dict(best_state)
        model.to(device)

        results[asset][opt_type] = {
            "model": model,
            "best_val_mse": best_val_mse
        }



 Training MLP for SPY CALL
Epoch 01 | Train MSE: 0.064965 | Val MSE: 0.037321
Epoch 02 | Train MSE: 0.011377 | Val MSE: 0.019707
Epoch 03 | Train MSE: 0.009334 | Val MSE: 0.018772
Epoch 04 | Train MSE: 0.008584 | Val MSE: 0.015925
Epoch 05 | Train MSE: 0.008173 | Val MSE: 0.016178
Epoch 06 | Train MSE: 0.007997 | Val MSE: 0.013238
Epoch 07 | Train MSE: 0.007732 | Val MSE: 0.013213
Epoch 08 | Train MSE: 0.007533 | Val MSE: 0.012146
Epoch 09 | Train MSE: 0.007465 | Val MSE: 0.011936
Epoch 10 | Train MSE: 0.007293 | Val MSE: 0.012622

 Training MLP for SPY PUT
Epoch 01 | Train MSE: 0.069261 | Val MSE: 0.173126
Epoch 02 | Train MSE: 0.013290 | Val MSE: 0.069413
Epoch 03 | Train MSE: 0.007597 | Val MSE: 0.033945
Epoch 04 | Train MSE: 0.006609 | Val MSE: 0.035345
Epoch 05 | Train MSE: 0.006096 | Val MSE: 0.026810
Epoch 06 | Train MSE: 0.005838 | Val MSE: 0.024536
Epoch 07 | Train MSE: 0.005695 | Val MSE: 0.024690
Epoch 08 | Train MSE: 0.005508 | Val MSE: 0.020029
Epoch 09 | Train MSE: 0.005

In [15]:
# ============================================================
# Training Transformer models (WEIGHTED LOSS)
# ============================================================

transformer_results = {}

for asset, asset_data in scaled.items():
    transformer_results[asset] = {}

    for opt_type, data in asset_data.items():
        print(f"\n==============================")
        print(f" Training Transformer for {asset} {opt_type}")
        print(f"==============================")

        train_loader = loaders[asset][opt_type]["train"]
        val_loader   = loaders[asset][opt_type]["val"]

        model = TransformerRegressor(
            n_features=len(data["features"])
        ).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        best_val_mse = float("inf")
        best_state = None
        patience = 2
        patience_ctr = 0

        for epoch in range(EPOCHS):

            # -------- TRAIN --------
            model.train()
            train_losses = []

            for Xb, yb in train_loader:
                Xb = Xb.to(device, non_blocking=True)
                yb = yb.to(device, non_blocking=True)

                optimizer.zero_grad(set_to_none=True)
                preds = model(Xb)

                weights = torch.clamp(torch.exp(-torch.abs(yb)), max=5.0)
                loss = (weights * (preds - yb) ** 2).mean()

                loss.backward()
                optimizer.step()

                train_losses.append(loss.item())

            # -------- VALIDATE --------
            model.eval()
            val_losses = []

            with torch.no_grad():
                for Xb, yb in val_loader:
                    Xb = Xb.to(device, non_blocking=True)
                    yb = yb.to(device, non_blocking=True)
                    preds = model(Xb)
                    val_losses.append(((preds - yb) ** 2).mean().item())

            val_mse = np.mean(val_losses)

            print(
                f"Epoch {epoch+1:02d} | "
                f"Val MSE: {val_mse:.6f}"
            )

            if val_mse < best_val_mse:
                best_val_mse = val_mse
                best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                patience_ctr = 0
            else:
                patience_ctr += 1
                if patience_ctr >= patience:
                    break

        model.load_state_dict(best_state)
        model.to(device)

        transformer_results[asset][opt_type] = {
            "model": model
        }



 Training Transformer for SPY CALL
Epoch 01 | Val MSE: 0.036280
Epoch 02 | Val MSE: 0.030346
Epoch 03 | Val MSE: 0.030056
Epoch 04 | Val MSE: 0.022586
Epoch 05 | Val MSE: 0.023081
Epoch 06 | Val MSE: 0.025834

 Training Transformer for SPY PUT
Epoch 01 | Val MSE: 0.175668
Epoch 02 | Val MSE: 0.153540
Epoch 03 | Val MSE: 0.147491
Epoch 04 | Val MSE: 0.104321
Epoch 05 | Val MSE: 0.103748
Epoch 06 | Val MSE: 0.086345
Epoch 07 | Val MSE: 0.133853
Epoch 08 | Val MSE: 0.081005
Epoch 09 | Val MSE: 0.115832
Epoch 10 | Val MSE: 0.096886

 Training Transformer for AAPL CALL
Epoch 01 | Val MSE: 0.028267
Epoch 02 | Val MSE: 0.021278
Epoch 03 | Val MSE: 0.020097
Epoch 04 | Val MSE: 0.017161
Epoch 05 | Val MSE: 0.018020
Epoch 06 | Val MSE: 0.018489

 Training Transformer for AAPL PUT
Epoch 01 | Val MSE: 0.053091
Epoch 02 | Val MSE: 0.046072
Epoch 03 | Val MSE: 0.038626
Epoch 04 | Val MSE: 0.039831
Epoch 05 | Val MSE: 0.032174
Epoch 06 | Val MSE: 0.036796
Epoch 07 | Val MSE: 0.042362


In [16]:
# ============================================================
# Evaluation in PRICE SPACE (BS + ML residual) — CLIPPED
# ============================================================
# This version prevents exponential blow-ups by clipping
# log-residual predictions to a realistic range.
# ============================================================

def evaluate_bs_residual(
    model, loader, scaler_y,
    base_df, price_col, opt_type,
    risk_free_rate=0.02,
    clip_value=2.0   # <-- KEY PARAMETER
):
    """
    clip_value = 2.0 means:
      exp(-2) ≈ 0.14
      exp(+2) ≈ 7.39
    which already allows very large option mispricing.
    """

    model.eval()
    preds = []

    # --------------------------------------------
    # Predict residuals (log-space)
    # --------------------------------------------
    with torch.no_grad():
        for Xb, _ in loader:
            Xb = Xb.to(device, non_blocking=True)
            preds.append(model(Xb).cpu().numpy())

    preds = scaler_y.inverse_transform(np.vstack(preds)).ravel()

    # --------------------------------------------
    # CLIP RESIDUALS (CRITICAL FIX)
    # --------------------------------------------
    preds = np.clip(preds, -clip_value, clip_value)

    # --------------------------------------------
    # Recompute BS prices at time t
    # --------------------------------------------
    option_flag = "call" if opt_type == "CALL" else "put"

    bs_prices = black_scholes_price(
        S=base_df["UNDERLYING_LAST"].values,
        K=base_df["STRIKE"].values,
        T=base_df["DTE"].values / 365.0,
        r=risk_free_rate,
        sigma=base_df["garch_vol"].values,
        option_type=option_flag
    )

    # --------------------------------------------
    # True market prices
    # --------------------------------------------
    true_prices = base_df[price_col].values

    # --------------------------------------------
    # Reconstruct predicted prices
    # --------------------------------------------
    pred_prices = bs_prices * np.exp(preds)

    # --------------------------------------------
    # Metrics
    # --------------------------------------------
    mae  = mean_absolute_error(true_prices, pred_prices)
    rmse = np.sqrt(mean_squared_error(true_prices, pred_prices))

    mae_pct  = 100 * mae / np.mean(true_prices)
    rmse_pct = 100 * rmse / np.mean(true_prices)

    return mae, rmse, mae_pct, rmse_pct


# ============================================================
# Run evaluation (VAL first, then TEST)
# ============================================================

for split_name in ["val", "test"]:
    print(f"\n==============================")
    print(f"{split_name.upper()} RESULTS (BS + ML, CLIPPED)")
    print(f"==============================")

    for asset, asset_data in scaled.items():
        for opt_type, data in asset_data.items():

            loader = loaders[asset][opt_type][split_name]
            model_mlp = results[asset][opt_type]["model"]
            model_tf  = transformer_results[asset][opt_type]["model"]

            price_col = data["price_col"]

            # Base dataframe aligned to loader length
            base_df = splits[asset][split_name].copy()
            base_df = base_df.loc[base_df[price_col] > 0].copy()
            base_df = base_df.iloc[:len(loader.dataset)]

            mlp_metrics = evaluate_bs_residual(
                model_mlp,
                loader,
                data["scalers"][1],
                base_df,
                price_col,
                opt_type
            )

            tf_metrics = evaluate_bs_residual(
                model_tf,
                loader,
                data["scalers"][1],
                base_df,
                price_col,
                opt_type
            )

            print(
                f"{asset} {opt_type} | "
                f"MLP MAE: {mlp_metrics[0]:.3f} ({mlp_metrics[2]:.2f}%) | "
                f"RMSE: {mlp_metrics[1]:.3f} ({mlp_metrics[3]:.2f}%) || "
                f"TF MAE: {tf_metrics[0]:.3f} ({tf_metrics[2]:.2f}%) | "
                f"RMSE: {tf_metrics[1]:.3f} ({tf_metrics[3]:.2f}%)"
            )



VAL RESULTS (BS + ML, CLIPPED)
SPY CALL | MLP MAE: 3.404 (6.57%) | RMSE: 6.245 (12.05%) || TF MAE: 3.950 (7.62%) | RMSE: 6.547 (12.63%)
SPY PUT | MLP MAE: 6.745 (28.33%) | RMSE: 11.248 (47.24%) || TF MAE: 7.900 (33.18%) | RMSE: 12.135 (50.97%)
AAPL CALL | MLP MAE: 2.294 (6.81%) | RMSE: 4.534 (13.45%) || TF MAE: 2.834 (8.41%) | RMSE: 5.048 (14.98%)
AAPL PUT | MLP MAE: 3.034 (11.93%) | RMSE: 5.247 (20.63%) || TF MAE: 2.930 (11.52%) | RMSE: 5.253 (20.65%)

TEST RESULTS (BS + ML, CLIPPED)
SPY CALL | MLP MAE: 5.437 (10.49%) | RMSE: 8.654 (16.70%) || TF MAE: 5.451 (10.52%) | RMSE: 9.158 (17.67%)
SPY PUT | MLP MAE: 6.459 (31.86%) | RMSE: 10.495 (51.77%) || TF MAE: 6.044 (29.81%) | RMSE: 9.551 (47.11%)
AAPL CALL | MLP MAE: 2.376 (9.90%) | RMSE: 4.696 (19.57%) || TF MAE: 2.870 (11.96%) | RMSE: 5.085 (21.19%)
AAPL PUT | MLP MAE: 3.277 (9.62%) | RMSE: 5.105 (14.98%) || TF MAE: 2.775 (8.14%) | RMSE: 4.916 (14.42%)
