In [164]:
# Imports
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
#from tqdm.notebook import tqdm


import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
#from google.colab import drive
#drive.mount('/content/drive')

# Seeds
import random, os
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [165]:
df = pd.read_csv("uc3m_master_thesis/data/amex.csv")

In [167]:
def preprocess_stock(df_raw, patch_len: int, train_frac: float, val_frac: float, add_calendar: bool = False):
    minutely = df_raw.copy()

    # turn column to datetime, add helper day column
    minutely["Date Time"] = pd.to_datetime(minutely["Date Time"])
    minutely = minutely.sort_values("Date Time")
    minutely["day"] = minutely["Date Time"].dt.date

    # minutely log returns withing days from close
    minutely["log_close"] = np.log(minutely["Close"].astype(float))
    minutely["ret_1m"] = minutely.groupby("day")["log_close"].diff()
    # remove first minute of each day without returns
    minutely = minutely[minutely["Date Time"].dt.time != pd.to_datetime("09:35").time()]

    # compute daily RV
    daily = (minutely.groupby("day")["ret_1m"]
               .agg(rv=lambda x: np.sum(x**2))
               .reset_index())

    # in case there is a zero RV, replace it with small value before log
    daily["log_rv"] = np.log(daily["rv"].replace(0.0, 1e-12))
     # label for inputs from day d is log_rv of day d+1
    daily["log_rv_tomorrow"] = daily["log_rv"].shift(-1)
    # drop last day because no target in both daily and minutely df
    valid_days = daily["day"].iloc[:-1]
    daily = daily.iloc[:-1]
    minutely = minutely.iloc[:-380]
    
    # create minute index in each day
    minutely["idx_in_day"] = minutely.groupby("day").cumcount()


    ret_matrix = (
        minutely.pivot(index="day", columns="idx_in_day", values="ret_1m")
        .reindex(valid_days)        # align order with daily
        .to_numpy(dtype=np.float32) # shape: [n_days, 380]
    )
    if 380 % patch_len != 0:
        raise ValueError(f"Patch length {patch_len} does not evenly divide 380 minutes")
    
    T_tokens = 380 // patch_len
    X_days = ret_matrix.reshape(ret_matrix.shape[0], T_tokens, patch_len)
    y_days = daily["log_rv_tomorrow"].to_numpy(dtype=np.float32)


    # --- optional: concatenate day-level calendar features along feature axis ---
    cal_names = []
    if add_calendar:
        cal_feats, cal_names = _build_calendar_features_daylevel(
            valid_days=pd.to_datetime(valid_days),
            tokens_per_day=T_tokens
        )  # [N_days, T_tokens, 9]
        X_days = np.concatenate([X_days, cal_feats], axis=-1)  # d_in becomes patch_len + 9
        
    # --- chronological split on days ---
    N = X_days.shape[0]
    if not (0 < train_frac < 1 and 0 <= val_frac < 1 and train_frac + val_frac < 1):
        raise ValueError("train_frac and val_frac must be in (0,1) and sum to < 1")
    n_train = int(np.floor(N * train_frac))
    n_val   = int(np.floor(N * val_frac))
    i0, i1, i2 = 0, n_train, n_train + n_val

    out = {
        "X_train": X_days[i0:i1], "y_train": y_days[i0:i1],
        "X_val":   X_days[i1:i2], "y_val":   y_days[i1:i2],
        "X_test":  X_days[i2:],   "y_test":  y_days[i2:],
        "tokens_per_day": T_tokens,
        "patch_len": patch_len,
        "calendar_feature_names": cal_names,  # [] if add_calendar=False
    }
    return out

In [152]:
import numpy as np

def stack_context_days(X_days: np.ndarray,
                       y_days: np.ndarray,
                       context_window: int,
                       stride_days: int = 1):
    """
    X_days: [N_days, T_tokens, d_in]
    y_days: [N_days]
    context_window: total tokens in a window (e.g., T_tokens * n_days_context)
    stride_days: slide the window by this many days (default 1)
    Returns:
      X_ctx: [N_windows, context_window, d_in]
      y_ctx: [N_windows]
    """
    N, T_tokens, d_in = X_days.shape
    if context_window % T_tokens != 0:
        raise ValueError(
            f"context_window ({context_window}) must be a multiple of tokens/day ({T_tokens})"
        )
    days_per_window = context_window // T_tokens
    if N < days_per_window:
        return np.empty((0, context_window, d_in), dtype=np.float32), np.empty((0,), dtype=np.float32)

    starts = np.arange(0, N - days_per_window + 1, stride_days)
    X_ctx = np.stack(
        [
            X_days[s:s+days_per_window].reshape(-1, d_in)  # [days_per_window*T_tokens, d_in]
            for s in starts
        ],
        axis=0
    ).astype(np.float32)

    # take label from the last day in each window
    y_ctx = np.stack([y_days[s + days_per_window - 1] for s in starts], axis=0).astype(np.float32)
    return X_ctx, y_ctx


In [4]:
# ---- CUDA-only loader: pinned memory + persistent workers ----
def mk_loader(X, y, bs=64, shuffle=False, num_workers=2):
    x_t = torch.tensor(X, dtype=torch.float32)
    y_t = torch.tensor(y, dtype=torch.float32)
    ds = TensorDataset(x_t, y_t)
    return DataLoader(
        ds, batch_size=bs, shuffle=shuffle,
        pin_memory=True, num_workers=num_workers,
        persistent_workers=(num_workers > 0)
    )

In [168]:
# ---- Tiny Transformer with learnable positional embeddings ----
class VolTransformerTiny(nn.Module):
    def __init__(self, d_in, d_model=128, nhead=4, num_layers=3,
                 p_drop=0.1, use_cls=True, ff_mult=4, max_len=4096):
        super().__init__()
        self.use_cls = use_cls
        self.embed = nn.Linear(d_in, d_model)

        # positional embedding for tokens (add +1 for CLS position)
        self.pos_emb = nn.Embedding(max_len + 1, d_model)

        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead,
            dim_feedforward=ff_mult*d_model,
            dropout=p_drop, batch_first=True, norm_first=True
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)

        if use_cls:
            self.cls = nn.Parameter(torch.zeros(1, 1, d_model))
            nn.init.normal_(self.cls, mean=0.0, std=0.02)

        self.head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Dropout(p_drop),
            nn.Linear(d_model, 1)
        )

    def forward(self, x):  # x: [B, L, d_in]
        B, L, _ = x.shape
        h = self.embed(x)  # [B, L, d_model]

        # add positional embeddings
        # if CLS is used, its position index = 0, the rest shifted by +1
        if self.use_cls:
            pos_idx = torch.arange(1, L+1, device=x.device).unsqueeze(0).expand(B, -1)
            h = h + self.pos_emb(pos_idx)
            cls = self.cls.expand(B, -1, -1)
            # CLS gets position 0
            h = torch.cat([cls, h], dim=1)  # [B, L+1, d_model]
            h = h + torch.cat([self.pos_emb(torch.zeros(B,1, dtype=torch.long, device=x.device)),
                               torch.zeros_like(h[:,1:])], dim=1)
        else:
            pos_idx = torch.arange(0, L, device=x.device).unsqueeze(0).expand(B, -1)
            h = h + self.pos_emb(pos_idx)

        h = self.encoder(h)
        pooled = h[:, 0] if self.use_cls else h.mean(dim=1)
        return self.head(pooled).squeeze(-1)


In [161]:
# ---- CUDA-only trainer with AMP ----
def train_model_cuda(Xtr, ytr, Xva, yva, *,
                     d_model=128, nhead=4, num_layers=3,
                     batch_size=64, lr=1e-3, max_epochs=50,
                     weight_decay=1e-2, p_drop=0.1, patience=7,
                     ff_mult=4, ModelClass=None):
    assert torch.cuda.is_available(), "CUDA expected on Colab GPU runtime"
    device = torch.device("cuda")
    torch.backends.cudnn.benchmark = True  # speed up for fixed shapes

    # Data
    train_loader = mk_loader(Xtr, ytr, bs=batch_size, shuffle=True)
    val_loader   = mk_loader(Xva, yva, bs=batch_size, shuffle=False)

    # Model
    if ModelClass is None:
        ModelClass = VolTransformerTiny  # use your class defined elsewhere
    model = ModelClass(
        d_in=Xtr.shape[-1], d_model=d_model, nhead=nhead,
        num_layers=num_layers, p_drop=p_drop, ff_mult=ff_mult,
        max_len=max(Xtr.shape[1], Xva.shape[1]) + 1
    ).to(device)

    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scaler = torch.cuda.amp.GradScaler()

    best_val, best_state, wait = float('inf'), None, 0

    for epoch in range(1, max_epochs + 1):
        # ---- train ----
        model.train()
        tr_loss, n = 0.0, 0
        for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{max_epochs}", leave=False):
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)

            opt.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast():
                pred = model(xb)
                loss = F.mse_loss(pred, yb)
            scaler.scale(loss).backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()

            tr_loss += loss.item() * xb.size(0)
            n += xb.size(0)
        tr_loss /= max(1, n)

        # ---- validate ----
        model.eval()
        va_loss, n = 0.0, 0
        with torch.no_grad(), torch.cuda.amp.autocast():
            for xb, yb in val_loader:
                xb = xb.to(device, non_blocking=True)
                yb = yb.to(device, non_blocking=True)
                pred = model(xb)
                loss = F.mse_loss(pred, yb)
                va_loss += loss.item() * xb.size(0)
                n += xb.size(0)
        va_loss /= max(1, n)

        improved = va_loss < best_val
        print(f"Epoch {epoch:03d} | Train {tr_loss:.6f} | Val {va_loss:.6f}{' *' if improved else ''}")

        if improved:
            best_val = va_loss
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

    if best_state is not None:
        model.load_state_dict(best_state)
        model.to(device)
        print(f"Restored best model with validation loss: {best_val:.6f}")
    else:
        print("Warning: no improvement; returning final weights")

    return model

In [162]:
# ---- CUDA-only evaluation ----
def eval_mse_cuda(X, y, model, bs=64):
    device = torch.device("cuda")
    loader = mk_loader(X, y, bs=bs, shuffle=False)
    model.eval()
    tot, n = 0.0, 0
    with torch.no_grad(), torch.cuda.amp.autocast():
        for xb, yb in loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            pred = model(xb)
            loss = F.mse_loss(pred, yb)
            tot += loss.item() * xb.size(0)
            n += xb.size(0)
    return tot / max(1, n)

# ---- CUDA-only prediction ----
def predict_array_cuda(X, model, bs=64):
    device = torch.device("cuda")
    loader = mk_loader(X, np.zeros(len(X), dtype=np.float32), bs=bs, shuffle=False)
    outs = []
    model.eval()
    with torch.no_grad(), torch.cuda.amp.autocast():
        for xb, _ in loader:
            xb = xb.to(device, non_blocking=True)
            outs.append(model(xb).float().cpu().numpy())
    return np.concatenate(outs, axis=0)

In [166]:
def _build_calendar_features_daylevel(valid_days: pd.Series, tokens_per_day: int):
    """
    Build per-day calendar features (broadcast to all tokens of that day).
    Features:
      - day-of-week (sin, cos)         -> trading week cycle
      - month-of-year (sin, cos)       -> annual cycle
      - day-of-month (sin, cos)        -> month cycle
      - year index (normalized [-1,1]) -> regime/long drift
      - is_month_start, is_month_end   -> boundary flags
    Returns:
      cal_feats: [N_days, tokens_per_day, D_cal]
      names: list[str] (length D_cal)
    """
    dti = pd.to_datetime(valid_days.values)

    dow   = dti.weekday          # 0..6 (trading 0..4)
    month = dti.month            # 1..12
    dom   = dti.day              # 1..31
    year  = dti.year

    # normalize year index to [-1, 1]
    year0 = year.min()
    yr_idx = (year - year0).astype(np.float32)
    yr_idx = 2.0 * (yr_idx / max(1.0, yr_idx.max())) - 1.0 if yr_idx.max() > 0 else np.zeros_like(yr_idx, dtype=np.float32)

    # cyclical encodings
    dow_sin,   dow_cos   = np.sin(2*np.pi * dow / 5.0),              np.cos(2*np.pi * dow / 5.0)
    month_sin, month_cos = np.sin(2*np.pi * (month-1) / 12.0),       np.cos(2*np.pi * (month-1) / 12.0)
    dom_sin,   dom_cos   = np.sin(2*np.pi * (dom-1) / 31.0),         np.cos(2*np.pi * (dom-1) / 31.0)

    s = pd.Series(dti)
    is_month_end   = s.dt.is_month_end.astype(np.float32).values
    is_month_start = s.dt.is_month_start.astype(np.float32).values

    day_mat = np.stack([
        dow_sin, dow_cos,
        month_sin, month_cos,
        dom_sin, dom_cos,
        yr_idx,
        is_month_start, is_month_end
    ], axis=1).astype(np.float32)                       # [N_days, 9]

    # broadcast to all tokens in the day
    cal_feats = np.repeat(day_mat[:, None, :], tokens_per_day, axis=1)  # [N_days, T_tokens, 9]
    names = ["dow_sin","dow_cos","month_sin","month_cos","dom_sin","dom_cos","year_idx","is_month_start","is_month_end"]
    return cal_feats, names

In [171]:
# choose your trainer/predict/eval trio:
USE_CUDA_ONLY = True
if USE_CUDA_ONLY:
    trainer_fn   = train_model_cuda
    eval_fn      = eval_mse_cuda
    predict_fn   = predict_array_cuda
else:
    trainer_fn   = train_model
    eval_fn      = eval_mse
    predict_fn   = predict_array

def test_stocks(file_list, *, patch_len=5, train_frac=0.7, val_frac=0.15,
                n_days_context=3, add_calendar=False,
                d_model=256, nhead=4, num_layers=3,
                batch_size=64, lr=1e-3, max_epochs=50,
                save_dir="/content/drive/MyDrive/data"):
    os.makedirs(save_dir, exist_ok=True)

    for path in file_list:
        df_raw = pd.read_csv(path)

        # preprocess (optionally add calendar features)
        out = preprocess_stock(
            df_raw, patch_len=patch_len,
            train_frac=train_frac, val_frac=val_frac,
            add_calendar=add_calendar
        )

        # derive context window from tokens_per_day
        T_per_day = out["tokens_per_day"]          # e.g., 76 when patch_len=5
        context_window = T_per_day * n_days_context

        # build sequences per split
        Xtr_ctx, ytr_ctx = stack_context_days(out["X_train"], out["y_train"], context_window=context_window)
        Xva_ctx, yva_ctx = stack_context_days(out["X_val"],   out["y_val"],   context_window=context_window)
        Xte_ctx, yte_ctx = stack_context_days(out["X_test"],  out["y_test"],  context_window=context_window)

        # train
        model = trainer_fn(
            Xtr_ctx, ytr_ctx, Xva_ctx, yva_ctx,
            d_model=d_model, nhead=nhead, num_layers=num_layers,
            batch_size=batch_size, lr=lr, max_epochs=max_epochs
        )

        # evaluate
        yte_pred_log = predict_fn(Xte_ctx, model)
        yte_true_log = yte_ctx

        val_mse = eval_fn(Xva_ctx, yva_ctx, model)
        te_mse  = eval_fn(Xte_ctx, yte_ctx, model)
        print(f"Val MSE (logRV): {val_mse:.6f} | Test MSE (logRV): {te_mse:.6f}")

        # naive baseline (y_t = y_{t-1})
        y_naive = np.roll(yte_true_log, 1)
        naive_mse = np.mean((y_naive[1:] - yte_true_log[1:])**2)
        print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
        print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

        # save model + plot
        stock_name = os.path.splitext(os.path.basename(path))[0]
        ckpt_path = os.path.join(save_dir, f"vol_transformer_{stock_name}_dm{d_model}_h{nhead}_L{num_layers}.pt")
        torch.save(model.state_dict(), ckpt_path)
        print(f"Saved model to {ckpt_path}")

        plt.figure(figsize=(12, 6))
        plt.plot(yte_true_log, label="True log RV", color="black")
        plt.plot(y_naive, label="Naive (yesterday=today)", linestyle="--")
        plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
        plt.title(f"{stock_name} (Test) | Naive: {naive_mse:.4f} | Transformer: {te_mse:.4f}")
        plt.xlabel("Test days")
        plt.ylabel("log Realized Volatility")
        plt.legend()
        fig_path = os.path.join(save_dir, f"{stock_name}_dm{d_model}_h{nhead}_L{num_layers}_ctx{n_days_context}.png")
        plt.savefig(fig_path, dpi=300, bbox_inches="tight")
        plt.close()
        print(f"Saved plot to {fig_path}\n")


In [14]:
def test_stocks(file_list):
  for path in file_list:
    df_raw = pd.read_csv(path)
    out = preprocess_stock(df_raw, patch_len=5, train_frac=0.7, val_frac=0.15)
    Xtr_ctx, ytr_ctx = stack_context_days(out["X_train"], out["y_train"], context_window=76*3)
    Xva_ctx, yva_ctx = stack_context_days(out["X_val"],   out["y_val"],   context_window=76*3)
    Xte_ctx, yte_ctx = stack_context_days(out["X_test"],  out["y_test"],  context_window=76*3)

    # Train the model
    model = train_model(
        Xtr_ctx, ytr_ctx, Xva_ctx, yva_ctx,
        d_model=256, nhead=4, num_layers=3,
        batch_size=64, lr=1e-3, max_epochs=50
    )
    yte_pred_log = predict_array(Xte_ctx, model)          # predicted log(RV)
    yte_true_log = yte_ctx                         # true log(RV)

    # save model
    stock_name = os.path.splitext(os.path.basename(path))[0]
    torch.save(model.state_dict(), f"/content/drive/MyDrive/data/vol_transformer_{stock_name}_256_head4_lay3_bs64_lre3_pl3_sq10.pt")
    val_mse = eval_mse(Xva_ctx, yva_ctx, model)
    te_mse  = eval_mse(Xte_ctx, yte_ctx, model)
    print(f"Val MSE (logRV): {val_mse:.6f} | Test MSE (logRV): {te_mse:.6f}")
    y_naive = np.roll(yte_true_log, 1)
    y_true  = yte_true_log
    naive_mse = np.mean((y_naive[1:] - y_true[1:])**2)
    #naive_mse = np.mean((y_naive - yte_true_log)**2)
    print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
    print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

    #import matplotlib.pyplot as plt

    plt.figure(figsize=(12,6))
    plt.plot(yte_true_log, label="True log RV", color="black")
    plt.plot(y_naive, label="Naive baseline (yesterday = today)", linestyle="--")
    plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
    plt.title(f"Volatility Forecasts ({stock_name} Test set); Naive: {naive_mse:.4f}; Transformer: {te_mse:.4f}")
    plt.xlabel("Test days")
    plt.ylabel("log Realized Volatility")
    plt.legend()
    plt.savefig(f"/content/drive/MyDrive/data/{stock_name}_256_4_3_pl5_sq5.png", dpi=300, bbox_inches="tight")
    plt.show()



    plt.close()

In [8]:
df_ibm = pd.read_csv("/content/drive/MyDrive/data/stocks/Apple.csv")
data_folder = "/content/drive/MyDrive/data/stocks/"
stock_files = glob.glob(os.path.join(data_folder, "*.csv"))
stock_files

['/content/drive/MyDrive/data/stocks/Proctor_Gamble.csv',
 '/content/drive/MyDrive/data/stocks/United_Health.csv',
 '/content/drive/MyDrive/data/stocks/Microsoft.csv',
 '/content/drive/MyDrive/data/stocks/Disney.csv',
 '/content/drive/MyDrive/data/stocks/Boeing.csv',
 '/content/drive/MyDrive/data/stocks/Honeywell.csv',
 '/content/drive/MyDrive/data/stocks/Intel.csv',
 '/content/drive/MyDrive/data/stocks/Home_Depot.csv',
 '/content/drive/MyDrive/data/stocks/Salesforce.csv',
 '/content/drive/MyDrive/data/stocks/Verizon_Communications.csv',
 '/content/drive/MyDrive/data/stocks/Merck_Co.csv',
 '/content/drive/MyDrive/data/stocks/Johnson_Johnson.csv',
 '/content/drive/MyDrive/data/stocks/Travelers.csv',
 '/content/drive/MyDrive/data/stocks/Apple.csv',
 '/content/drive/MyDrive/data/stocks/IBM.csv',
 '/content/drive/MyDrive/data/stocks/Caterpillar.csv',
 '/content/drive/MyDrive/data/stocks/McDonalds.csv',
 '/content/drive/MyDrive/data/stocks/amex.csv',
 '/content/drive/MyDrive/data/stocks/Coc

In [None]:
out = preprocess_stock(df_ibm, patch_len=5, train_frac=0.7, val_frac=0.15)
Xtr_ctx, ytr_ctx = stack_context_days(out["X_train"], out["y_train"], context_window=5)
Xva_ctx, yva_ctx = stack_context_days(out["X_val"],   out["y_val"],   context_window=5)
Xte_ctx, yte_ctx = stack_context_days(out["X_test"],  out["y_test"],  context_window=5)

ret_1m
381    2516
Name: count, dtype: int64


KeyboardInterrupt: 

In [10]:

loss_fn = torch.nn.MSELoss()

def eval_mse(X, y, model, bs=32):
    loader = mk_loader(X, y, bs=bs, shuffle=False)
    model.eval()
    tot, n = 0.0, 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.cuda(), yb.cuda()
            pred = model(xb)
            loss = torch.nn.MSELoss(pred, yb)
            tot += loss.item() * xb.size(0)
            n += xb.size(0)
    return tot / max(1, n)



# Get all test predictions
def predict_array(X, model, bs=32):
    loader = mk_loader(X, np.zeros(len(X), dtype=np.float32), bs=bs, shuffle=False)
    preds = []
    model.eval()
    with torch.no_grad():
        for xb, _ in loader:
            xb = xb.cuda()
            preds.append(model(xb).cpu().numpy())
    return np.concatenate(preds, axis=0)

#yte_pred_log = predict_array(Xte_ctx)          # predicted log(RV)
#yte_true_log = yte_ctx                         # true log(RV)

# save model
import torch
#torch.save(model.state_dict(), "/content/drive/MyDrive/data/vol_transformer_apple_256_head4_lay3_bs64_lre3_pl3_sq10.pt")
#print("Saved to vol_transformer_tiny.pt")


In [None]:
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive - y_true)**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(yte_true_log, label="True log RV", color="black")
plt.plot(y_naive, label="Naive baseline (yesterday = tomorrow)", linestyle="--")
plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
plt.title(f"Volatility Forecasts (IBM Test set); Naive: {naive_mse:.3f}; Transformer: {te_mse:.3f}")
plt.xlabel("Test days")
plt.ylabel("log Realized Volatility")
plt.legend()
plt.show()


NameError: name 'yte_true_log' is not defined

In [None]:

print(y_naive[0], yte_true_log[0])

In [None]:

plt.figure(figsize=(12,6))
plt.plot(yte_true_log[:50], label="True")
plt.plot(y_naive[:50], label="Naive", linestyle="--")
plt.plot(yte_pred_log[:50], label="Transformer", alpha=0.8)
plt.legend(); plt.show()


In [None]:
import os
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive - y_true)**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(yte_true_log, label="True log RV", color="black")
plt.plot(y_naive, label="Naive baseline (yesterday = today)", linestyle="--")
plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
plt.title("Volatility Forecasts (IBM Test set)")
plt.xlabel("Test days")
plt.ylabel("log Realized Volatility")
plt.legend()
plt.savefig("/content/drive/MyDrive/data/batch_5.png", dpi=300, bbox_inches="tight")
plt.show()



plt.close()

In [None]:
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive[1:] - y_true[1:])**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")

In [None]:
import os
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive[1:] - y_true)**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(yte_true_log, label="True log RV", color="black")
plt.plot(y_naive, label="Naive baseline (yesterday = today)", linestyle="--")
plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
plt.title(f"Volatility Forecasts (IBM Test set); Naive: {naive_mse:.3f}; Transformer: {te_mse:.3f}")
plt.xlabel("Test days")
plt.ylabel("log Realized Volatility")
plt.legend()
plt.savefig("/content/drive/MyDrive/data/256_4_3_pl3_sq10.png", dpi=300, bbox_inches="tight")
plt.show()



plt.close()

In [None]:
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive[1:] - y_true[1:])**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(yte_true_log, label="True log RV", color="black")
plt.plot(y_naive, label="Naive baseline (yesterday = today)", linestyle="--")
plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
plt.title(f"Volatility Forecasts (Amgen Test set); Naive: {naive_mse:.3f}; Transformer: {te_mse:.3f}")
plt.xlabel("Test days")
plt.ylabel("log Realized Volatility")
plt.legend()
plt.savefig("/content/drive/MyDrive/data/amgen_256_4_3_pl5_sq5.png", dpi=300, bbox_inches="tight")
plt.show()



plt.close()

In [None]:
y_naive = np.roll(yte_true_log, 1)
y_true  = yte_true_log
naive_mse = np.mean((y_naive[1:] - y_true[1:])**2)
#naive_mse = np.mean((y_naive - yte_true_log)**2)
print(f"Naive baseline Test MSE (logRV): {naive_mse:.6f}")
print(f"Transformer Test MSE (logRV): {te_mse:.6f}")

#import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(yte_true_log, label="True log RV", color="black")
plt.plot(y_naive, label="Naive baseline (yesterday = today)", linestyle="--")
plt.plot(yte_pred_log, label="Transformer", alpha=0.8)
plt.title(f"Volatility Forecasts (Apple Test set); Naive: {naive_mse:.4f}; Transformer: {te_mse:.4f}")
plt.xlabel("Test days")
plt.ylabel("log Realized Volatility")
plt.legend()
plt.savefig("/content/drive/MyDrive/data/apple_256_4_3_pl5_sq5.png", dpi=300, bbox_inches="tight")
plt.show()



plt.close()

In [15]:
test_stocks(["/content/drive/MyDrive/data/stocks/Apple.csv"])

ret_1m
381    2516
Name: count, dtype: int64




Epoch 1/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 001 | Train 3.980377 | Val 0.606014 *


Epoch 2/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 002 | Train 0.557126 | Val 0.700737


Epoch 3/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 003 | Train 0.554645 | Val 0.557867 *


Epoch 4/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 004 | Train 0.559986 | Val 0.547633 *


Epoch 5/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 005 | Train 0.528814 | Val 0.790031


Epoch 6/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 006 | Train 0.473861 | Val 0.660205


Epoch 7/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 007 | Train 0.446143 | Val 0.565152


Epoch 8/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 008 | Train 0.446191 | Val 0.639821


Epoch 9/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 009 | Train 0.424747 | Val 0.582147


Epoch 10/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 010 | Train 0.512362 | Val 0.568213


Epoch 11/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 011 | Train 0.432351 | Val 0.540872 *


Epoch 12/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 012 | Train 0.393845 | Val 0.545043


Epoch 13/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 013 | Train 0.403451 | Val 0.603310


Epoch 14/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 014 | Train 0.410545 | Val 0.557046


Epoch 15/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 015 | Train 0.420125 | Val 0.656954


Epoch 16/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 016 | Train 0.394419 | Val 0.565854


Epoch 17/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 017 | Train 0.408686 | Val 0.520787 *


Epoch 18/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 018 | Train 0.402677 | Val 0.533311


Epoch 19/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 019 | Train 0.407671 | Val 0.636083


Epoch 20/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 020 | Train 0.425153 | Val 0.741278


Epoch 21/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 021 | Train 0.428978 | Val 0.602241


Epoch 22/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 022 | Train 0.388764 | Val 0.870960


Epoch 23/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 023 | Train 0.408852 | Val 0.690667


Epoch 24/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 024 | Train 0.402366 | Val 0.506436 *


Epoch 25/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 025 | Train 0.383210 | Val 0.552491


Epoch 26/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 026 | Train 0.381395 | Val 0.493002 *


Epoch 27/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 027 | Train 0.373499 | Val 0.743075


Epoch 28/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 028 | Train 0.384638 | Val 0.663274


Epoch 29/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 029 | Train 0.403045 | Val 0.530649


Epoch 30/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 030 | Train 0.373505 | Val 0.584816


Epoch 31/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 031 | Train 0.369133 | Val 0.544984


Epoch 32/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 032 | Train 0.387593 | Val 0.691602


Epoch 33/50:   0%|          | 0/27 [00:00<?, ?it/s]

Epoch 033 | Train 0.398575 | Val 0.543957
Early stopping at epoch 33 (no improvement for 7 epochs)
Restored best model with validation loss: 0.493002


RuntimeError: Boolean value of Tensor with more than one value is ambiguous