In [1]:
import torch, warnings, numpy as np
print("Torch:", torch.__version__, "| CUDA:", torch.cuda.is_available())
warnings.filterwarnings("ignore"); np.seterr(all="ignore")

Torch: 2.6.0+cu124 | CUDA: True


{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
%%writefile config.yaml
train_start_date: "2018-01-01"
train_end_date:   "2025-05-30"
valid_start_date: "2025-06-01"
valid_end_date:   "2025-08-22"

ticker_list:
  - NVDA
  - AMD
  - INTC
  - TXN
  - MCHP
  - QCOM
  - MSFT
  - ORCL
  - NOW
  - CSCO
  - IBM
  - INTU
  - SMCI
  - AAPL
  - AMZN
  - TSLA
  - GOOGL
  - META
  - PYPL
  - GPN
  - CWEN
  - FSLR
  - SEDG
  - BE
  - MNTK
  - AMTX
  - CLNE
  - CEG
  - BN
  - NEE
  - COP
  - DVN
  - ENB
  - XOM
  - PSX
  - SHEL
  - MPC
  - ED
  - PPL
  - SO

context_length: 160
batch_size: 128
grad_accum_steps: 2        
learning_rate: 0.0003
weight_decay: 0.0001
epochs: 18
d_model: 160
n_heads: 5
n_layers: 3
dropout: 0.15
random_seed: 123
device: "auto"

lambda_dir: 0.25
use_indicators: true
per_ticker_scaler: true
targets: ["Open","Close","Low","High","Volume"]

artifacts_dir: "artifacts"
cache_dir: "artifacts/cache"
model_path: "artifacts/kronos_p100.pth"
scaler_path: "artifacts/scalers.pkl"
predictions_path: "artifacts/predictions.json"
evaluation_csv: "artifacts/evaluation.csv"

use_kronos_base: false
kronos_repo: "NeoQuasar/Kronos-base"
kronos_tokenizer_repo: "NeoQuasar/Kronos-Tokenizer-base"

Writing config.yaml


In [3]:
%%writefile util.py
import os, pickle, yaml
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import Dataset

TARGETS = ["Open","Close","Low","High","Volume"]

def load_cfg(path: str) -> dict:
    with open(path, "r") as f: return yaml.safe_load(f)

def ensure_dir(path: str): os.makedirs(path, exist_ok=True)

def set_seed(seed: int = 123):
    import random
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

def yf_download(ticker: str, start: str, end: str) -> pd.DataFrame:
    end_exc = (pd.to_datetime(end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    df = yf.download(ticker, start=start, end=end_exc, progress=False, auto_adjust=False, group_by="column")
    if df is None or df.empty: return pd.DataFrame()
    if isinstance(df.columns, pd.MultiIndex): df.columns = df.columns.get_level_values(0)
    keep = ["Open","High","Low","Close","Volume"]
    for k in keep:
        if k not in df.columns: df[k] = np.nan
    df = df[keep].dropna()
    df.index = pd.to_datetime(df.index).tz_localize(None)
    df = df[~df.index.duplicated(keep="last")]
    return df

def add_indicators(df: pd.DataFrame) -> pd.DataFrame:
    from ta.trend import SMAIndicator, MACD
    from ta.momentum import RSIIndicator
    from ta.volatility import BollingerBands
    out = df.copy(); close = out["Close"].astype(float)
    for w in (5, 10, 20, 50): out[f"SMA{w}"] = SMAIndicator(close, window=w).sma_indicator()
    out["RSI14"] = RSIIndicator(close, window=14).rsi()
    macd = MACD(close)
    out["MACD"] = macd.macd(); out["MACD_Signal"] = macd.macd_signal(); out["MACD_Hist"] = macd.macd_diff()
    bb = BollingerBands(close, window=20, window_dev=2)
    out["BB_High"] = bb.bollinger_hband(); out["BB_Low"]  = bb.bollinger_lband()
    out["Ret1"] = close.pct_change(); out["LogRet1"] = np.log(close.replace(0, np.nan)).diff(); out["Vol20"] = out["LogRet1"].rolling(20).std()
    out = out.replace([np.inf, -np.inf], np.nan).dropna()
    return out

def build_features(df: pd.DataFrame, use_indicators: bool) -> pd.DataFrame:
    return add_indicators(df) if use_indicators else df.copy()

def load_or_build_features(ticker: str, cfg: dict, full_end: str) -> pd.DataFrame:
    ensure_dir(cfg["cache_dir"])
    cache_path = os.path.join(cfg["cache_dir"], f"{ticker}.parquet")
    if os.path.exists(cache_path):
        try:
            df = pd.read_parquet(cache_path)
            # quick freshness check
            if df.index.max().date() >= pd.to_datetime(cfg["valid_end_date"]).date():
                return df
        except Exception:
            pass
    raw = yf_download(ticker, cfg["train_start_date"], full_end)
    if raw.empty: return raw
    feat = build_features(raw, cfg.get("use_indicators", True))
    feat.to_parquet(cache_path)
    return feat

@dataclass
class TickerScalers:
    x: StandardScaler
    y_price: StandardScaler

def fit_scalers_per_ticker(train_df: pd.DataFrame, feature_cols: List[str]) -> TickerScalers:
    xs = train_df[feature_cols].values.astype(np.float32)
    prices = train_df[["Open","Close","Low","High"]].values.astype(np.float32)
    sx = StandardScaler().fit(xs); sy_price = StandardScaler().fit(prices)
    return TickerScalers(sx, sy_price)

class WindowDataset(Dataset):
    def __init__(self, x, prices, volumes, ctx, sx: StandardScaler, sy_price: StandardScaler, ticker_id: int):
        self.ctx = int(ctx)
        self.X = (x - sx.mean_) / np.where(sx.scale_==0, 1.0, sx.scale_)
        self.P = (prices - sy_price.mean_) / np.where(sy_price.scale_==0, 1.0, sy_price.scale_)
        self.V = np.log1p(volumes)
        self.T = x.shape[0]
        self.ticker_id = ticker_id
    def __len__(self): return max(0, self.T - self.ctx - 1)
    def __getitem__(self, idx):
        t = idx + self.ctx - 1
        x_ctx = self.X[t-(self.ctx-1):t+1]
        y_price_next = self.P[t+1]
        y_vol_next = self.V[t+1]
        return (torch.from_numpy(x_ctx).float(),
                torch.from_numpy(y_price_next).float(),
                torch.tensor(y_vol_next, dtype=torch.float32))

class Encoder(nn.Module):
    def __init__(self, f_in: int, d_model=160, n_heads=5, n_layers=3, dropout=0.15):
        super().__init__()
        self.proj = nn.Linear(f_in, d_model)
        enc_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads, dim_feedforward=4*d_model, dropout=dropout, batch_first=True, norm_first=True)
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=n_layers)
        self.norm = nn.LayerNorm(d_model)
    def forward(self, x):
        z = self.proj(x)
        h = self.encoder(z)
        return self.norm(h[:, -1, :])

class Heads(nn.Module):
    def __init__(self, d_model=160, hidden=160, dropout=0.15):
        super().__init__()
        self.shared = nn.Sequential(nn.Linear(d_model, hidden), nn.GELU(), nn.Dropout(dropout))
        self.price = nn.Linear(hidden, 4)
        self.vol   = nn.Linear(hidden, 1)
    def forward(self, h):
        s = self.shared(h)
        return self.price(s), self.vol(s)

class KronosP100(nn.Module):
    def __init__(self, f_in, cfg):
        super().__init__()
        self.enc = Encoder(f_in, cfg.get("d_model",160), cfg.get("n_heads",5), cfg.get("n_layers",3), cfg.get("dropout",0.15))
        self.heads = Heads(cfg.get("d_model",160), hidden=cfg.get("d_model",160), dropout=cfg.get("dropout",0.15))
    def forward(self, x):
        h = self.enc(x)
        return self.heads(h)

def device_from_cfg(cfg):
    if cfg.get("device") == "cpu": return torch.device("cpu")
    if cfg.get("device") == "cuda" and torch.cuda.is_available(): return torch.device("cuda")
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

Writing util.py


In [4]:
%%writefile train.py
import os, pickle
import numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import DataLoader, ConcatDataset
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
from util import (load_cfg, ensure_dir, set_seed, build_features, load_or_build_features,
                  WindowDataset, fit_scalers_per_ticker, KronosP100, device_from_cfg)

def main():
    cfg = load_cfg("config.yaml")
    set_seed(cfg.get("random_seed",123))
    ensure_dir(cfg["artifacts_dir"]); ensure_dir(cfg["cache_dir"])
    dev = device_from_cfg(cfg)

    ctx = int(cfg["context_length"])
    gacc = int(cfg.get("grad_accum_steps", 1))
    use_ind = bool(cfg.get("use_indicators", True))

    datasets, scalers = [], {}
    feature_cols = None

    for i, tk in enumerate(cfg["ticker_list"]):
        df_feat = load_or_build_features(tk, cfg, cfg["valid_end_date"])
        if df_feat is None or df_feat.empty: continue
        if feature_cols is None: feature_cols = list(df_feat.columns)
        train_df = df_feat.loc[:pd.to_datetime(cfg["train_end_date"])]
        if len(train_df) < ctx + 2: continue
        ts = fit_scalers_per_ticker(train_df, feature_cols)
        scalers[tk] = {"x_mean": ts.x.mean_.tolist(), "x_scale": ts.x.scale_.tolist(),
                       "y_price_mean": ts.y_price.mean_.tolist(), "y_price_scale": ts.y_price.scale_.tolist(),
                       "feature_cols": feature_cols}
        X = train_df[feature_cols].values.astype(np.float32)
        P = train_df[["Open","Close","Low","High"]].values.astype(np.float32)
        V = train_df[["Volume"]].values.astype(np.float32).ravel()
        ds = WindowDataset(X, P, V, ctx=ctx, sx=ts.x, sy_price=ts.y_price, ticker_id=i)
        if len(ds) > 0: datasets.append(ds)

    if not datasets: raise RuntimeError("No training data after preprocessing.")

    train_ds = ConcatDataset(datasets)
    loader = DataLoader(train_ds, batch_size=int(cfg["batch_size"]), shuffle=True, drop_last=True,
                        num_workers=2, pin_memory=True, persistent_workers=True)

    f_in = len(feature_cols)
    model = KronosP100(f_in, cfg).to(dev)

    opt = torch.optim.AdamW(model.parameters(), lr=float(cfg["learning_rate"]), weight_decay=float(cfg.get("weight_decay",0.0)))
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=max(1,int(cfg["epochs"])))
    mse = nn.MSELoss(); ce  = nn.CrossEntropyLoss()
    lam = float(cfg.get("lambda_dir", 0.25))
    scaler = GradScaler(enabled=(dev.type=="cuda"))

    torch.backends.cudnn.benchmark = True

    def dir_logits(pred_close_std, last_close_std):
        d = pred_close_std - last_close_std
        return torch.stack([-d, -torch.abs(d), d], dim=1)

    epochs = int(cfg["epochs"])
    model.train()
    for ep in range(1, epochs+1):
        running = 0.0
        opt.zero_grad(set_to_none=True)
        for step, batch in enumerate(tqdm(loader, desc=f"Epoch {ep}/{epochs}", leave=False), start=1):
            xb, y_price_next, y_vol_next = batch
            xb = xb.to(dev); y_price_next = y_price_next.to(dev); y_vol_next = y_vol_next.to(dev)

            with autocast(enabled=(dev.type=="cuda")):
                price_std, vol_log = model(xb)
                vol_log = vol_log.squeeze(1)
                loss_price = mse(price_std, y_price_next)
                loss_vol   = mse(vol_log, y_vol_next)
                last_close_std = xb[:, -1, feature_cols.index("Close")]
                close_pred_std = price_std[:, 1]
                true_diff = y_price_next[:,1] - last_close_std
                y_dir = torch.where(true_diff>0, 2, torch.where(true_diff<0, 0, 1))
                logits = dir_logits(close_pred_std, last_close_std)
                loss_dir = ce(logits, y_dir.long())
                loss = loss_price + loss_vol + lam * loss_dir

            loss = loss / gacc
            scaler.scale(loss).backward()

            if step % gacc == 0:
                scaler.step(opt); scaler.update(); opt.zero_grad(set_to_none=True)

            running += float(loss.item()) * xb.size(0) * gacc

        running /= len(train_ds)
        sched.step()
        print(f"[Epoch {ep}] loss={running:.6f} lr={sched.get_last_lr()[0]:.2e}")

    torch.save({"state_dict": model.state_dict(), "cfg": cfg, "feature_cols": feature_cols}, cfg["model_path"])
    with open(cfg["scaler_path"], "wb") as f: pickle.dump(scalers, f)
    print("OK")

if __name__ == "__main__":
    main()

Writing train.py


In [5]:
%%writefile predict.py
import json, pickle, numpy as np, pandas as pd, torch
from util import load_cfg, ensure_dir, set_seed, load_or_build_features, KronosP100, device_from_cfg

def _consistency(o,c,l,h):
    lo = float(min(o,c,l,h)); hi = float(max(o,c,l,h))
    o = float(min(max(o, lo), hi)); c = float(min(max(c, lo), hi))
    return o, c, lo, hi

def main():
    cfg = load_cfg("config.yaml"); set_seed(cfg.get("random_seed",123)); ensure_dir(cfg["artifacts_dir"])
    dev = device_from_cfg(cfg)
    ckpt = torch.load(cfg["model_path"], map_location="cpu"); feature_cols = ckpt["feature_cols"]
    model = KronosP100(len(feature_cols), cfg).to(dev); model.load_state_dict(ckpt["state_dict"]); model.eval()
    with open(cfg["scaler_path"], "rb") as f: scalers = pickle.load(f)
    ctx = int(cfg["context_length"])
    v_start = pd.to_datetime(cfg["valid_start_date"]).date(); v_end = pd.to_datetime(cfg["valid_end_date"]).date()
    results = {}
    for tk in cfg["ticker_list"]:
        df_feat = load_or_build_features(tk, cfg, cfg["valid_end_date"])
        if df_feat is None or df_feat.empty or tk not in scalers: continue
        if not all(c in df_feat.columns for c in feature_cols): continue
        sx_mean = np.array(scalers[tk]["x_mean"], dtype=np.float32)
        sx_scale= np.where(np.array(scalers[tk]["x_scale"], dtype=np.float32)==0, 1.0, np.array(scalers[tk]["x_scale"], dtype=np.float32))
        py_mean = np.array(scalers[tk]["y_price_mean"], dtype=np.float32)
        py_scale= np.where(np.array(scalers[tk]["y_price_scale"], dtype=np.float32)==0, 1.0, np.array(scalers[tk]["y_price_scale"], dtype=np.float32))
        X = df_feat[feature_cols].values.astype(np.float32); dates = df_feat.index
        pred_map = {}
        for end_idx in range(ctx-1, len(df_feat)-1):
            d = dates[end_idx+1].date()
            if d < v_start or d > v_end: continue
            x_ctx = X[end_idx-(ctx-1):end_idx+1]
            x_norm = (x_ctx - sx_mean) / sx_scale
            with torch.no_grad():
                price_std, vol_log = model(torch.tensor(x_norm[None, ...], dtype=torch.float32, device=dev))
                price_std = price_std.cpu().numpy()[0]; vol_log = vol_log.cpu().numpy()[0,0]
            price = price_std * py_scale + py_mean
            o, c, l, h = float(price[0]), float(price[1]), float(price[2]), float(price[3])
            o, c, l, h = _consistency(o,c,l,h)
            vol = int(np.clip(np.expm1(vol_log), 0, 5e9))
            pred_map[str(d)] = {"Open": float(f"{o:.2f}"), "Close": float(f"{c:.2f}"), "Low": float(f"{l:.2f}"), "High": float(f"{h:.2f}"), "Volume": vol}
        if pred_map: results[tk] = pred_map
    with open(cfg["predictions_path"], "w") as f: json.dump(results, f, indent=2)
    print("OK")

if __name__ == "__main__":
    main()

Writing predict.py


In [6]:
%%writefile evaluate.py
import json, numpy as np, pandas as pd, yfinance as yf, yaml, os

TARGETS = ["Open","Close","Low","High","Volume"]

def _clean(df):
    if isinstance(df.columns, pd.MultiIndex): df.columns = df.columns.get_level_values(0)
    cols = [str(c).strip().title() for c in df.columns]; df = df.copy(); df.columns = cols
    out = pd.DataFrame(index=df.index)
    for name in ["Open","High","Low","Close","Volume"]:
        ser = df[name] if name in df.columns else pd.Series(index=df.index, dtype=float)
        out[name] = ser
    return out.dropna(subset=["Open","High","Low","Close","Volume"])

def _pos(index, d):
    loc = index.get_loc(d)
    if isinstance(loc, slice): return int(range(loc.start, loc.stop)[-1])
    if isinstance(loc, (np.ndarray, list)): return int(loc[-1])
    return int(loc)

def _scalar(a, r, c): return float(a[int(r), int(c)])

def _cls(y_t, y_tp1):
    if y_tp1 > y_t: return 1
    if y_tp1 < y_t: return -1
    return 0

def _macro(trues, preds):
    classes = [-1,0,1]
    trues = np.asarray(trues); preds = np.asarray(preds)
    acc = float((trues==preds).mean()) if len(trues) else 0.0
    P=R=F=0.0
    for c in classes:
        tp = np.sum((preds==c)&(trues==c)); fp = np.sum((preds==c)&(trues!=c)); fn = np.sum((preds!=c)&(trues==c))
        p = tp/(tp+fp) if (tp+fp)>0 else 0.0; r = tp/(tp+fn) if (tp+fn)>0 else 0.0; f = (2*p*r)/(p+r) if (p+r)>0 else 0.0
        P+=p; R+=r; F+=f
    return float(P/3), float(R/3), float(F/3), acc

def main():
    with open("config.yaml","r") as f: cfg = yaml.safe_load(f)
    os.makedirs(cfg["artifacts_dir"], exist_ok=True)
    with open(cfg["predictions_path"], "r") as f: preds = json.load(f)
    v_end = pd.to_datetime(cfg["valid_end_date"])
    rows = []
    for t in sorted(preds.keys()):
        gt = yf.download(t, start=cfg["valid_start_date"], end=(v_end + pd.Timedelta(days=1)).strftime("%Y-%m-%d"), progress=False, auto_adjust=False, group_by="column")
        if gt is None or gt.empty: continue
        gt = _clean(gt); gt.index = pd.to_datetime(gt.index).tz_localize(None); gt = gt.sort_index(); gt = gt[~gt.index.duplicated(keep="last")]
        p_dates = [pd.to_datetime(d) for d in preds[t].keys()]
        idx = sorted(set(gt.index) & set(p_dates))
        if not idx: continue
        a = gt[["Open","Close","Low","High","Volume"]].to_numpy()
        m = {"Open":0,"Close":1,"Low":2,"High":3,"Volume":4}
        for target in TARGETS:
            j = m[target]
            y_true, y_pred, dir_true, dir_pred = [], [], [], []
            for d in idx:
                try: pos = _pos(gt.index, d)
                except KeyError: continue
                if pos<=0: continue
                gt_today = _scalar(a, pos, j); prev_val = _scalar(a, pos-1, j)
                pj = preds[t][str(d.date())].get(target)
                if pj is None: continue
                pj = float(pj)
                y_true.append(gt_today); y_pred.append(pj)
                dir_true.append(_cls(prev_val, gt_today)); dir_pred.append(_cls(prev_val, pj))
            if not y_true: continue
            y_true = np.asarray(y_true, dtype=float); y_pred = np.asarray(y_pred, dtype=float)
            mse = float(np.mean((y_true - y_pred)**2))
            P,R,F,Acc = _macro(dir_true, dir_pred) if dir_true and dir_pred else (0,0,0,0)
            rows.append([t, target, round(mse,6), round(P,6), round(R,6), round(F,6), round(Acc,6)])
    out = cfg["evaluation_csv"]
    if rows:
        df = pd.DataFrame(rows, columns=["Ticker","Target","MSE","Precision","Recall","F1","Accuracy"])
        order = {k:i for i,k in enumerate(["Open","Close","Low","High","Volume"])}
        df = df.sort_values(by=["Ticker","Target"], key=lambda s: s.map(order) if s.name=="Target" else s)
        df.to_csv(out, index=False); print("OK")
    else:
        print("OK")

if __name__ == "__main__":
    main()

Writing evaluate.py


In [7]:
!python train.py
!python predict.py
!python evaluate.py

  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return op(a, b)
  return o

In [8]:
import pandas as pd

df = pd.read_csv("artifacts/evaluation.csv")
wide = (df.melt(id_vars=["Ticker","Target"], var_name="Metric", value_name="Val")
          .pivot_table(index="Ticker", columns=["Target","Metric"], values="Val"))
target_order = ["Open","Close","Low","High","Volume"]
metric_order = ["MSE","Precision","Recall","F1","Accuracy"]
wide = wide.reindex(columns=pd.MultiIndex.from_product([target_order, metric_order]))
wide.to_csv("artifacts/evaluation_wide.csv")
display(wide.head(15))
print("Saved:", "artifacts/evaluation_wide.csv")

Unnamed: 0_level_0,Open,Open,Open,Open,Open,Close,Close,Close,Close,Close,...,High,High,High,High,High,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,MSE,Precision,Recall,F1,Accuracy,MSE,Precision,Recall,F1,Accuracy,...,MSE,Precision,Recall,F1,Accuracy,MSE,Precision,Recall,F1,Accuracy
Ticker,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AAPL,10.379968,0.424695,0.402778,0.364984,0.561404,10.690281,0.297619,0.299835,0.289495,0.438596,...,14.559836,0.392593,0.37284,0.339947,0.54386,1904794000000000.0,0.439744,0.367406,0.303431,0.54386
AMD,29.506822,0.420918,0.37973,0.287279,0.45614,36.985578,0.424901,0.392583,0.336232,0.526316,...,34.354926,0.421569,0.36786,0.281532,0.473684,1221499000000000.0,0.152047,0.333333,0.208835,0.45614
AMTX,0.108797,0.513333,0.413793,0.363396,0.596491,0.058516,0.388889,0.402778,0.360269,0.54386,...,0.09513,0.5,0.430108,0.372222,0.578947,1070454000000.0,0.5,0.40625,0.34188,0.561404
AMZN,11.264524,0.54386,0.534392,0.538288,0.824561,17.80248,0.430556,0.386364,0.368126,0.631579,...,15.503938,0.5169,0.465909,0.466596,0.736842,580772300000000.0,0.368881,0.358561,0.34188,0.561404
BE,5.426228,0.454268,0.436364,0.394647,0.596491,9.749658,0.328063,0.331481,0.260907,0.403509,...,8.265858,0.379365,0.371693,0.323685,0.491228,40427660000000.0,0.44289,0.411667,0.403509,0.649123
BN,0.452202,0.528395,0.529363,0.526273,0.789474,1.267621,0.366097,0.361869,0.344532,0.526316,...,0.720054,0.471867,0.466749,0.465608,0.701754,6327283000000.0,0.169591,0.333333,0.224806,0.508772
CEG,47.942251,0.571225,0.540529,0.54329,0.824561,76.832996,0.31773,0.324167,0.2956,0.526316,...,53.746456,0.467178,0.463193,0.463841,0.701754,5613969000000.0,0.494048,0.344444,0.238373,0.491228
CLNE,0.004723,0.522746,0.538324,0.518239,0.77193,0.012067,0.301871,0.323048,0.281183,0.508772,...,0.010028,0.334656,0.344444,0.336847,0.508772,2298719000000.0,0.47534,0.401888,0.359892,0.596491
COP,2.916874,0.482883,0.469622,0.463841,0.701754,4.237379,0.315681,0.315681,0.315681,0.473684,...,3.953227,0.428775,0.416049,0.402778,0.614035,65432510000000.0,0.163743,0.333333,0.219608,0.491228
CSCO,5.384144,0.464286,0.342857,0.206553,0.403509,6.165392,0.157895,0.333333,0.214286,0.473684,...,5.837777,0.128655,0.333333,0.185654,0.385965,184457600000000.0,0.166667,0.321839,0.219608,0.491228


Saved: artifacts/evaluation_wide.csv


In [10]:
import pandas as pd

df = pd.read_csv("artifacts/evaluation.csv")

excel_path = "artifacts/evaluation.xlsx"
df.to_excel(excel_path, index=False)

print("Excel file saved at:", excel_path)

Excel file saved at: artifacts/evaluation.xlsx


In [9]:
summary = (df.groupby("Target")[["MSE","Precision","Recall","F1","Accuracy"]]
             .mean().round(4).reset_index())
display(summary)
summary.to_csv("artifacts/summary_by_target.csv", index=False)
print("Saved: artifacts/summary_by_target.csv")

Unnamed: 0,Target,MSE,Precision,Recall,F1,Accuracy
0,Close,58.4912,0.3025,0.337,0.282,0.4934
1,High,54.6295,0.3437,0.3678,0.3074,0.5246
2,Low,50.2491,0.3648,0.3691,0.3024,0.5171
3,Open,43.6892,0.4549,0.4239,0.3771,0.6167
4,Volume,603323700000000.0,0.3618,0.3644,0.2995,0.5404


Saved: artifacts/summary_by_target.csv


In [11]:
import shutil, os, glob, json, yaml

with open("config.yaml","r") as f:
    cfg = yaml.safe_load(f)
with open("artifacts/config_used.yaml","w") as f:
    yaml.safe_dump(cfg, f)

include = [
    "config.yaml",
    "artifacts/config_used.yaml",
    "artifacts/kronos_p100.pth",
    "artifacts/scalers.pkl",
    "artifacts/predictions.json",
    "artifacts/evaluation.csv",
    "artifacts/evaluation_wide.csv",
    "artifacts/summary_by_target.csv",
    "train.py", "predict.py", "evaluate.py", "util.py",
]
include = [p for p in include if os.path.exists(p)]
shutil.make_archive("submission_artifacts", "zip", root_dir=".", base_dir=".")

os.makedirs("submit_bundle", exist_ok=True)
for p in include:
    tgt = os.path.join("submit_bundle", p)
    os.makedirs(os.path.dirname(tgt), exist_ok=True)
    shutil.copy2(p, tgt)
shutil.make_archive("submit_bundle", "zip", "submit_bundle")
print("Created: submit_bundle.zip")

Created: submit_bundle.zip
