# Model (TinyLlama) training and evaluation

Training on Features_Group_1, Epochs 3

In [None]:

import os, re, math, json, random
import numpy as np
import pandas as pd
from dataclasses import dataclass
from typing import List, Dict, Optional

import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from transformers.trainer_utils import set_seed
from peft import LoraConfig, get_peft_model
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

# ============================  CONFIG  ============================
# Single place for the paths and hyper-parameters read by the rest of this cell.
CONFIG = {
    "feature_files": ["/content/features_trading_only_2.csv"],  # CSVs read and concatenated by load_and_merge
    "date_col": "date",            # parsed with pd.to_datetime and used as the sort key
    "vol_col": "volume",           # log1p + standardised below to form the z_hist series
    "label_col": "z_target",       # value the model is trained to emit as text
    "context_len": 64,             # number of past z_hist values placed in each prompt
    "max_features": 16,            # keep this many features, ranked by |corr| with the label
    "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "seed": 42,
    "train_frac": 0.9,             # leading fraction of samples used for training; the rest is validation
    "epochs": 3,
    "lr": 1e-4,
    "train_bs": 2,                 # per-device batch size (also used for the eval batch size)
    "grad_accum": 16,              # passed to gradient_accumulation_steps
    "max_length": 1024,            # token limit applied by TxtDS
    "warmup_ratio": 0.1,
    "weight_decay": 0.01,
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "output_dir": "/content/tinyllama_ts_lora",  # Trainer output directory
    "bf16": True,                  # request bfloat16; only honoured when CUDA capability major >= 8 (else float16)
}

# Seed via the transformers helper before any data or model work happens.
set_seed(CONFIG["seed"])

# ======================  LOAD & MERGE FEATURES  ==================
def load_and_merge(paths: List[str], date_col: str):
    """Read every CSV in *paths* and return one date-sorted, gap-filled frame.

    Rows whose *date_col* cannot be parsed are dropped per file. After
    concatenation the frame is sorted by *date_col*, re-indexed, and every
    +/-inf is turned into NaN before a forward fill followed by a backward fill.
    """
    def _read_one(csv_path):
        frame = pd.read_csv(csv_path)
        frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")
        return frame.dropna(subset=[date_col])

    frames = [_read_one(csv_path) for csv_path in paths]
    merged = pd.concat(frames, axis=0, ignore_index=True)
    merged = merged.sort_values(date_col).reset_index(drop=True)
    merged = merged.replace([np.inf, -np.inf], np.nan)
    return merged.ffill().bfill()

# Load every configured CSV into one date-sorted frame.
df = load_and_merge(CONFIG["feature_files"], CONFIG["date_col"])
print("Data shape:", df.shape)
# Preview of the key columns; this indexing requires "y_trading" and "y_log1p" to be present in the data.
print(df[[CONFIG["date_col"], CONFIG["vol_col"], "y_trading", "y_log1p", CONFIG["label_col"]]].head())

# =====================  SELECT TOP-K FEATURES  ====================
# Columns that are never offered as input features: the date, the label and
# the volume columns (presumably what the label is derived from — confirm).
EXCLUDE_COLS = {CONFIG["date_col"], CONFIG["label_col"], CONFIG["vol_col"], "y_trading", "y_log1p"}
num = df.select_dtypes(include=[np.number]).copy()
std = num.std(numeric_only=True)
# Keep only numeric columns with non-zero spread.
# NOTE(review): a constant label column would be dropped here too, and the
# num[CONFIG["label_col"]] lookup below would then raise KeyError.
non_const = std[std > 0].index.tolist()
num = num[non_const]
cand = [c for c in num.columns if c not in EXCLUDE_COLS]
if not cand:
    raise ValueError("No candidate numeric features found after exclusions.")
# Rank candidates by absolute correlation with the label; undefined correlations count as 0.
# NOTE(review): the correlation is computed over every row of df, including rows
# that end up in the validation split further down — confirm this is intended.
corr = num[cand].corrwith(num[CONFIG["label_col"]]).abs().replace([np.inf, -np.inf], np.nan).fillna(0.0)
feature_cols = corr.sort_values(ascending=False).index.tolist()[:CONFIG["max_features"]]
print("Selected feature columns:", feature_cols)

# ====================  BUILD z_history SERIES  ====================

# Standardised log1p(volume); this is the history sequence written into each prompt.
# NOTE(review): the scaler is fitted on the full series (train + validation rows).
# forecast_next reuses this same fitted z_hist_scaler at inference time.
vol = df[CONFIG["vol_col"]].astype(float).values.reshape(-1,1)
z_hist_scaler = StandardScaler()
z_hist_series = z_hist_scaler.fit_transform(np.log1p(vol)).reshape(-1)

# ====================  BUILD WINDOWS  ====================
def make_windows(df: pd.DataFrame, z_hist: np.ndarray, ctx: int, feat_cols: List[str], label_col: str):
    """Build one (history, features) sample and one label per row from index *ctx* on.

    Args:
        df: Source frame; rows are addressed by position.
        z_hist: 1-D array aligned with the rows of *df*.
        ctx: Number of preceding *z_hist* values placed in each sample.
        feat_cols: Columns whose row-*t* values form the feature dict.
        label_col: Column holding the label of row *t*.

    Returns:
        ``(X, Y)`` where ``X[i] = (z_hist[t-ctx:t] as a list, {col: value})``
        for ``t = ctx + i`` and ``Y`` is a float32 array of the matching labels.
        Both are empty when the frame has no more than *ctx* rows.
    """
    n = len(df)
    # Pull the needed columns out once; the previous per-step df.iloc[t]
    # built a full row Series twice for every sample.
    if feat_cols:
        feat_rows = df[feat_cols].to_dict(orient="records")
    else:
        # to_dict("records") yields no rows for a zero-column frame, so keep
        # the "empty dict per row" behaviour explicitly.
        feat_rows = [{} for _ in range(n)]
    labels = df[label_col].to_numpy(dtype=float)

    X = [(z_hist[t - ctx:t].tolist(), feat_rows[t]) for t in range(ctx, n)]
    Y = labels[ctx:].astype(np.float32)
    return X, Y

# One sample per row from index context_len onward: (z-history window, feature dict) -> label.
X_raw, Y = make_windows(df, z_hist_series, CONFIG["context_len"], feature_cols, CONFIG["label_col"])
print("Total samples:", len(X_raw))

# ===================  TRAIN / VAL SPLIT  ==========================
# Positional split with no shuffling: the first train_frac of the samples
# (df was sorted by date in load_and_merge) train the model, the rest validate it.
N = len(X_raw)
cut = int(N * CONFIG["train_frac"])
train_idx = np.arange(0, cut)
val_idx = np.arange(cut, N)

def ex_to_text(hist, feats, y_z):
    """Render one sample as a prompt/target text pair.

    The prompt lists the history values (4 decimals), then the features as
    ``name=value`` pairs (4 decimals, or ``none`` when *feats* is empty), and
    ends with ``next_z:``. The target is *y_z* with 5 decimals and a newline.
    """
    hist_str = ", ".join(f"{x:.4f}" for x in hist)
    if feats:
        feats_str = ", ".join(f"{k}={float(v):.4f}" for k,v in feats.items())
    else:
        feats_str = "none"
    return {
        "prompt": f"z_hist[{len(hist)}]:{hist_str}\nfeat:{feats_str}\nnext_z:",
        "target": f"{y_z:.5f}\n",
    }

# Render every sample as {"prompt", "target"} text for the two splits.
train_text = [ex_to_text(*X_raw[i], Y[i]) for i in train_idx]
val_text   = [ex_to_text(*X_raw[i], Y[i]) for i in val_idx]

# ===================  TOKENIZER / MODEL  =========================
model_name = CONFIG["model_name"]
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# The collator pads with pad_token_id, so fall back to EOS when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# bfloat16 only when requested AND a CUDA device with capability major >= 8 is present;
# every other case (including no CUDA at all) selects float16.
torch_dtype = (
    torch.bfloat16
    if (CONFIG["bf16"] and torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8)
    else torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    device_map="auto",
    attn_implementation="sdpa",   # no extra installs
)
# Training-time settings: no KV cache, gradient checkpointing on.
# NOTE(review): use_cache stays False on this object, which evaluate() and
# forecast_next() later call generate() on — confirm generation speed is acceptable.
model.config.use_cache = False
model.gradient_checkpointing_enable()

# LoRA targets for TinyLlama blocks
lora_cfg = LoraConfig(
    r=CONFIG["lora_r"], lora_alpha=CONFIG["lora_alpha"], lora_dropout=CONFIG["lora_dropout"],
    bias="none", task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
)
# Wrap the base model with the LoRA adapters and report the trainable parameter share.
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()

# ===================  DATASET / COLLATOR  ========================
class TxtDS(Dataset):
    """Tokenized prompt/target pairs for causal-LM fine-tuning.

    Each item is the prompt ids followed by the target ids. Prompt positions
    are labelled ``-100`` so only the target positions carry a label. Neither
    part gets special tokens (``add_special_tokens=False``).

    Args:
        examples: Sequence of dicts with ``"prompt"`` and ``"target"`` strings.
        tok: Tokenizer-like callable returning a mapping with ``"input_ids"``.
        max_len: Upper bound on the number of tokens per item.
    """

    def __init__(self, examples, tok, max_len=1024):
        self.ex = examples
        self.tok = tok
        self.max_len = max_len

    def __len__(self):
        return len(self.ex)

    def __getitem__(self, i):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` tensors for item *i*.

        When prompt + target exceed ``max_len`` the prompt is cut from the
        left (its oldest tokens go first) so the target always survives; a
        target that alone exceeds ``max_len`` is clipped to ``max_len``.
        """
        e = self.ex[i]
        p_ids = self.tok(e["prompt"], add_special_tokens=False)["input_ids"]
        t_ids = self.tok(e["target"], add_special_tokens=False)["input_ids"]

        t_ids = t_ids[:self.max_len]
        keep_p = max(0, min(len(p_ids), self.max_len - len(t_ids)))
        # Slice from an explicit start index: the earlier p_ids[-keep_p:] form
        # returned the WHOLE prompt when keep_p was 0 (since -0 == 0), so
        # over-long samples were never actually truncated.
        p_ids = p_ids[len(p_ids) - keep_p:]

        ids = p_ids + t_ids
        labels = [-100] * len(p_ids) + t_ids
        attn = [1] * len(ids)
        return {
            "input_ids": torch.tensor(ids, dtype=torch.long),
            "attention_mask": torch.tensor(attn, dtype=torch.long),
            "labels": torch.tensor(labels, dtype=torch.long),
        }

def pad_batch(batch, pad_id):
    """Right-pad every item to the longest ``input_ids`` in *batch* and stack.

    Padding positions get *pad_id* in ``input_ids``, 0 in ``attention_mask``
    and -100 in ``labels``. Returns a dict of stacked tensors, one row per item.
    """
    longest = max(len(item["input_ids"]) for item in batch)
    fill_values = {"input_ids": pad_id, "attention_mask": 0, "labels": -100}
    rows = {key: [] for key in fill_values}
    for item in batch:
        # The gap is always measured on input_ids, for all three fields.
        gap = longest - len(item["input_ids"])
        for key, value in fill_values.items():
            filler = torch.full((gap,), value, dtype=torch.long)
            rows[key].append(torch.cat([item[key], filler]))
    return {key: torch.stack(stacked) for key, stacked in rows.items()}

def collate_fn(features):
    """Collate dataset items for the Trainer, padding with the tokenizer's pad id."""
    pad_token_id = tokenizer.pad_token_id
    return pad_batch(features, pad_token_id)

# Tokenize lazily: TxtDS encodes each example when it is indexed.
train_ds = TxtDS(train_text, tokenizer, CONFIG["max_length"])
val_ds   = TxtDS(val_text, tokenizer, CONFIG["max_length"])

# ===================  TRAIN (fast)  ==========================
args = TrainingArguments(
    output_dir=CONFIG["output_dir"],
    num_train_epochs=CONFIG["epochs"],
    per_device_train_batch_size=CONFIG["train_bs"],
    per_device_eval_batch_size=CONFIG["train_bs"],
    gradient_accumulation_steps=CONFIG["grad_accum"],
    learning_rate=CONFIG["lr"],
    weight_decay=CONFIG["weight_decay"],
    warmup_ratio=CONFIG["warmup_ratio"],
    logging_steps=50,
    save_strategy="epoch",          # adapters are written as checkpoints under output_dir
    lr_scheduler_type="cosine",
    bf16=(torch_dtype==torch.bfloat16),   # mixed-precision flag follows the dtype chosen at load time
    fp16=(torch_dtype==torch.float16),
    dataloader_num_workers=2,
    report_to="none",
    do_eval=False                   # validation is done separately by evaluate() further down
)

# NOTE(review): eval_dataset is supplied although do_eval=False; presumably unused
# during training — confirm. There is no explicit save after train(), so the
# saved adapters are whatever save_strategy="epoch" wrote.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=collate_fn,
    tokenizer=tokenizer,
)
trainer.train()

# ===================  EVAL (invert to original units)  ===========
def load_scaler_json(paths):
    """Return ``(mean, std)`` of the ``y_log1p`` target used to undo the z-scaling.

    Looks for ``features_trading_only_scaler_2.json`` beside the first path in
    *paths* and reads ``y_log1p_mean`` / ``y_log1p_std`` from it. When the file
    is absent, the values come from the module-level ``df``: the column's mean
    and population std (std falls back to 1.0 unless it is > 0), or
    ``(0.0, 1.0)`` when the column does not exist.
    """
    scaler_path = os.path.join(os.path.dirname(paths[0]), "features_trading_only_scaler_2.json")
    if not os.path.exists(scaler_path):
        if "y_log1p" not in df.columns:
            return 0.0, 1.0
        column = df["y_log1p"]
        spread = column.std(ddof=0)
        return float(column.mean()), (float(spread) if spread > 0 else 1.0)
    with open(scaler_path, "r") as f:
        stats = json.load(f)
    return float(stats["y_log1p_mean"]), float(stats["y_log1p_std"])

# Mean/std used by evaluate() and forecast_next() to map a predicted z back to log1p units.
mu, sigma = load_scaler_json(CONFIG["feature_files"])

def number_from_text(text: str) -> Optional[float]:
    """Return the first decimal/scientific number found in *text*, or None."""
    hit = re.compile(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?").search(text)
    if hit is None:
        return None
    return float(hit.group(0))

def evaluate(model, tok, val_examples, mu, sigma, max_new_tokens=12):
    """Greedy-decode every validation prompt and score it in original volume units.

    Args:
        model: Causal LM exposing ``eval()``, ``generate()`` and ``device``.
        tok: Tokenizer used for both encoding the prompt and decoding the output.
        val_examples: Iterable of dicts with ``"prompt"`` and ``"target"`` text.
        mu, sigma: Mean/std that map a z value back to log1p units
            (``expm1(z * sigma + mu)`` gives the volume).
        max_new_tokens: Generation budget per prompt.

    Returns:
        Dict with ``val_MAE`` and ``val_RMSE`` (NaN when nothing could be
        scored) plus ``n_scored`` / ``n_total`` so that silently dropped
        samples are visible to the caller.
    """
    model.eval()
    preds_z, trues_z = [], []
    n_total = 0
    for ex in val_examples:
        n_total += 1
        # TxtDS builds the training sequences with add_special_tokens=False,
        # so encode the prompt the same way instead of letting the tokenizer
        # prepend special tokens the fine-tuned model never saw.
        ids = tok(ex["prompt"], return_tensors="pt", add_special_tokens=False).to(model.device)
        with torch.no_grad():
            out = model.generate(
                **ids, max_new_tokens=max_new_tokens, do_sample=False,
                pad_token_id=tok.pad_token_id, eos_token_id=tok.eos_token_id,
                # config.use_cache was switched off for training; ask for the
                # KV cache explicitly so decoding does not depend on that flag.
                use_cache=True,
            )
        # Keep only the newly generated tokens (everything after the prompt).
        gen = tok.decode(out[0][ids["input_ids"].shape[1]:], skip_special_tokens=True)
        z_hat = number_from_text(gen)
        z_true = number_from_text(ex["target"])
        if z_hat is None or z_true is None:
            # Unparseable output (or target): excluded from the metrics but
            # still counted in n_total.
            continue
        preds_z.append(z_hat)
        trues_z.append(z_true)
    if not preds_z:
        return {"val_MAE": float("nan"), "val_RMSE": float("nan"), "n_scored": 0, "n_total": n_total}
    preds_z = np.array(preds_z); trues_z = np.array(trues_z)
    # Undo the z-scaling, then the log1p transform.
    y_pred = np.expm1(preds_z * sigma + mu)
    y_true = np.expm1(trues_z * sigma + mu)
    return {
        "val_MAE": mean_absolute_error(y_true, y_pred),
        "val_RMSE": math.sqrt(mean_squared_error(y_true, y_pred)),
        "n_scored": len(preds_z),
        "n_total": n_total,
    }

# Score the just-trained (still PEFT-wrapped) model on the held-out tail of the series.
val_pairs = [{"prompt": e["prompt"], "target": e["target"]} for e in val_text]
metrics = evaluate(model, tokenizer, val_pairs, mu, sigma)
print("Validation metrics:", metrics)

# ===================  INFERENCE  ==========================
def forecast_next(raw_recent_volumes: List[float], last_feat_row: Dict[str,float], mu: float, sigma: float, k_decimals=5) -> float:
    """Predict the next volume from recent raw volumes and one feature row.

    Args:
        raw_recent_volumes: Raw (unscaled) volumes, oldest first; only the last
            ``CONFIG["context_len"]`` values go into the prompt.
        last_feat_row: ``{feature_name: value}`` written into the prompt in the
            dict's own order. NOTE(review): training prompts use the
            ``feature_cols`` order — callers should presumably pass the same
            keys in the same order; confirm.
        mu, sigma: Mean/std mapping the predicted z back to log1p units.
        k_decimals: Decimals kept in the returned volume.

    Returns:
        The forecast in original volume units.

    Raises:
        ValueError: If *raw_recent_volumes* is empty.
        RuntimeError: If no number can be parsed from the generated text.
    """
    ctx = CONFIG["context_len"]
    recent = np.asarray(raw_recent_volumes, dtype=float).reshape(-1, 1)
    if recent.size == 0:
        raise ValueError("raw_recent_volumes must contain at least one value.")
    # Same transform as the training history: log1p, then the fitted scaler.
    z_hist = z_hist_scaler.transform(np.log1p(recent)).reshape(-1)[-ctx:]
    hist_str = ", ".join(f"{z:.4f}" for z in z_hist)
    feats_str = ", ".join(f"{k}={float(v):.4f}" for k,v in last_feat_row.items()) if last_feat_row else "none"
    prompt = f"z_hist[{len(z_hist)}]:{hist_str}\nfeat:{feats_str}\nnext_z:"
    # TxtDS encodes training prompts with add_special_tokens=False; do the same here.
    ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    with torch.no_grad():
        out = model.generate(**ids, max_new_tokens=12, do_sample=False,
                             use_cache=True,  # config.use_cache was disabled for training
                             pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
    # Decode only the continuation that follows the prompt tokens.
    gen = tokenizer.decode(out[0][ids["input_ids"].shape[1]:], skip_special_tokens=True)
    z_hat = number_from_text(gen)
    if z_hat is None: raise RuntimeError("Model did not return a numeric answer.")
    return float(round(np.expm1(z_hat * sigma + mu), k_decimals))

# End-of-cell marker so the notebook output shows that training and evaluation finished.
print("TinyLlama LoRA fine-tune done. Use forecast_next(...) for inference.")


Data shape: (6890, 53)
        date    volume   y_trading    y_log1p  z_target
0 1998-02-11  15819189  13524081.0  16.419983  0.920051
1 1998-02-12  13524081   8694402.0  15.978190  0.337016
2 1998-02-13   8694402  14912102.0  16.517684  1.048988
3 1998-02-17  14912102  12788824.0  16.364082  0.846279
4 1998-02-18  12788824  16986901.0  16.647953  1.220905
Selected feature columns: ['ema28', 'ema7', 'roll14_mean', 'roll14_sum', 'roll28_sum', 'roll28_mean', 'roll7_mean', 'roll7_sum', 'year', 'roll7_min', 'roll14_min', 'roll28_min', 'lag1', 'lag2', 'roll7_max', 'fourier_t_sin1']
Total samples: 6826


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

trainable params: 12,615,680 || all params: 1,112,664,064 || trainable%: 1.1338


  trainer = Trainer(


Step,Training Loss
50,1.7222
100,1.3562
150,1.3481
200,1.3419
250,1.3402
300,1.3353
350,1.3443
400,1.3371
450,1.3274
500,1.331


Validation metrics: {'val_MAE': 772539.2447184883, 'val_RMSE': 1432829.1655742198}
TinyLlama LoRA fine-tune done. Use forecast_next(...) for inference.


Loading LoRA Adapters for Evaluation of Model

In [None]:
# ---- Auto-find and load your LoRA adapters ----
# Base checkpoint used for the tokenizer and as the last-resort model.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Directories searched recursively, in this order. The second root contains the
# first, so the training output directory is walked twice.
SEARCH_ROOTS = ["/content/tinyllama_ts_lora", "/content"]

import os, time, torch, json
from typing import Optional, Tuple
from transformers import AutoTokenizer, AutoModelForCausalLM
try:
    from peft import AutoPeftModelForCausalLM
    PEFT_OK = True
except Exception:
    PEFT_OK = False

def find_latest_adapter(root_dirs) -> Optional[str]:
    """
    Locate the most recently written LoRA adapter directory.

    Every existing root is walked recursively (so the trainer's checkpoint-*
    subdirectories are covered). A directory qualifies only when it holds
    adapter_config.json AND at least one adapter_model.* file; its age is the
    newest mtime among those adapter_model.* files. Returns the qualifying
    directory with the largest mtime (the first one seen wins a tie), or None.
    """
    newest_dir = None
    newest_mtime = -1
    for root in root_dirs:
        if os.path.exists(root):
            for folder, _subdirs, names in os.walk(root):
                if "adapter_config.json" not in names:
                    continue
                weight_files = [os.path.join(folder, n) for n in names if n.startswith("adapter_model.")]
                if not weight_files:
                    continue
                stamp = max(map(os.path.getmtime, weight_files))
                if stamp > newest_mtime:
                    newest_dir, newest_mtime = folder, stamp
    return newest_dir

def find_merged_model(root_dirs) -> Optional[str]:
    """Return the first directory holding full model weights, or None.

    A directory matches when it contains a file named exactly
    model.safetensors, pytorch_model.bin or pytorch_model.safetensors.
    Roots are searched in order; missing roots are skipped.
    """
    weight_names = ("model.safetensors", "pytorch_model.bin", "pytorch_model.safetensors")
    for root in root_dirs:
        if os.path.exists(root):
            for folder, _subdirs, names in os.walk(root):
                for wanted in weight_names:
                    if wanted in names:
                        return folder
    return None

def ensure_adapter_config(dirpath: str, base_model: str):
    """Create a minimal adapter_config.json in *dirpath* unless one already exists.

    The written config names *base_model* as the base checkpoint and repeats
    the LoRA settings used for training (r=16, alpha=32, dropout=0.05, all
    attention and MLP projections). An existing file is left untouched.
    """
    cfg_path = os.path.join(dirpath, "adapter_config.json")
    if not os.path.exists(cfg_path):
        projections = ["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
        lora_cfg = {"base_model_name_or_path": base_model}
        lora_cfg["peft_type"] = "LORA"
        lora_cfg["task_type"] = "CAUSAL_LM"
        lora_cfg.update({"r": 16, "lora_alpha": 32, "lora_dropout": 0.05})
        lora_cfg.update({"bias": "none", "inference_mode": False})
        lora_cfg["target_modules"] = projections
        with open(cfg_path, "w") as f:
            json.dump(lora_cfg, f)
        print(f"[fix] wrote missing adapter_config.json at: {cfg_path}")

def load_ready_model_and_tokenizer(base_model: str, roots) -> Tuple[AutoModelForCausalLM, AutoTokenizer, str]:
    """Load the best available model for evaluation, plus the base tokenizer.

    Tries, in order: (1) the newest LoRA adapter directory under *roots*,
    merged into its base model; (2) a directory with full model weights under
    *roots*; (3) the plain *base_model*.

    Returns:
        ``(model, tokenizer, mode)`` where *mode* is ``"adapters_merged"``,
        ``"merged_full"`` or ``"base_only"`` according to the branch taken.
    """
    tok = AutoTokenizer.from_pretrained(base_model, use_fast=True, legacy=False)
    if tok.pad_token is None: tok.pad_token = tok.eos_token
    # bfloat16 when a CUDA device with capability major >= 8 is present, otherwise float16.
    dtype = torch.bfloat16 if (torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8) else torch.float16

    # --- 1) LoRA adapters (only when peft imported successfully) ---
    adir = find_latest_adapter(roots)
    if adir and PEFT_OK:
        # NOTE(review): find_latest_adapter only returns directories that already
        # contain adapter_config.json, so this call looks like a no-op here.
        ensure_adapter_config(adir, base_model)
        try:
            mdl = AutoPeftModelForCausalLM.from_pretrained(adir, torch_dtype=dtype, device_map="auto")
            mdl = mdl.merge_and_unload()
            mdl.config.use_cache = False
            print(f"[ok] loaded & merged LoRA adapters from: {adir}")
            return mdl, tok, "adapters_merged"
        except Exception as e:
            # Best effort: any load/merge failure is reported and the next option is tried.
            print(f"[warn] adapter load failed at {adir}: {e}")

    # --- 2) Full model weights saved somewhere under the roots ---
    mdir = find_merged_model(roots)
    if mdir:
        mdl = AutoModelForCausalLM.from_pretrained(mdir, torch_dtype=dtype, device_map="auto")
        mdl.config.use_cache = False
        print(f"[ok] loaded full merged model from: {mdir}")
        return mdl, tok, "merged_full"

    # --- 3) Last resort: the un-tuned base model (results will not reflect the fine-tune) ---
    mdl = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=dtype, device_map="auto")
    mdl.config.use_cache = False
    print("[WARN] adapters/merged not found — using BASE ONLY.")
    return mdl, tok, "base_only"

# Rebinds the notebook-level `model` / `tokenizer` used by the evaluation cell below.
model, tokenizer, mode = load_ready_model_and_tokenizer(BASE_MODEL, SEARCH_ROOTS)

# Sanity check: which loading branch was taken, and where the weights live.
print("mode:", mode)
print("device:", next(model.parameters()).device)

[ok] loaded & merged LoRA adapters from: /content/tinyllama_ts_lora/checkpoint-576
mode: adapters_merged
device: cuda:0


Performance of TinyLlama trained on Features_Group_1 for 3 epochs

In [None]:
# Config
# These repeat the training cell's CONFIG values (file, columns, context length,
# feature count, split fraction, token limit); keep them equal to it so the
# validation prompts are rebuilt exactly as they were for training.
FEATURE_FILE = "/content/features_trading_only_2.csv"
DATE_COL, VOL_COL, LABEL_COL = "date", "volume", "z_target"
CTX, MAX_FEATURES, TRAIN_FRAC, MAX_LEN = 64, 16, 0.9, 1024

import os, re, math, json, numpy as np, pandas as pd, torch
from typing import List
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from transformers import LogitsProcessor

# ---------- Load & prep features ----------
# Same cleaning steps as the training cell's load_and_merge, for a single file:
# parse dates, drop unparseable rows, sort by date, turn inf into NaN, ffill then bfill.
df = pd.read_csv(FEATURE_FILE)
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors="coerce")
df = df.dropna(subset=[DATE_COL]).sort_values(DATE_COL).reset_index(drop=True)
df = df.replace([np.inf, -np.inf], np.nan).ffill().bfill()

# Top-K by absolute Pearson (keep consistent with your training)
EXCL = {DATE_COL, LABEL_COL, VOL_COL, "y_trading", "y_log1p"}
num = df.select_dtypes(include=[np.number]).copy()
std = num.std(numeric_only=True)
# Zero-variance columns are discarded before ranking.
non_const = std[std > 0].index.tolist()
num = num[non_const]
cand = [c for c in num.columns if c not in EXCL]
if not cand: raise ValueError("No candidate numeric features found.")
# NOTE(review): computed over all rows (training and validation), as in the training cell.
corr = num[cand].corrwith(num[LABEL_COL]).abs().replace([np.inf,-np.inf], np.nan).fillna(0.0)
feature_cols = corr.sort_values(ascending=False).index.tolist()[:MAX_FEATURES]

# z-history from log1p(volume)
# The scaler is re-fitted here on the same full series rather than loaded from the training run.
vol = df[VOL_COL].astype(float).values.reshape(-1,1)
z_hist_scaler = StandardScaler()
z_hist_series = z_hist_scaler.fit_transform(np.log1p(vol)).reshape(-1)

def make_windows(df, z_hist, ctx, feat_cols, label_col):
    """Build one (history, features) sample and one label per row from index *ctx* on.

    Returns ``(X, Y)``: ``X[i]`` is ``(z_hist[t-ctx:t] as a list, {col: value
    of row t})`` for ``t = ctx + i`` and ``Y`` is a float32 array with the
    *label_col* value of each such row. Both are empty when the frame has no
    more than *ctx* rows.
    """
    n = len(df)
    # Extract the needed columns once; indexing df.iloc[t] inside the loop
    # built a full row Series twice for every sample.
    if feat_cols:
        feat_rows = df[feat_cols].to_dict(orient="records")
    else:
        # A zero-column frame yields no records, so provide the empty dicts directly.
        feat_rows = [{} for _ in range(n)]
    labels = df[label_col].to_numpy(dtype=float)

    X = [(z_hist[t - ctx:t].tolist(), feat_rows[t]) for t in range(ctx, n)]
    return X, labels[ctx:].astype(np.float32)

X_raw, Y = make_windows(df, z_hist_series, CTX, feature_cols, LABEL_COL)
# Same positional split as training; only the validation tail is turned into prompts.
cut = int(len(X_raw) * TRAIN_FRAC)
val_text = []
for hist, feats in X_raw[cut:]:
    # Prompt layout must match the training cell's ex_to_text character for character.
    hist_str  = ", ".join(f"{x:.4f}" for x in hist)
    feats_str = ", ".join(f"{k}={float(v):.4f}" for k,v in feats.items()) if feats else "none"
    prompt = f"z_hist[{len(hist)}]:{hist_str}\nfeat:{feats_str}\nnext_z:"
    val_text.append({"prompt": prompt})

# scaler to invert to original units
def load_scaler_json(feature_path: str):
    """Return ``(mean, std)`` of ``y_log1p`` for mapping z values back to log1p units.

    Reads ``y_log1p_mean`` / ``y_log1p_std`` from
    ``features_trading_only_scaler_2.json`` in the directory of *feature_path*
    when that file exists. Otherwise falls back to the module-level ``df``:
    mean and population std of its ``y_log1p`` column (std becomes 1.0 unless
    it is > 0), or ``(0.0, 1.0)`` when the column is missing.
    """
    scaler_path = os.path.join(os.path.dirname(feature_path), "features_trading_only_scaler_2.json")
    if not os.path.exists(scaler_path):
        if "y_log1p" not in df.columns:
            return 0.0, 1.0
        column = df["y_log1p"]
        center = float(column.mean())
        spread = column.std(ddof=0)
        if spread > 0:
            return center, float(spread)
        return center, 1.0
    with open(scaler_path, "r") as f:
        stats = json.load(f)
    return float(stats["y_log1p_mean"]), float(stats["y_log1p_std"])

mu, sigma = load_scaler_json(FEATURE_FILE)

# y_val (ground truth)
# Labels of the validation tail, mapped from z back to volume: expm1(z * sigma + mu).
# Y is float32 (see make_windows).
y_va_z = Y[cut:]
y_val  = np.expm1(y_va_z * sigma + mu)

# ---------- Numeric-constrained decoding ----------
class DigitsOnly(LogitsProcessor):
    """Logits processor that only lets numeric-looking tokens be generated.

    At construction every id in ``range(tok.vocab_size)`` is decoded on its
    own; ids whose decoded text is non-empty and made up solely of digits,
    ``-``, ``+``, ``.``, ``e``, ``E``, space or newline are kept. On each call
    all other vocabulary entries get ``-inf`` added to their score.
    """

    def __init__(self, tok, device):
        permitted = set("0123456789-+.eE \n")
        kept_ids = []
        for token_id in range(tok.vocab_size):
            piece = tok.decode([token_id])
            if not piece:
                continue
            if set(piece) <= permitted:
                kept_ids.append(token_id)
        self.allowed_ids = torch.tensor(kept_ids, device=device)

    def __call__(self, input_ids, scores):
        # -inf everywhere, 0 on the allowed columns, then added to the raw scores.
        penalty = torch.full_like(scores, float("-inf"))
        penalty[:, self.allowed_ids] = 0
        return scores + penalty

# Built once (the constructor decodes the whole vocabulary) on the model's device and reused for every prompt.
digits_only = DigitsOnly(tokenizer, device=next(model.parameters()).device)
# Signed decimal with an optional exponent, compiled once for reuse.
num_pat = re.compile(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?")
def number_from_text(s):
    """Return the first number found in *s* as a float, or None when there is none."""
    found = num_pat.search(s)
    if found is None:
        return None
    return float(found.group(0))

# ---------- Predict TinyLlama on validation ----------
model.eval()
# Loop-invariant: resolve the model's device once instead of per prompt.
gen_device = next(model.parameters()).device
preds_z = []
for ex in val_text:
    # The training dataset (TxtDS) encodes prompts with add_special_tokens=False,
    # so encode them identically here; otherwise the tokenizer prepends special
    # tokens that the fine-tuned model never saw in front of a prompt.
    # NOTE(review): truncation presumably drops tokens from the right, i.e. the
    # "next_z:" cue, if a prompt ever exceeds MAX_LEN — confirm prompt lengths.
    ids = tokenizer(
        ex["prompt"], return_tensors="pt", truncation=True, max_length=MAX_LEN,
        add_special_tokens=False,
    ).to(gen_device)
    with torch.no_grad():
        out = model.generate(**ids, max_new_tokens=12, do_sample=False,
                             logits_processor=[digits_only],
                             use_cache=True,  # the loader set config.use_cache = False
                             pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
    # Decode only the tokens generated after the prompt.
    gen = tokenizer.decode(out[0][ids["input_ids"].shape[1]:], skip_special_tokens=True)
    z_hat = number_from_text(gen)
    # Keep one entry per validation sample; NaN marks an unparseable generation.
    preds_z.append(np.nan if z_hat is None else z_hat)

preds_z = np.array(preds_z, dtype=float)
# `mask` selects the samples that produced a number; it aligns preds with y_val.
mask = ~np.isnan(preds_z)
if mask.sum() == 0:
    raise RuntimeError("Model returned no numeric outputs. Check prompts/decoding.")
y_pred = np.expm1(preds_z[mask] * sigma + mu)
y_true = y_val[mask]

# ---------- Metrics ----------
def metrics(y_true, y_pred):
    """Return MAE, RMSE and MAPE (in percent) of *y_pred* against *y_true*.

    The MAPE denominator is *y_true* clipped from below at 1e-9 so that zero
    targets do not divide by zero.
    """
    scores = {"MAE": mean_absolute_error(y_true, y_pred)}
    scores["RMSE"] = math.sqrt(mean_squared_error(y_true, y_pred))
    denominator = np.clip(y_true, 1e-9, None)
    relative_error = np.abs((y_true - y_pred) / denominator)
    scores["MAPE%"] = float(np.mean(relative_error) * 100)
    return scores

# "Aligned" = validation samples whose generation contained a parseable number (see `mask`).
print(f"Aligned eval samples: {len(y_true)} / {len(y_val)}")
print("TinyLlama (text→z→volume):", metrics(y_true, y_pred))

# ---------- Naive baselines on same span ----------

def seasonal_naive(series, season=5):
    """Forecast each point with the value *season* steps earlier.

    The first *season* positions have no earlier value and keep their own.
    """
    yhat = np.roll(series, season)
    yhat[:season] = series[:season]
    return yhat

def moving_avg(series, k=7):
    """Forecast each point with the mean of up to *k* preceding values.

    The first position has no history and is back-filled from the second.
    """
    s = pd.Series(series)
    return s.rolling(k, min_periods=1).mean().shift(1).bfill().to_numpy()

# Every target (train + validation) in original volume units, so the baselines
# can look back across the split boundary.
y_all = np.expm1(Y * sigma + mu)

# Forecast the whole validation span first, then keep exactly the samples the
# model was scored on. Slicing the last len(y_true) points instead would compare
# against a different set of days as soon as `mask` drops any sample.
val_len = len(y_val)
val_start = len(y_all) - val_len
truth_tail = y_val[mask]
tail_len = len(truth_tail)

sn = seasonal_naive(y_all)[val_start:][mask]
ma = moving_avg(y_all)[val_start:][mask]
print("Seasonal naive:", metrics(truth_tail, sn))
print("Moving average:", metrics(truth_tail, ma))

# ---------- Preview few predictions ----------
# First (up to) ten scored validation samples, in original volume units.
for i in range(min(10, len(y_true))):
    print(f"{i:02d} | true={y_true[i]:.2f}  pred={y_pred[i]:.2f}")

Aligned eval samples: 683 / 683
TinyLlama (text→z→volume): {'MAE': 769220.187365776, 'RMSE': 1419827.828691292, 'MAPE%': 35.59028644550582}
Seasonal naive: {'MAE': 1035122.125, 'RMSE': 1966850.8488850903, 'MAPE%': 43.81184768676758}
Moving average: {'MAE': 783198.1818787911, 'RMSE': 1475405.823909972, 'MAPE%': 32.859275970257514}
00 | true=2165886.50  pred=2899729.81
01 | true=3933762.75  pred=3013983.48
02 | true=2430223.25  pred=2679824.59
03 | true=3374707.00  pred=2901927.90
04 | true=3410402.75  pred=2749887.98
05 | true=3193953.75  pred=3130365.99
06 | true=11314851.00  pred=3106547.08
07 | true=5218058.50  pred=3376760.99
08 | true=3779608.00  pred=3376581.88
09 | true=4183481.25  pred=3615025.68
