# ChronoTick 2: Chronos-2 Fine-Tuning

Fine-tune Amazon Chronos-2 Small (28M parameters) with either LoRA adapters or full-parameter fine-tuning (selected via `FINETUNE_MODE`).
Supports covariates via past_covariates and future_covariates dicts.

## Experiments
- E1: LoRA FT on univariate drift only
- E2: LoRA FT with sensor covariates
- E3: Full FT vs LoRA comparison

## Training Mode
Set `TRAINING_MODE` to `"combined"` (train on all 4 machines together) or `"per_machine"` (train each machine separately).

In [None]:
import os
import subprocess
import sys

# Colab is recognised by its COLAB_GPU environment variable or its /content directory.
IN_COLAB = "COLAB_GPU" in os.environ or os.path.exists("/content")

if IN_COLAB:
    REPO_DIR = "/content/sensor-collector"
    REPO_URL = "https://github.com/JaimeCernuda/sensor-collector.git"
    GITHUB_TOKEN = None
    try:
        from google.colab import userdata
        GITHUB_TOKEN = userdata.get("GITHUB_TOKEN")
    except Exception:
        # Best-effort: without the secret the notebook still runs; the push
        # steps later in the notebook are gated on GITHUB_TOKEN being set.
        print("WARNING: GITHUB_TOKEN not available")

    # NOTE(review): the token is embedded in the remote URL, so it ends up in
    # the clone's git config and in the command line of any failing git call.
    auth_url = (
        f"https://{GITHUB_TOKEN}@github.com/JaimeCernuda/sensor-collector.git"
        if GITHUB_TOKEN
        else REPO_URL
    )

    if os.path.exists(REPO_DIR):
        # Existing clone: refresh the remote URL, then hard-reset to origin/main
        # (this discards any local changes in the clone).
        subprocess.run(
            ["git", "-C", REPO_DIR, "remote", "set-url", "origin", auth_url],
            check=True,
        )
        subprocess.run(
            ["git", "-C", REPO_DIR, "fetch", "-q", "origin"], check=True
        )
        subprocess.run(
            ["git", "-C", REPO_DIR, "reset", "--hard", "origin/main"],
            check=True,
        )
    else:
        subprocess.run(
            ["git", "clone", "-q", auth_url, REPO_DIR], check=True
        )

    # Commit identity used by the checkpoint commits made from this notebook.
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.name", "Colab Runner"],
        check=True,
    )
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.email", "colab@chronotick.dev"],
        check=True,
    )
    subprocess.run(
        ["pip", "install", "-q", "-e", f"{REPO_DIR}/tick2/"], check=True
    )

    # Make the tick2 sources importable in the already-running kernel.
    tick2_src = f"{REPO_DIR}/tick2/src"
    if tick2_src not in sys.path:
        sys.path.insert(0, tick2_src)

    # Always mount Drive — needed for checkpoint persistence (models too large for git)
    from google.colab import drive
    drive.mount("/content/drive")

    # Data: prefer repo copy, fall back to Drive
    DATA_DIR = f"{REPO_DIR}/sensors/data"
    if not os.path.isdir(f"{DATA_DIR}/24h_snapshot"):
        DATA_DIR = "/content/drive/MyDrive/chronotick2/data"

    RESULTS_DIR = f"{REPO_DIR}/tick2/notebooks/output/03"
else:
    GITHUB_TOKEN = None
    DATA_DIR = None
    # Anchor local outputs next to this file when it runs as a script. A
    # notebook kernel defines no __file__, so fall back to the working
    # directory. (The name must be looked up, not quoted: dirname of the
    # literal string "__file__" is always empty.)
    RESULTS_DIR = os.path.join(
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else ".",
        "output",
        "03",
    )

# Maps a torch device name to the results sub-directory used for that device.
DEVICE_DIR_MAP = {"cuda": "gpu", "cpu": "cpu"}


def checkpoint_push(label):
    """Stage, commit, and (if a token is configured) push notebook results.

    Best-effort by design: it is a no-op outside Colab, and every failure is
    reported as a ``[CHECKPOINT WARNING]`` line instead of being raised, so a
    failed push never interrupts the run.

    Args:
        label: Short name of the finished step; used in the commit message
            and in the progress output.

    Returns:
        None.
    """
    if not IN_COLAB:
        return

    results_path = "tick2/notebooks/output/03/"

    def run_git(*args, check=True, timeout=None):
        """Run one git subcommand inside REPO_DIR with captured text output."""
        return subprocess.run(
            ["git", "-C", REPO_DIR, *args],
            check=check,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    def redact(text):
        """Mask the GitHub token so git output can be printed safely."""
        return text.replace(GITHUB_TOKEN, "***") if GITHUB_TOKEN else text

    try:
        run_git("add", results_path)
        status = run_git("status", "--porcelain", results_path, check=False)
        if not status.stdout.strip():
            return  # nothing new under the results directory
        run_git(
            "commit", "-m",
            f"results: notebook 03b chronos2 {label} ({device_label})",
        )
        if not GITHUB_TOKEN:
            return  # committed locally; pushing requires the token

        fetch = run_git("fetch", "-q", "origin", check=False, timeout=30)
        if fetch.returncode != 0:
            print("  [CHECKPOINT WARNING] git fetch failed; origin/main may be stale")

        # A failed or timed-out rebase leaves the clone in a "rebase in
        # progress" state, which would break this push and every later
        # commit. Abort it so the branch is restored before pushing.
        try:
            rebase = run_git("rebase", "origin/main", check=False, timeout=30)
            rebase_ok = rebase.returncode == 0
        except subprocess.TimeoutExpired:
            rebase_ok = False
        if not rebase_ok:
            run_git("rebase", "--abort", check=False, timeout=30)
            print(
                "  [CHECKPOINT WARNING] rebase onto origin/main failed; "
                "aborted it and pushing the branch as-is"
            )

        run_git("push", timeout=60)
        print(f"  [CHECKPOINT] Pushed {label}")
    except subprocess.CalledProcessError as e:
        # Output is captured, so include git's stderr or the cause is lost.
        detail = (e.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        print(redact(f"  [CHECKPOINT WARNING] {e}{suffix}"))
    except Exception as e:
        print(f"  [CHECKPOINT WARNING] {e}")


print(f"Environment: {'Colab' if IN_COLAB else 'Local'}")

In [None]:
# === Install Chronos-2 dependencies ===
# Only Colab installs the package here; other environments skip straight to
# the import below.
if IN_COLAB:
    chronos_requirement = "chronos-forecasting[extras]>=2.2"
    subprocess.run(
        ["pip", "install", "-q", chronos_requirement],
        check=True,
    )

# Importing immediately afterwards fails fast if the package is not usable.
from chronos import BaseChronosPipeline

print("chronos-forecasting ready")

In [None]:
# === Imports, Config & Training Mode ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from pathlib import Path

from tick2.data.preprocessing import TARGET_COL, load_all
from tick2.finetuning.base import FineTuneConfig
from tick2.finetuning.data_prep import prepare_datasets
from tick2.finetuning.chronos2_ft import finetune_chronos2, load_finetuned_chronos2
from tick2.finetuning.evaluate import (
    evaluate_finetuned,
    load_zero_shot_baselines,
    compare_ft_vs_zero_shot,
)
from tick2.utils.gpu import clear_gpu_memory

sns.set_theme(style="whitegrid", font_scale=1.1)

# --- Training configuration ---
# TRAINING_MODE: "combined" trains on all 4 machines, "per_machine" trains separately
TRAINING_MODE = "combined"
# DEVICE_OVERRIDE: None auto-detects; "cuda" forces GPU; "cpu" forces CPU
DEVICE_OVERRIDE = None
# FORCE_RETRAIN: when True, fine-tuning is re-run even if cached results exist
FORCE_RETRAIN = False
# FINETUNE_MODE: "lora" for a LoRA adapter, "full" for full-parameter fine-tuning
FINETUNE_MODE = "lora"

# An explicit override wins; otherwise use CUDA whenever torch reports it.
if DEVICE_OVERRIDE:
    device = DEVICE_OVERRIDE
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Unknown device names fall through unchanged as their own directory label.
device_label = DEVICE_DIR_MAP.get(device, device)

if device != "cuda":
    print("Running on CPU")
else:
    props = torch.cuda.get_device_properties(0)
    # The memory attribute is looked up under two spellings; 0 if neither exists.
    legacy_total = getattr(props, "total_mem", 0)
    vram = getattr(props, "total_memory", legacy_total)
    print(f"GPU:  {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {vram / 1024**3:.1f} GB")

config = FineTuneConfig(
    seed=42,
    max_covariates=30,
    prediction_length=96,
    context_length=2048,
)

print(f"Device: {device}, Mode: {TRAINING_MODE}, FT: {FINETUNE_MODE}")
print(f"Context: {config.context_length}, Prediction: {config.prediction_length}")

In [None]:
# === Load and Prepare Data ===
# DATA_DIR is None for local runs; None is forwarded unchanged to prepare_datasets.
data_dir = None if not DATA_DIR else Path(DATA_DIR)
prepared = prepare_datasets(config, data_dir=data_dir)

# One summary line per prepared entry: split sizes and number of feature columns.
for entry_name, entry in prepared.items():
    summary = (
        f"  {entry_name:16s}: train={len(entry.split.train)}, "
        f"val={len(entry.split.val)}, test={len(entry.split.test)}, "
        f"features={len(entry.feature_cols)}"
    )
    print(summary)

In [None]:
# === Fine-Tune Chronos-2 ===
# NOTE: Chronos-2's pipeline.fit() is opaque — it exposes no epoch-level
# checkpoint saves, so a Colab disconnect mid-training loses all progress.
# Consider Colab Pro for persistent runtimes when training Chronos-2, or
# lower num_steps so training finishes within the free-tier timeout.

from tick2.utils.colab import (
    load_checkpoint_from_drive,
    save_checkpoint_to_drive,
    setup_training_log,
)

# Output layout: <results>/chronos2_ft/<mode> for checkpoints and logs,
# <results>/<device label> for evaluation CSVs.
output_base = Path(RESULTS_DIR)
ft_output_dir = output_base / "chronos2_ft" / TRAINING_MODE
device_results_dir = output_base / device_label

# Persist training logs to disk (timing, errors, warnings)
log_path = setup_training_log(ft_output_dir)
print(f"Training log: {log_path}")

cached_path = device_results_dir / f"chronos2-ft-{FINETUNE_MODE}_{TRAINING_MODE}.csv"

# NOTE(review): the checkpoint names below use "combined" even when
# TRAINING_MODE is "per_machine" — confirm that is what finetune_chronos2 writes.
drive_model_name = f"chronos2_ft/{TRAINING_MODE}/combined_best"
combined_dir = ft_output_dir / "combined"
ckpt_local = combined_dir / "combined_best"

# Reuse of cached artefacts is allowed unless a retrain is forced.
reuse_allowed = not FORCE_RETRAIN

# Nothing usable locally: try to restore a checkpoint from Drive
# (resume after a disconnect).
if reuse_allowed and not (cached_path.exists() or ckpt_local.exists()):
    resumed = load_checkpoint_from_drive(
        model_name=drive_model_name,
        local_path=str(ckpt_local),
    )
    if resumed:
        print(f"[RESUMED] Loaded checkpoint from Drive: {resumed}")

# Re-check the filesystem: the restore above may have created the checkpoint.
if reuse_allowed and cached_path.exists():
    print(f"[CACHED] {cached_path}")
elif reuse_allowed and ckpt_local.exists():
    print(f"[CACHED] Checkpoint exists at {ckpt_local}, skipping training")
else:
    clear_gpu_memory()
    # LoRA uses a 10x larger learning rate than full fine-tuning.
    ft_learning_rate = 1e-5 if FINETUNE_MODE == "lora" else 1e-6
    ft_results = finetune_chronos2(
        prepared=prepared,
        config=config,
        output_dir=str(ft_output_dir),
        training_mode=TRAINING_MODE,
        finetune_mode=FINETUNE_MODE,
        with_covariates=True,
        learning_rate=ft_learning_rate,
        num_steps=1000,
        batch_size=256,
        device_map=device,
    )
    for result in ft_results:
        print(
            f"  {result.machine}: {result.training_time_s:.1f}s, "
            f"ckpt={result.checkpoint_path}"
        )

    # Save checkpoint to Drive for persistence.
    # NOTE(review): this uploads the parent "combined" directory, while the
    # restore above targets "combined/combined_best" — confirm the helpers agree.
    save_checkpoint_to_drive(
        local_path=combined_dir,
        model_name=drive_model_name,
    )

    checkpoint_push("finetuning")

In [None]:
# === Evaluate Fine-Tuned Model ===
from tick2.models.chronos2 import Chronos2Wrapper

if FORCE_RETRAIN or not cached_path.exists():
    # Pick the checkpoint directory for the active training mode.
    if TRAINING_MODE != "combined":
        ckpt_path = ft_output_dir
    else:
        combined_ckpt_dir = ft_output_dir / "combined"
        best_ckpt_dir = combined_ckpt_dir / "combined_best"
        # Fall back to the parent directory if Chronos saved differently.
        ckpt_path = best_ckpt_dir if best_ckpt_dir.exists() else combined_ckpt_dir

    ckpt_str = str(ckpt_path)
    ft_pipeline = load_finetuned_chronos2(ckpt_str, device_map=device)

    # Build a wrapper and hand it the already-loaded fine-tuned pipeline by
    # assigning its private attributes directly.
    ft_wrapper = Chronos2Wrapper(
        model_id=ckpt_str,
        model_name=f"chronos2-ft-{FINETUNE_MODE}",
    )
    ft_wrapper._pipeline = ft_pipeline
    ft_wrapper._device = device

    ft_eval_df = evaluate_finetuned(
        model=ft_wrapper,
        prepared=prepared,
        config=config,
        training_mode=f"ft_{TRAINING_MODE}",
    )

    # Cache the evaluation so later runs can skip straight to the comparison.
    device_results_dir.mkdir(parents=True, exist_ok=True)
    ft_eval_df.to_csv(cached_path, index=False)
    checkpoint_push("evaluation")
else:
    ft_eval_df = pd.read_csv(cached_path)
    print(f"Loaded cached evaluation: {len(ft_eval_df)} rows")

print(f"Evaluation rows: {len(ft_eval_df)}")
print(f"Mean MAE:  {ft_eval_df['mae'].mean():.4f}")
print(f"Mean RMSE: {ft_eval_df['rmse'].mean():.4f}")
# Coverage is reported only when at least one row has a value.
coverage_values = ft_eval_df["coverage"]
if coverage_values.notna().any():
    print(f"Mean Coverage: {coverage_values.mean():.1%}")

In [None]:
# === Load Zero-Shot Baselines ===
# output_base already ends in ".../output/03", so the zero-shot results
# (presumably written by the preceding notebook) live in the sibling
# ".../output/02". Appending "output" again would point at
# ".../output/output/02".
zs_dir = output_base.parent / "02"
# Backward compatibility: keep using the old location if only it exists.
legacy_zs_dir = output_base.parent / "output" / "02"
if not zs_dir.is_dir() and legacy_zs_dir.is_dir():
    zs_dir = legacy_zs_dir

zs_results = load_zero_shot_baselines(zs_dir, model_name="chronos2-small")
print(f"Zero-shot baselines: {len(zs_results)} rows")
if not zs_results.empty:
    print(f"  Mean MAE: {zs_results['mae'].mean():.4f}")
    print(f"  Machines: {zs_results['machine'].unique().tolist()}")

In [None]:
# === Comparison: Fine-Tuned vs Zero-Shot ===
combined = compare_ft_vs_zero_shot(ft_eval_df, zs_results)


def _group_key_text(group, column, width):
    """Return the group's key for *column* as a right-aligned integer.

    Yields "n/a" when the column is absent, and converts through ``int`` so
    float-typed columns (e.g. after a concat introduced NaNs elsewhere) do
    not break the ``d`` format specifier.
    """
    if column not in group.columns:
        return "n/a".rjust(width)
    return f"{int(group[column].iloc[0]):{width}d}"


# Print per-machine improvement
print("Per-machine improvement (FT vs zero-shot):")
print("-" * 60)
group_cols = ["context_length", "horizon"]
for machine in sorted(combined["machine"].unique()):
    m_df = combined[combined["machine"] == machine]
    # Group only by the columns that are actually present.
    available_cols = [c for c in group_cols if c in m_df.columns]
    if not available_cols:
        continue
    for _, group in m_df.groupby(available_cols):
        is_zero_shot = group["training_mode"] == "zero_shot"
        zs = group.loc[is_zero_shot, "mae"].values
        ft = group.loc[~is_zero_shot, "mae"].values
        # A comparison needs at least one row on each side.
        if len(zs) == 0 or len(ft) == 0:
            continue
        # Relative MAE reduction in percent; positive means fine-tuning helped.
        improvement = (zs[0] - ft[0]) / zs[0] * 100
        ctx = _group_key_text(group, "context_length", 5)
        hz = _group_key_text(group, "horizon", 4)
        print(
            f"  {machine:16s} ctx={ctx} hz={hz}: "
            f"{zs[0]:.4f} -> {ft[0]:.4f} ({improvement:+.1f}%)"
        )

print(f"\nCombined results: {len(combined)} rows")
if IN_COLAB:
    display(combined)
else:
    print(combined.to_string())

In [None]:
# === Visualizations ===
results_dir = Path(RESULTS_DIR)
fig_dir = results_dir / "figures"
fig_dir.mkdir(parents=True, exist_ok=True)


def _finish_figure(figure, filename):
    """Apply a tight layout, save *figure* into fig_dir as *filename*, then show it."""
    plt.tight_layout()
    figure.savefig(fig_dir / filename, dpi=150, bbox_inches="tight")
    plt.show()


if combined.empty:
    print("No combined results to visualize.")
else:
    machine_and_mode = ["machine", "training_mode"]

    # --- 1. MAE Comparison: FT vs Zero-Shot by Machine ---
    fig, ax = plt.subplots(figsize=(12, 5))
    mean_mae = combined.groupby(machine_and_mode)["mae"].mean()
    comparison_data = mean_mae.unstack()
    comparison_data.plot(kind="bar", ax=ax)
    ax.set_ylabel("MAE (ppm)")
    ax.set_title(f"Chronos-2 Fine-Tuned ({FINETUNE_MODE}) vs Zero-Shot")
    ax.legend(title="Training Mode")
    plt.xticks(rotation=45)
    _finish_figure(fig, f"mae_ft_vs_zs_{FINETUNE_MODE}.png")

    # --- 2. MAE by Context Length: FT vs Zero-Shot ---
    # Only meaningful when more than one context length was evaluated.
    distinct_contexts = combined["context_length"].unique()
    if len(distinct_contexts) > 1:
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.lineplot(
            data=combined,
            x="context_length",
            y="mae",
            hue="training_mode",
            style="training_mode",
            markers=True,
            ax=ax,
        )
        ax.set_xlabel("Context Length (timesteps)")
        ax.set_ylabel("MAE (ppm)")
        ax.set_title("MAE vs Context Length (FT vs Zero-Shot)")
        _finish_figure(fig, f"ctx_sensitivity_{FINETUNE_MODE}.png")

    # --- 3. Coverage Comparison (if available) ---
    has_coverage = combined["coverage"].notna()
    if has_coverage.any():
        cov_data = combined[has_coverage]
        fig, ax = plt.subplots(figsize=(10, 5))
        mean_coverage = cov_data.groupby(machine_and_mode)["coverage"].mean()
        cov_pivot = mean_coverage.unstack()
        cov_pivot.plot(kind="bar", ax=ax)
        ax.set_ylabel("Coverage (proportion)")
        ax.set_title("Prediction Interval Coverage (FT vs Zero-Shot)")
        # Dashed reference line at the 80% target.
        ax.axhline(y=0.8, color="r", linestyle="--", alpha=0.5, label="Target (80%)")
        ax.legend(title="Training Mode")
        plt.xticks(rotation=45)
        _finish_figure(fig, f"coverage_{FINETUNE_MODE}.png")

    # --- 4. Inference Latency Comparison ---
    fig, ax = plt.subplots(figsize=(10, 4))
    mean_latency = combined.groupby("training_mode")["inference_ms"].mean()
    latency_data = mean_latency.sort_values()
    latency_data.plot(kind="barh", ax=ax, color="steelblue")
    ax.set_xlabel("Mean Inference Time (ms)")
    ax.set_title("Inference Latency: FT vs Zero-Shot")
    _finish_figure(fig, f"latency_{FINETUNE_MODE}.png")

    print(f"Saved figures to: {fig_dir}")

In [None]:
# === Export Results ===
from tick2.benchmark.reporting import results_to_latex, save_results

if combined.empty:
    print("No results to export.")
else:
    # Write the combined CSV and LaTeX table, then report where they went.
    export_prefix = f"chronos2_ft_{FINETUNE_MODE}_{TRAINING_MODE}"
    csv_path, latex_path = save_results(combined, results_dir, prefix=export_prefix)
    print(f"CSV:   {csv_path}")
    print(f"LaTeX: {latex_path}")
    print()

    # Echo the LaTeX table to the notebook output as well.
    latex_table = results_to_latex(
        combined,
        caption=(
            f"Chronos-2 fine-tuning ({FINETUNE_MODE}) vs zero-shot "
            f"({TRAINING_MODE} training)"
        ),
        label=f"tab:chronos2-ft-{FINETUNE_MODE}",
    )
    print(latex_table)

    # Also store the fine-tuned rows alone, next to the per-device results.
    is_fine_tuned = combined["training_mode"] != "zero_shot"
    ft_only = combined[is_fine_tuned]
    if not ft_only.empty:
        ft_csv = device_results_dir / f"chronos2-ft-{FINETUNE_MODE}_eval.csv"
        ft_only.to_csv(ft_csv, index=False)
        print(f"\nFT-only CSV: {ft_csv}")

In [None]:
# === Final Push ===
if IN_COLAB:
    os.chdir(REPO_DIR)

    # Stage all outputs: CSVs, figures, LaTeX tables
    subprocess.run(
        ["git", "add", "tick2/notebooks/output/03/"], check=True
    )

    # Check if there is anything new to commit
    status = subprocess.run(
        ["git", "status", "--porcelain", "tick2/notebooks/output/03/"],
        capture_output=True,
        text=True,
    )
    if status.stdout.strip():
        subprocess.run(
            [
                "git", "commit", "-m",
                f"results: notebook 03b chronos2 figures and combined "
                f"({FINETUNE_MODE}, {TRAINING_MODE}, {device_label})",
            ],
            check=True,
        )
        if GITHUB_TOKEN:
            fetch = subprocess.run(
                ["git", "fetch", "-q", "origin"], capture_output=True, timeout=30
            )
            if fetch.returncode != 0:
                print("WARNING: git fetch failed; origin/main may be stale.")

            # A failed or timed-out rebase leaves the clone in a "rebase in
            # progress" state, from which the push below cannot work. Abort
            # it so the branch is restored before pushing.
            try:
                rebase = subprocess.run(
                    ["git", "rebase", "origin/main"],
                    capture_output=True,
                    timeout=30,
                )
                rebase_ok = rebase.returncode == 0
            except subprocess.TimeoutExpired:
                rebase_ok = False
            if not rebase_ok:
                subprocess.run(
                    ["git", "rebase", "--abort"], capture_output=True, timeout=30
                )
                print(
                    "WARNING: rebase onto origin/main failed; aborted it and "
                    "pushing the branch as-is."
                )

            subprocess.run(["git", "push"], check=True)
            print(
                "Pushed final outputs (figures + combined CSV) to GitHub."
            )
        else:
            print(
                "Committed locally but GITHUB_TOKEN not set -- skipping push.\n"
                "Set the secret in Colab sidebar > Secrets > GITHUB_TOKEN"
            )
    else:
        print(
            "No new outputs to commit "
            "(per-step checkpoints already pushed)."
        )
else:
    print(f"Local run. Outputs saved to: {results_dir}")
    print(
        "Run 'git add tick2/notebooks/output/03/ && git commit && git push' "
        "to share."
    )