# ChronoTick 2: Chronos-2 Fine-Tuning

Fine-tune Amazon Chronos-2 Small (28M) using LoRA and full fine-tuning.
Supports covariates via past_covariates and future_covariates dicts.

## Experiments
- E1: LoRA FT on univariate drift only
- E2: LoRA FT with sensor covariates
- E3: Full FT with sensor covariates (compared against the LoRA runs)

## Training Mode
Set `TRAINING_MODE` to "combined" (all 4 machines) or "per_machine".

In [None]:
import os
import subprocess
import sys

# Treat the session as Colab when COLAB_GPU is set or the /content directory exists.
IN_COLAB = "COLAB_GPU" in os.environ or os.path.exists("/content")

if IN_COLAB:
    REPO_DIR = "/content/sensor-collector"
    REPO_URL = "https://github.com/JaimeCernuda/sensor-collector.git"
    GITHUB_TOKEN = None
    try:
        from google.colab import userdata

        GITHUB_TOKEN = userdata.get("GITHUB_TOKEN")
    except Exception:
        # Best effort: without the secret the run continues on the anonymous URL.
        print("WARNING: GITHUB_TOKEN not available")

    # NOTE(review): the token becomes part of the remote URL that is handed to
    # git (clone / remote set-url) -- confirm that is acceptable for this runtime.
    auth_url = (
        f"https://{GITHUB_TOKEN}@github.com/JaimeCernuda/sensor-collector.git"
        if GITHUB_TOKEN
        else REPO_URL
    )

    if os.path.exists(REPO_DIR):
        # Existing clone: refresh the remote URL and hard-reset to origin/main.
        subprocess.run(
            ["git", "-C", REPO_DIR, "remote", "set-url", "origin", auth_url],
            check=True,
        )
        subprocess.run(["git", "-C", REPO_DIR, "fetch", "-q", "origin"], check=True)
        subprocess.run(
            ["git", "-C", REPO_DIR, "reset", "--hard", "origin/main"],
            check=True,
        )
    else:
        subprocess.run(["git", "clone", "-q", auth_url, REPO_DIR], check=True)

    # Identity used for the result commits created later in the notebook.
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.name", "Colab Runner"],
        check=True,
    )
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.email", "colab@chronotick.dev"],
        check=True,
    )
    # Run pip through the current interpreter so the editable install lands in
    # the environment this kernel imports from, not whichever `pip` is on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "-e", f"{REPO_DIR}/tick2/"],
        check=True,
    )

    tick2_src = f"{REPO_DIR}/tick2/src"
    if tick2_src not in sys.path:
        sys.path.insert(0, tick2_src)

    # Always mount Drive — needed for checkpoint persistence (models too large for git)
    from google.colab import drive

    drive.mount("/content/drive")

    # Data: prefer repo copy, fall back to Drive
    DATA_DIR = f"{REPO_DIR}/sensors/data"
    if not os.path.isdir(f"{DATA_DIR}/24h_snapshot"):
        DATA_DIR = "/content/drive/MyDrive/chronotick2/data"

    RESULTS_DIR = f"{REPO_DIR}/tick2/notebooks/output/03"
else:
    GITHUB_TOKEN = None
    DATA_DIR = None
    # `__file__` is only defined when this code runs as a script; in that case
    # write results next to the script, otherwise relative to the working dir.
    # (The previous code took dirname of the *string* "__file__", which is "".)
    RESULTS_DIR = os.path.join(
        os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else ".",
        "output",
        "03",
    )

# Maps the torch device string to the results sub-directory name.
DEVICE_DIR_MAP = {"cuda": "gpu", "cpu": "cpu"}


def checkpoint_push(label):
    """Stage, commit and (when a token is available) push notebook 03 results.

    Does nothing outside Colab. Only ``tick2/notebooks/output/03/`` is staged,
    and no commit is made when that path has no changes. The commit message
    uses the module-level ``device_label``, so this must be called after the
    configuration cell has defined it.

    The function is best-effort: every failure is reported as a
    ``[CHECKPOINT WARNING]`` line and never raised to the caller.

    Args:
        label: Short step name included in the commit message and log line.
    """
    if not IN_COLAB:
        return

    results_path = "tick2/notebooks/output/03/"

    def run_git(*args, check=True, timeout=None):
        # All commands target the Colab clone and capture output as text so
        # failures can be reported without cluttering the notebook.
        return subprocess.run(
            ["git", "-C", REPO_DIR, *args],
            check=check,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    try:
        run_git("add", results_path)
        status = run_git("status", "--porcelain", results_path, check=False)
        if not status.stdout.strip():
            return
        run_git(
            "commit",
            "-m",
            f"results: notebook 03b chronos2 {label} ({device_label})",
        )
        if not GITHUB_TOKEN:
            # Without a token the commit stays local.
            return

        run_git("fetch", "-q", "origin", check=False, timeout=30)
        try:
            rebased = (
                run_git("rebase", "origin/main", check=False, timeout=30).returncode
                == 0
            )
        except subprocess.TimeoutExpired:
            rebased = False
        if not rebased:
            # A failed or interrupted rebase would leave the clone mid-rebase
            # and break every later checkpoint; restore the branch instead.
            run_git("rebase", "--abort", check=False, timeout=30)
            print(
                "  [CHECKPOINT WARNING] rebase onto origin/main failed;"
                " rebase aborted"
            )

        run_git("push", timeout=60)
        print(f"  [CHECKPOINT] Pushed {label}")
    except subprocess.CalledProcessError as e:
        # Include git's stderr: the exception text alone only has the exit code.
        detail = (e.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        print(f"  [CHECKPOINT WARNING] {e}{suffix}")
    except Exception as e:
        print(f"  [CHECKPOINT WARNING] {e}")


# Report which runtime was detected (Colab vs. local).
print("Environment: " + ("Colab" if IN_COLAB else "Local"))

In [None]:
# === Install Chronos-2 dependencies ===
if IN_COLAB:
    # Invoke pip through the running interpreter so the package is installed
    # into the environment this kernel imports from, rather than whichever
    # `pip` executable happens to be first on PATH.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "-q",
            "chronos-forecasting[extras]>=2.2",
        ],
        check=True,
    )


print("chronos-forecasting ready")

In [None]:
# === Imports, Config & Training Mode ===
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch

from tick2.finetuning.base import FineTuneConfig
from tick2.finetuning.chronos2_ft import (
    finetune_chronos2,
    load_finetuned_chronos2,
)
from tick2.finetuning.data_prep import prepare_datasets
from tick2.finetuning.evaluate import (
    compare_ft_vs_zero_shot,
    evaluate_finetuned,
    load_zero_shot_baselines,
)
from tick2.utils.gpu import clear_gpu_memory

sns.set_theme(style="whitegrid", font_scale=1.1)

# --- Training configuration ---
TRAINING_MODE = "combined"  # "combined" or "per_machine"
DEVICE_OVERRIDE = None
FORCE_RETRAIN = False

# An explicit override wins; otherwise use CUDA when torch reports it available.
if DEVICE_OVERRIDE:
    device = DEVICE_OVERRIDE
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Unknown device strings are used verbatim as the results directory label.
device_label = DEVICE_DIR_MAP.get(device, device)

if device != "cuda":
    print("Running on CPU")
else:
    props = torch.cuda.get_device_properties(0)
    # Read `total_memory`, falling back to `total_mem` and finally to 0.
    vram = getattr(props, "total_memory", getattr(props, "total_mem", 0))
    print(f"GPU:  {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {vram / 1024**3:.1f} GB")

config = FineTuneConfig(
    seed=42,
    max_covariates=30,
    prediction_length=96,
    context_length=2048,
)

print(f"Device: {device}, Mode: {TRAINING_MODE}")
print(f"Context: {config.context_length}, Prediction: {config.prediction_length}")

In [None]:
# === Load and Prepare Data ===
# A falsy DATA_DIR means prepare_datasets is called with data_dir=None.
if DATA_DIR:
    data_dir = Path(DATA_DIR)
else:
    data_dir = None
prepared = prepare_datasets(config, data_dir=data_dir)

# One summary line per prepared dataset: split sizes and feature count.
for name, p in prepared.items():
    n_train = len(p.split.train)
    n_val = len(p.split.val)
    n_test = len(p.split.test)
    n_features = len(p.feature_cols)
    print(
        f"  {name:16s}: train={n_train}, val={n_val}, test={n_test}, "
        f"features={n_features}"
    )

In [None]:
# === Fine-Tune Chronos-2 (E1, E2, E3) ===
from tick2.utils.colab import (
    load_checkpoint_from_drive,
    save_checkpoint_to_drive,
    setup_training_log,
)

# Output layout: fine-tuning artefacts live under chronos2_ft/<mode>, while
# evaluation CSVs are written to a per-device directory.
output_base = Path(RESULTS_DIR)
ft_output_dir = output_base / "chronos2_ft" / TRAINING_MODE
device_results_dir = output_base / device_label
device_results_dir.mkdir(parents=True, exist_ok=True)

log_path = setup_training_log(ft_output_dir)
print(f"Training log: {log_path}")

# --- Experiment definitions ---
# E1: LoRA FT on univariate drift only
# E2: LoRA FT with sensor covariates
# E3: Full FT with sensor covariates
# Note: E3 requires A100+ (full FT OOMs on T4 at bs=256)
EXPERIMENTS = [
    {
        "name": "E1_lora_uni",
        "finetune_mode": "lora",
        "with_covariates": False,
        "learning_rate": 1e-5,
        "num_steps": 1000,
        "batch_size": 256,
    },
    {
        "name": "E2_lora_cov",
        "finetune_mode": "lora",
        "with_covariates": True,
        "learning_rate": 1e-5,
        "num_steps": 1000,
        "batch_size": 256,
    },
    {
        "name": "E3_full_cov",
        "finetune_mode": "full",
        "with_covariates": True,
        "learning_rate": 1e-6,
        "num_steps": 1000,
        "batch_size": 256,
    },
]

all_ft_results = []
experiment_labels = {}

for exp in EXPERIMENTS:
    exp_name = exp["name"]
    ft_mode = exp["finetune_mode"]
    cov = exp["with_covariates"]
    bs = exp["batch_size"]
    banner = "=" * 60
    print(f"\n{banner}")
    print(
        f"  {exp_name}  (ft={ft_mode}, covariates={cov},"
        f" lr={exp['learning_rate']}, bs={bs})"
    )
    print(banner)

    exp_dir = ft_output_dir / exp_name

    # Cache locations: local checkpoint, its parent (the unit mirrored to
    # Drive), and the evaluation CSV produced by the evaluation cell.
    ckpt_parent = exp_dir / "combined"
    ckpt_local = ckpt_parent / "combined_best"
    cached_eval = device_results_dir / f"chronos2-ft-{exp_name}_{TRAINING_MODE}.csv"

    if not FORCE_RETRAIN:
        # Restore from Drive into ckpt_parent (not ckpt_local) so the directory
        # structure matches what save_checkpoint_to_drive created.
        if not ckpt_local.exists():
            drive_name = f"chronos2_ft/{TRAINING_MODE}/{exp_name}"
            resumed = load_checkpoint_from_drive(
                model_name=drive_name,
                local_path=str(ckpt_parent),
            )
            if resumed:
                print(f"  [RESUMED] From Drive: {resumed}")

        # A cached evaluation takes precedence over a cached checkpoint; either
        # one means training is skipped and a stub result is recorded instead.
        if cached_eval.exists():
            cache_note = f"  [CACHED] Eval exists: {cached_eval}"
        elif ckpt_local.exists():
            cache_note = f"  [CACHED] Checkpoint: {ckpt_local}"
        else:
            cache_note = None

        if cache_note is not None:
            print(cache_note)
            from tick2.finetuning.base import FineTuneResult

            stub = FineTuneResult(
                model_name=f"chronos2-ft-{exp_name}",
                machine=TRAINING_MODE,
                checkpoint_path=str(ckpt_local),
                config=exp,
            )
            all_ft_results.append(stub)
            experiment_labels[exp_name] = [stub]
            continue

    clear_gpu_memory()

    try:
        ft_results = finetune_chronos2(
            prepared=prepared,
            config=config,
            output_dir=str(exp_dir),
            training_mode=TRAINING_MODE,
            finetune_mode=ft_mode,
            with_covariates=cov,
            learning_rate=exp["learning_rate"],
            num_steps=exp["num_steps"],
            batch_size=bs,
            device_map=device,
        )

        # Tag every result with the experiment-specific model name.
        for r in ft_results:
            r.model_name = f"chronos2-ft-{exp_name}"
            print(f"  {r.machine}: {r.training_time_s:.1f}s, ckpt={r.checkpoint_path}")

        all_ft_results.extend(ft_results)
        experiment_labels[exp_name] = ft_results

        save_checkpoint_to_drive(
            local_path=ckpt_parent,
            model_name=f"chronos2_ft/{TRAINING_MODE}/{exp_name}",
        )
        checkpoint_push(exp_name)

    except Exception as err:
        # One failing experiment must not stop the remaining ones.
        print(f"  [FAIL] {exp_name}: {err}")
        import traceback

        traceback.print_exc()
    finally:
        clear_gpu_memory()

closing_rule = "=" * 60
print(f"\n{closing_rule}")
print(f"  Completed: {list(experiment_labels)}")
print(closing_rule)

In [None]:
# === Evaluate Fine-Tuned Models ===
from tick2.finetuning.data_prep import combine_training_data
from tick2.models.chronos2 import Chronos2Wrapper

# Shared feature list across machines, capped at config.max_covariates
# (presumably the same set training used -- confirm in combine_training_data).
_, shared_features_all = combine_training_data(prepared)
eval_features = shared_features_all[: config.max_covariates]
print(
    f"Shared eval features: {len(eval_features)}"
    f" (capped from {len(shared_features_all)})"
)

eval_dfs = []

for exp in EXPERIMENTS:
    exp_name = exp["name"]
    cov = exp["with_covariates"]
    print(f"\n--- Evaluating {exp_name} ---")

    # Reuse a previously written evaluation CSV unless a retrain is forced.
    cached_eval = device_results_dir / f"chronos2-ft-{exp_name}_{TRAINING_MODE}.csv"
    if not FORCE_RETRAIN and cached_eval.exists():
        print(f"  [CACHED] {cached_eval}")
        eval_dfs.append(pd.read_csv(cached_eval))
        continue

    # Find checkpoint: prefer combined/combined_best, else the combined/ dir.
    exp_dir = ft_output_dir / exp_name
    ckpt_candidates = (exp_dir / "combined" / "combined_best", exp_dir / "combined")
    ckpt_path = next((c for c in ckpt_candidates if c.exists()), None)
    if ckpt_path is None:
        print(f"  [SKIP] No checkpoint for {exp_name}")
        continue

    clear_gpu_memory()

    try:
        ft_pipeline = load_finetuned_chronos2(
            str(ckpt_path),
            device_map=device,
        )

        # Build the wrapper, then inject the already-loaded pipeline and device.
        ft_wrapper = Chronos2Wrapper(
            model_id=str(ckpt_path),
            model_name=f"chronos2-ft-{exp_name}",
        )
        ft_wrapper._pipeline = ft_pipeline
        ft_wrapper._device = device

        # Use covariates only if this experiment trained with them
        if cov:
            shared_cols = eval_features
        else:
            shared_cols = None

        # Training metadata comes from the first recorded result, if any.
        results_for_exp = experiment_labels.get(exp_name, [])
        if results_for_exp:
            first_result = results_for_exp[0]
            ft_epochs = first_result.best_epoch
            ft_time = first_result.training_time_s
            ft_machines = first_result.machine
        else:
            ft_epochs = None
            ft_time = None
            ft_machines = ""

        eval_df = evaluate_finetuned(
            model=ft_wrapper,
            prepared=prepared,
            config=config,
            training_mode=f"ft_{TRAINING_MODE}",
            ft_epochs=ft_epochs,
            ft_time_s=ft_time,
            ft_train_machines=ft_machines,
            shared_feature_cols=shared_cols,
        )

        if eval_df.empty:
            print(f"  [WARN] No eval results for {exp_name}")
        else:
            eval_df["experiment"] = exp_name
            eval_df.to_csv(cached_eval, index=False)
            eval_dfs.append(eval_df)
            print(f"  MAE: {eval_df['mae'].mean():.4f}")
            print(f"  Saved: {cached_eval}")

        checkpoint_push(f"eval-{exp_name}")

    except Exception as err:
        # Keep going with the remaining experiments after a failed evaluation.
        print(f"  [FAIL] Eval {exp_name}: {err}")
        import traceback

        traceback.print_exc()
    finally:
        clear_gpu_memory()

# Combine all evaluation results
if not eval_dfs:
    ft_eval_df = pd.DataFrame()
    print("No evaluation results collected.")
else:
    ft_eval_df = pd.concat(eval_dfs, ignore_index=True)
    print(f"\nTotal FT eval rows: {len(ft_eval_df)}")
    print(f"Mean MAE:  {ft_eval_df['mae'].mean():.4f}")
    print(f"Mean RMSE: {ft_eval_df['rmse'].mean():.4f}")
    if ft_eval_df["coverage"].notna().any():
        print(f"Mean Coverage: {ft_eval_df['coverage'].mean():.1%}")
    display(ft_eval_df)

In [None]:
# === Load Zero-Shot Baselines ===
# output_base is ".../output/03", so its parent is already ".../output".
# The previous path appended another "output" segment and pointed at
# ".../output/output/02". Prefer the sibling "02" directory (presumably where
# notebook 02 writes its results -- confirm), but keep the old location as a
# fallback in case results were actually stored there.
zs_dir = output_base.parent / "02"
legacy_zs_dir = output_base.parent / "output" / "02"
if not zs_dir.is_dir() and legacy_zs_dir.is_dir():
    zs_dir = legacy_zs_dir
zs_results = load_zero_shot_baselines(
    zs_dir,
    model_name="chronos2-small",
)
print(f"Zero-shot baselines: {len(zs_results)} rows")
if not zs_results.empty:
    print(f"  Mean MAE: {zs_results['mae'].mean():.4f}")
    machines = zs_results["machine"].unique().tolist()
    print(f"  Machines: {machines}")

In [None]:
# === Comparison: Fine-Tuned vs Zero-Shot ===
if ft_eval_df.empty:
    combined = pd.DataFrame()
    print("No FT results to compare.")
elif zs_results.empty:
    # Nothing to compare against: carry the FT results forward unchanged.
    combined = ft_eval_df.copy()
    print("No zero-shot baselines to compare against.")
    display(ft_eval_df)
else:
    combined = compare_ft_vs_zero_shot(ft_eval_df, zs_results)

    # --- Per-experiment improvement ---
    # Per machine: the lowest zero-shot MAE and the context length of that row.
    zs_mae_stats = zs_results.groupby("machine")["mae"].agg(["min", "idxmin"])
    best_zs = zs_mae_stats.rename(columns={"min": "best_zs_mae"})
    best_row_ids = best_zs["idxmin"]
    best_zs["best_zs_ctx"] = zs_results.loc[best_row_ids, "context_length"].values
    best_zs = best_zs.drop(columns=["idxmin"])

    summary_rows = []
    for machine in ft_eval_df["machine"].unique():
        if machine not in best_zs.index:
            continue
        bzs_mae = best_zs.loc[machine, "best_zs_mae"]
        bzs_ctx = int(best_zs.loc[machine, "best_zs_ctx"])
        on_machine = ft_eval_df["machine"] == machine

        for exp in EXPERIMENTS:
            exp_name = exp["name"]
            # Rows whose model name contains the experiment name, on this machine.
            ft_mask = ft_eval_df["model"].str.contains(exp_name, na=False) & on_machine
            if not ft_mask.any():
                continue
            ft_mae = ft_eval_df.loc[ft_mask, "mae"].mean()

            # A relative improvement needs a strictly positive baseline.
            if not bzs_mae > 0:
                continue
            summary_rows.append(
                {
                    "machine": machine,
                    "experiment": exp_name,
                    "ft_mae": ft_mae,
                    "best_zs_ctx": bzs_ctx,
                    "best_zs_mae": bzs_mae,
                    "vs_best_zs_pct": (bzs_mae - ft_mae) / bzs_mae * 100,
                }
            )

    if not summary_rows:
        print("Could not compute improvement.")
    else:
        summary_df = pd.DataFrame(summary_rows)
        print("=== FT vs Best Zero-Shot ===")
        display(summary_df.round(4))

        print("\n=== Per-Experiment Summary ===")
        for exp in EXPERIMENTS:
            exp_name = exp["name"]
            exp_rows = summary_df[summary_df["experiment"] == exp_name]
            if exp_rows.empty:
                continue
            ft_m = exp_rows["ft_mae"].mean()
            zs_m = exp_rows["best_zs_mae"].mean()
            imp_m = exp_rows["vs_best_zs_pct"].mean()
            print(
                f"  {exp_name}: FT={ft_m:.4f}, ZS={zs_m:.4f},"
                f" improvement={imp_m:+.1f}%"
            )

    print(f"\nCombined results: {len(combined)} rows")
    display(combined)

In [None]:
# === Visualizations ===
results_dir = Path(RESULTS_DIR)
fig_dir = results_dir / "figures"
fig_dir.mkdir(parents=True, exist_ok=True)

if ft_eval_df.empty:
    print("No results to visualize.")
else:
    # Shared styling for both figures: legend outside the axes, tight PNG export.
    legend_kwargs = {"bbox_to_anchor": (1.05, 1), "loc": "upper left"}
    save_kwargs = {"dpi": 150, "bbox_inches": "tight"}

    # --- 1. MAE Comparison Bar Chart ---
    fig, ax = plt.subplots(figsize=(14, 5))

    plot_rows = []

    # Zero-shot baseline: mean MAE per machine.
    if not zs_results.empty:
        for machine in zs_results["machine"].unique():
            zs_mean = zs_results.loc[zs_results["machine"] == machine, "mae"].mean()
            plot_rows.append(
                {"machine": machine, "variant": "Zero-Shot", "mae": zs_mean}
            )

    # Fine-tuned experiments: mean MAE per machine for each experiment.
    for exp in EXPERIMENTS:
        exp_name = exp["name"]
        exp_data = ft_eval_df[ft_eval_df["model"].str.contains(exp_name, na=False)]
        for machine in exp_data["machine"].unique():
            ft_mean = exp_data.loc[exp_data["machine"] == machine, "mae"].mean()
            plot_rows.append({"machine": machine, "variant": exp_name, "mae": ft_mean})

    if not plot_rows:
        # Nothing to draw: discard the empty figure.
        plt.close(fig)
    else:
        plot_df = pd.DataFrame(plot_rows)
        sns.barplot(data=plot_df, x="machine", y="mae", hue="variant", ax=ax)
        ax.set_ylabel("MAE (ppm)")
        ax.set_title("Chronos-2: FT vs Zero-Shot MAE by Machine")
        ax.legend(title="Variant", **legend_kwargs)
        plt.tight_layout()
        fig.savefig(fig_dir / "chronos2_ft_vs_zs_mae.png", **save_kwargs)
        plt.show()

    # --- 2. Coverage Comparison (if available) ---
    has_coverage = ft_eval_df["coverage"].notna()
    if has_coverage.any():
        cov_data = ft_eval_df[has_coverage]
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.barplot(data=cov_data, x="machine", y="coverage", hue="model", ax=ax)
        # Dashed reference line at the 0.8 coverage level.
        ax.axhline(0.8, color="red", linestyle="--", alpha=0.5, label="80% target")
        ax.set_ylabel("Coverage")
        ax.set_title("Prediction Interval Coverage")
        ax.legend(title="Model", **legend_kwargs)
        plt.tight_layout()
        fig.savefig(fig_dir / "chronos2_ft_coverage.png", **save_kwargs)
        plt.show()

    print(f"Saved figures to: {fig_dir}")

In [None]:
# === Export Results ===
from tick2.benchmark.reporting import results_to_latex, save_results

# Per-device CSV holding every fine-tuned evaluation row.
if not ft_eval_df.empty:
    ft_csv = device_results_dir / f"chronos2-ft-all_{TRAINING_MODE}.csv"
    ft_eval_df.to_csv(ft_csv, index=False)
    print(f"FT results CSV: {ft_csv}")

# Comparison table: written via save_results and echoed as LaTeX.
if combined.empty:
    print("No results to export.")
else:
    export_prefix = f"chronos2_ft_{TRAINING_MODE}"
    csv_path, latex_path = save_results(combined, results_dir, prefix=export_prefix)
    print(f"Comparison CSV:   {csv_path}")
    print(f"Comparison LaTeX: {latex_path}")
    latex = results_to_latex(
        combined,
        caption=f"Chronos-2 fine-tuning vs zero-shot ({TRAINING_MODE})",
        label="tab:chronos2-ft",
    )
    print(f"\n{latex}")

In [None]:
# === Final Push ===
# In Colab: stage notebook 03 outputs, commit if anything changed, and push
# when a token is available. Locally: only print where the outputs are.
if IN_COLAB:
    os.chdir(REPO_DIR)

    subprocess.run(
        ["git", "add", "tick2/notebooks/output/03/"],
        check=True,
    )

    status = subprocess.run(
        ["git", "status", "--porcelain", "tick2/notebooks/output/03/"],
        capture_output=True,
        text=True,
    )
    if status.stdout.strip():
        msg = f"results: notebook 03b chronos2-ft figures and combined ({device_label})"
        subprocess.run(
            ["git", "commit", "-m", msg],
            check=True,
        )
        if GITHUB_TOKEN:
            subprocess.run(
                ["git", "fetch", "-q", "origin"],
                capture_output=True,
                timeout=30,
            )
            try:
                rebased = (
                    subprocess.run(
                        ["git", "rebase", "origin/main"],
                        capture_output=True,
                        timeout=30,
                    ).returncode
                    == 0
                )
            except subprocess.TimeoutExpired:
                rebased = False
            if not rebased:
                # Do not leave the clone in a half-finished rebase: abort so
                # the branch is restored before attempting the push.
                subprocess.run(
                    ["git", "rebase", "--abort"],
                    capture_output=True,
                    timeout=30,
                )
                print("WARNING: rebase onto origin/main failed; rebase aborted.")
            subprocess.run(["git", "push"], check=True)
            print("Pushed final outputs to GitHub.")
        else:
            print("Committed locally (no token for push).")
    else:
        print("No new outputs to commit.")
else:
    print(f"Local run. Outputs saved to: {results_dir}")
    print(
        "Run 'git add tick2/notebooks/output/03/ && git commit && git push' to share."
    )