# ChronoTick 2: Moirai 1.1 Fine-Tuning

Fine-tune Salesforce Moirai 1.1 Small (14M) using wide_multivariate mode.
All channels (target + sensors) are treated as joint targets during training;
at inference, only the target channel prediction is extracted.

## Experiments
- **E1_univariate**: Drift-only fine-tuning, no sensor covariates (`max_covariates=0`)
- **E2_cov10**: Top-10 SHAP features (`max_covariates=10`)
- **E3_cov20**: Top-20 SHAP features (`max_covariates=20`)

All three experiments share the same training hyperparameters: `max_epochs=20`,
`learning_rate=1e-4`, `batch_size=32`, `patch_size=32`, `early_stopping_patience=5`.

## Training Mode
Set `TRAINING_MODE` in the "Imports, Config & Training Mode" cell to `"combined"` or `"per_machine"`.

In [None]:
# === Environment Setup ===
import os
import subprocess
import sys

# Treat the run as Colab when COLAB_GPU is set or a /content directory exists.
IN_COLAB = "COLAB_GPU" in os.environ or os.path.exists("/content")

if IN_COLAB:
    REPO_DIR = "/content/sensor-collector"
    REPO_URL = "https://github.com/JaimeCernuda/sensor-collector.git"
    GITHUB_TOKEN = None
    try:
        from google.colab import userdata

        GITHUB_TOKEN = userdata.get("GITHUB_TOKEN")
    except Exception:
        # Best effort: without a token the repo is cloned anonymously and
        # results are committed locally but never pushed.
        print("WARNING: GITHUB_TOKEN not available")
    # NOTE(review): the token is embedded in the remote URL and therefore ends
    # up in the clone's .git/config; acceptable only on a throwaway runtime.
    auth_url = (
        f"https://{GITHUB_TOKEN}@github.com/JaimeCernuda/sensor-collector.git"
        if GITHUB_TOKEN
        else REPO_URL
    )
    if os.path.exists(REPO_DIR):
        # Existing checkout: refresh the remote URL and hard-reset to
        # origin/main (any local changes in the checkout are discarded).
        subprocess.run(
            ["git", "-C", REPO_DIR, "remote", "set-url", "origin", auth_url], check=True
        )
        subprocess.run(["git", "-C", REPO_DIR, "fetch", "-q", "origin"], check=True)
        subprocess.run(
            ["git", "-C", REPO_DIR, "reset", "--hard", "origin/main"], check=True
        )
    else:
        subprocess.run(["git", "clone", "-q", auth_url, REPO_DIR], check=True)
    # Commit identity used by the checkpoint commits made from this notebook.
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.name", "Colab Runner"], check=True
    )
    subprocess.run(
        ["git", "-C", REPO_DIR, "config", "user.email", "colab@chronotick.dev"],
        check=True,
    )
    subprocess.run(["pip", "install", "-q", "-e", f"{REPO_DIR}/tick2/"], check=True)
    # Also expose the sources directly on sys.path for the running kernel.
    tick2_src = f"{REPO_DIR}/tick2/src"
    if tick2_src not in sys.path:
        sys.path.insert(0, tick2_src)

    # Always mount Drive: needed for checkpoint persistence (models too large for git)
    from google.colab import drive

    drive.mount("/content/drive")

    # Data: prefer repo copy, fall back to Drive
    DATA_DIR = f"{REPO_DIR}/sensors/data"
    if not os.path.isdir(f"{DATA_DIR}/24h_snapshot"):
        DATA_DIR = "/content/drive/MyDrive/chronotick2/data"

    RESULTS_DIR = f"{REPO_DIR}/tick2/notebooks/output/03"
else:
    GITHUB_TOKEN = None
    DATA_DIR = None
    # Anchor outputs next to this file when it runs as a script. Notebook
    # kernels define no __file__, so fall back to the working directory.
    # (The previous code took dirname of the *string* "__file__", which is
    # always "" and silently ignored the script location.)
    _base_dir = (
        os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else "."
    )
    RESULTS_DIR = os.path.join(_base_dir, "output", "03")

# Maps a torch device string to the results sub-directory name.
DEVICE_DIR_MAP = {"cuda": "gpu", "cpu": "cpu"}


def checkpoint_push(label):
    """Commit the notebook-03 output directory and push it when possible.

    Does nothing outside Colab. Inside Colab the output directory is staged
    and, if anything changed, committed. The commit is pushed only when
    ``GITHUB_TOKEN`` is set. Every failure is reported as a warning instead
    of being raised, so a checkpoint problem never interrupts training.

    Args:
        label: Short tag inserted into the commit message and log output.
    """
    if not IN_COLAB:
        return
    git = ["git", "-C", REPO_DIR]
    output_rel = "tick2/notebooks/output/03/"
    try:
        subprocess.run([*git, "add", output_rel], check=True, capture_output=True)
        status = subprocess.run(
            [*git, "status", "--porcelain", output_rel],
            capture_output=True,
            text=True,
        )
        if not status.stdout.strip():
            # Nothing new under the output directory: no commit needed.
            return
        subprocess.run(
            [
                *git,
                "commit",
                "-m",
                f"results: notebook 03c moirai {label} ({device_label})",
            ],
            check=True,
            capture_output=True,
        )
        if GITHUB_TOKEN:
            subprocess.run(
                [*git, "fetch", "-q", "origin"],
                capture_output=True,
                timeout=30,
            )
            rebase = subprocess.run(
                [*git, "rebase", "origin/main"],
                capture_output=True,
                timeout=30,
            )
            if rebase.returncode != 0:
                # A failed rebase leaves the repository mid-rebase, which would
                # break every later checkpoint commit. Restore the branch and
                # keep the commit local; the next checkpoint retries the push.
                subprocess.run(
                    [*git, "rebase", "--abort"],
                    capture_output=True,
                    timeout=30,
                )
                print(
                    f"  [CHECKPOINT WARNING] rebase onto origin/main failed "
                    f"for {label}; commit kept locally"
                )
                return
            subprocess.run(
                [*git, "push"],
                check=True,
                capture_output=True,
                timeout=60,
            )
            print(f"  [CHECKPOINT] Pushed {label}")
    except Exception as e:
        # Deliberately best effort: report and carry on.
        print(f"  [CHECKPOINT WARNING] {e}")


# Report which of the two setup branches above was taken.
print("Environment: " + ("Colab" if IN_COLAB else "Local"))

In [None]:
# === Install Moirai Dependencies ===
if IN_COLAB:
    pip_install = ["pip", "install", "-q"]

    # uni2ts pins torch<2.5; install with --no-deps to keep CUDA torch
    subprocess.run(pip_install + ["uni2ts", "--no-deps"], check=True)

    # Because of --no-deps above, the packages needed alongside uni2ts are
    # listed by hand (presumably mirroring its requirements, minus torch).
    manual_requirements = [
        "einops>=0.7",
        "gluonts>=0.14",
        "jaxtyping",
        "hydra-core",
        "python-dotenv",
        "lightning",
        "safetensors",
        "huggingface_hub",
    ]
    subprocess.run(pip_install + manual_requirements, check=True)

print("uni2ts ready")

In [None]:
# === Imports, Config & Training Mode ===
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch

from tick2.finetuning.base import FineTuneConfig
from tick2.finetuning.data_prep import prepare_datasets
from tick2.finetuning.evaluate import (
    compare_ft_vs_zero_shot,
    evaluate_finetuned,
    load_zero_shot_baselines,
)
from tick2.finetuning.moirai_ft import finetune_moirai, load_finetuned_moirai
from tick2.utils.gpu import clear_gpu_memory

sns.set_theme(style="whitegrid", font_scale=1.1)

# --- User-configurable knobs ---
TRAINING_MODE = "combined"  # "combined" or "per_machine"
DEVICE_OVERRIDE = None  # None = auto-detect, "cuda", or "cpu"
FORCE_RETRAIN = False  # Set True to retrain even if cached CSV exists

# An explicit override wins; otherwise use CUDA whenever torch can see a GPU.
if DEVICE_OVERRIDE:
    device = DEVICE_OVERRIDE
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Directory label for per-device results; unknown devices keep their own name.
device_label = DEVICE_DIR_MAP.get(device, device)

# Base configuration; the training cell derives per-experiment copies from it.
config = FineTuneConfig(
    context_length=1000,
    prediction_length=96,
    max_covariates=20,
    seed=42,
)

if device != "cuda":
    print("Running on CPU")
else:
    gpu_props = torch.cuda.get_device_properties(0)
    # The attribute is looked up under two names; 0 is used if neither exists.
    if hasattr(gpu_props, "total_memory"):
        vram_bytes = gpu_props.total_memory
    else:
        vram_bytes = getattr(gpu_props, "total_mem", 0)
    print(f"GPU:  {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {vram_bytes / 1024**3:.1f} GB")
print(f"Device: {device}, Mode: {TRAINING_MODE}")

In [None]:
# === Load & Prepare Data ===
# A falsy DATA_DIR (local runs) passes None so prepare_datasets picks its default.
data_dir = None if not DATA_DIR else Path(DATA_DIR)
prepared = prepare_datasets(config, data_dir=data_dir)

# One summary line per prepared dataset: split sizes and feature count.
for machine_name, dataset in prepared.items():
    split = dataset.split
    sizes = f"train={len(split.train)}, val={len(split.val)}, test={len(split.test)}"
    print(f"  {machine_name:16s}: {sizes}, features={len(dataset.feature_cols)}")

In [None]:
# === Fine-Tune Moirai 1.1 (E1, E2, E3) ===
from tick2.finetuning.base import FineTuneResult
from tick2.utils.colab import (
    load_checkpoint_from_drive,
    save_checkpoint_to_drive,
    setup_training_log,
)

# Output layout:
#   <RESULTS_DIR>/moirai_ft/<TRAINING_MODE>/<experiment>/  -> fine-tuning artifacts
#   <RESULTS_DIR>/<device_label>/                          -> per-device evaluation CSVs
output_base = Path(RESULTS_DIR)
ft_output_dir = output_base / "moirai_ft" / TRAINING_MODE
device_results_dir = output_base / device_label
device_results_dir.mkdir(parents=True, exist_ok=True)

log_path = setup_training_log(ft_output_dir)
print(f"Training log: {log_path}")

# --- Experiment definitions ---
# E1: Univariate FT (drift only, no sensor covariates)
# E2: Top-10 SHAP features
# E3: Top-20 SHAP features (original notebook default)
# The three entries differ only in "name" and "max_covariates"; all other
# hyperparameters are intentionally identical.
EXPERIMENTS = [
    {
        "name": "E1_univariate",
        "max_covariates": 0,
        "max_epochs": 20,
        "learning_rate": 1e-4,
        "batch_size": 32,
        "patch_size": 32,
        "early_stopping_patience": 5,
    },
    {
        "name": "E2_cov10",
        "max_covariates": 10,
        "max_epochs": 20,
        "learning_rate": 1e-4,
        "batch_size": 32,
        "patch_size": 32,
        "early_stopping_patience": 5,
    },
    {
        "name": "E3_cov20",
        "max_covariates": 20,
        "max_epochs": 20,
        "learning_rate": 1e-4,
        "batch_size": 32,
        "patch_size": 32,
        "early_stopping_patience": 5,
    },
]

# all_ft_results: flat list of every result (fresh or cached stub).
# experiment_labels: experiment name -> list of results; read by the evaluation cell.
all_ft_results = []
experiment_labels = {}

for exp in EXPERIMENTS:
    exp_name = exp["name"]
    n_cov = exp["max_covariates"]
    print(f"\n{'=' * 60}")
    print(
        f"  {exp_name}  (max_covariates={n_cov},"
        f" lr={exp['learning_rate']},"
        f" bs={exp['batch_size']})"
    )
    print(f"{'=' * 60}")

    exp_dir = ft_output_dir / exp_name

    # Per-experiment FineTuneConfig with the correct max_covariates
    ft_config = FineTuneConfig(
        context_length=config.context_length,
        prediction_length=config.prediction_length,
        max_covariates=n_cov,
        seed=config.seed,
    )

    # Check for cached checkpoint (local, then Drive)
    # NOTE(review): the "combined" sub-directory is hard-coded here and in the
    # save_checkpoint_to_drive call below, independent of TRAINING_MODE --
    # confirm this matches what finetune_moirai writes in "per_machine" mode.
    ckpt_local = exp_dir / "combined" / "best"
    cached_eval = device_results_dir / f"moirai-1.1-ft-{exp_name}_{TRAINING_MODE}.csv"

    # No local checkpoint: try to restore one from Drive before deciding
    # whether training is needed.
    if not ckpt_local.exists() and not FORCE_RETRAIN:
        drive_name = f"moirai_ft/{TRAINING_MODE}/{exp_name}"
        resumed = load_checkpoint_from_drive(
            model_name=drive_name,
            local_path=str(ckpt_local),
        )
        if resumed:
            print(f"  [RESUMED] From Drive: {resumed}")

    # Shortcut 1: an evaluation CSV already exists, so neither training nor
    # evaluation is repeated. A stub result is recorded so the experiment
    # still shows up as completed.
    if cached_eval.exists() and not FORCE_RETRAIN:
        print(f"  [CACHED] Eval exists: {cached_eval}")
        stub = FineTuneResult(
            model_name=f"moirai-1.1-ft-{exp_name}",
            machine=TRAINING_MODE,
            checkpoint_path=str(ckpt_local),
            config=exp,
        )
        all_ft_results.append(stub)
        experiment_labels[exp_name] = [stub]
        continue

    # Shortcut 2: a checkpoint exists (local or just restored from Drive),
    # so training is skipped and the evaluation cell will use it.
    if ckpt_local.exists() and not FORCE_RETRAIN:
        print(f"  [CACHED] Checkpoint: {ckpt_local}")
        stub = FineTuneResult(
            model_name=f"moirai-1.1-ft-{exp_name}",
            machine=TRAINING_MODE,
            checkpoint_path=str(ckpt_local),
            config=exp,
        )
        all_ft_results.append(stub)
        experiment_labels[exp_name] = [stub]
        continue

    clear_gpu_memory()

    # A failure in one experiment is logged and does not stop the remaining ones.
    try:
        ft_results = finetune_moirai(
            prepared=prepared,
            config=ft_config,
            output_dir=str(exp_dir),
            training_mode=TRAINING_MODE,
            patch_size=exp["patch_size"],
            max_epochs=exp["max_epochs"],
            learning_rate=exp["learning_rate"],
            batch_size=exp["batch_size"],
            early_stopping_patience=exp["early_stopping_patience"],
            device=device,
        )

        # Tag each result with the experiment name so the variants stay
        # distinguishable downstream.
        for r in ft_results:
            r.model_name = f"moirai-1.1-ft-{exp_name}"
            print(f"  {r.machine}: {r.training_time_s:.1f}s, best_epoch={r.best_epoch}")

        all_ft_results.extend(ft_results)
        experiment_labels[exp_name] = ft_results

        # Persist the checkpoint to Drive, then commit/push the small outputs via git.
        save_checkpoint_to_drive(
            local_path=exp_dir / "combined",
            model_name=(f"moirai_ft/{TRAINING_MODE}/{exp_name}"),
        )
        checkpoint_push(exp_name)

    except Exception as e:
        print(f"  [FAIL] {exp_name}: {e}")
        import traceback

        traceback.print_exc()
    finally:
        # Always release GPU memory before the next experiment.
        clear_gpu_memory()

print(f"\n{'=' * 60}")
print(f"  Completed: {list(experiment_labels.keys())}")
print(f"{'=' * 60}")

In [None]:
# === Evaluate Fine-Tuned Models ===
from tick2.finetuning.data_prep import combine_training_data
from tick2.models.moirai import MoiraiWrapper

# Compute shared feature intersection (same as training used)
# Only the second return value of combine_training_data is needed here.
_, shared_features_all = combine_training_data(prepared)
print(f"Shared eval features available: {len(shared_features_all)}")

# One DataFrame per experiment (either loaded from cache or freshly computed).
eval_dfs = []

for exp in EXPERIMENTS:
    exp_name = exp["name"]
    n_cov = exp["max_covariates"]
    print(f"\n--- Evaluating {exp_name} (max_covariates={n_cov}) ---")

    # Reuse a previously saved evaluation unless a retrain was requested.
    cached_eval = device_results_dir / f"moirai-1.1-ft-{exp_name}_{TRAINING_MODE}.csv"
    if cached_eval.exists() and not FORCE_RETRAIN:
        print(f"  [CACHED] {cached_eval}")
        eval_dfs.append(pd.read_csv(cached_eval))
        continue

    # Find checkpoint
    # Prefer <exp>/combined/best, fall back to <exp>/combined, else skip.
    exp_dir = ft_output_dir / exp_name
    ckpt_path = exp_dir / "combined" / "best"
    if not ckpt_path.exists():
        ckpt_path = exp_dir / "combined"
    if not ckpt_path.exists():
        print(f"  [SKIP] No checkpoint for {exp_name}")
        continue

    clear_gpu_memory()

    try:
        # Determine eval features for this experiment
        # First n_cov shared features; None for the univariate experiment.
        # NOTE(review): assumes shared_features_all is ordered the same way the
        # training-time covariate selection was -- confirm.
        eval_features = shared_features_all[:n_cov] if n_cov > 0 else None

        ft_model = load_finetuned_moirai(
            str(ckpt_path),
            context_length=config.context_length,
            prediction_length=config.prediction_length,
            n_covariates=n_cov,
        )

        # Build a wrapper and inject the fine-tuned weights through its private
        # attributes instead of letting it load a model itself.
        ft_wrapper = MoiraiWrapper(
            model_name=f"moirai-1.1-ft-{exp_name}",
            max_covariates=n_cov,
        )
        # Unwrap a `.module` attribute when the loaded object has one.
        ft_wrapper._model = ft_model.module if hasattr(ft_model, "module") else ft_model
        ft_wrapper._device = device

        # Training metadata comes from the first result recorded by the training
        # cell; it is None / "" when that cell did not register this experiment.
        results_for_exp = experiment_labels.get(exp_name, [])
        ft_epochs = results_for_exp[0].best_epoch if results_for_exp else None
        ft_time = results_for_exp[0].training_time_s if results_for_exp else None
        ft_machines = results_for_exp[0].machine if results_for_exp else ""

        eval_df = evaluate_finetuned(
            model=ft_wrapper,
            prepared=prepared,
            config=config,
            training_mode=f"ft_{TRAINING_MODE}",
            ft_epochs=ft_epochs,
            ft_time_s=ft_time,
            ft_train_machines=ft_machines,
            shared_feature_cols=eval_features,
        )

        if not eval_df.empty:
            # Tag rows with the experiment and cache them for later runs.
            eval_df["experiment"] = exp_name
            eval_df.to_csv(cached_eval, index=False)
            eval_dfs.append(eval_df)
            print(f"  MAE: {eval_df['mae'].mean():.4f}")
            print(f"  Saved: {cached_eval}")
        else:
            print(f"  [WARN] No eval results for {exp_name}")

        # Called whether or not rows were produced; checkpoint_push is a no-op
        # when nothing under the output directory changed.
        checkpoint_push(f"eval-{exp_name}")

    except Exception as e:
        # One failing experiment must not prevent evaluating the others.
        print(f"  [FAIL] Eval {exp_name}: {e}")
        import traceback

        traceback.print_exc()
    finally:
        clear_gpu_memory()

# Combine all evaluation results
# ft_eval_df is always defined afterwards (empty when nothing was collected),
# so later cells can test `ft_eval_df.empty` safely.
if eval_dfs:
    ft_eval_df = pd.concat(eval_dfs, ignore_index=True)
    print(f"\nTotal FT eval rows: {len(ft_eval_df)}")
    print(f"Mean MAE:  {ft_eval_df['mae'].mean():.4f}")
    print(f"Mean RMSE: {ft_eval_df['rmse'].mean():.4f}")
    if ft_eval_df["coverage"].notna().any():
        print(f"Mean Coverage: {ft_eval_df['coverage'].mean():.1%}")
    display(ft_eval_df)
else:
    ft_eval_df = pd.DataFrame()
    print("No evaluation results collected.")

In [None]:
# === Load Zero-Shot Baselines ===
# output_base is ".../output/03", so the sibling directory for notebook 02 is
# ".../output/02". The previous expression appended an extra "output" segment
# (".../output/output/02"); that path is kept as a fallback in case results
# were ever written there.
_zs_candidates = [
    output_base.parent / "02",
    output_base.parent / "output" / "02",
]
zs_dir = next((c for c in _zs_candidates if c.is_dir()), _zs_candidates[0])
zs_results = load_zero_shot_baselines(zs_dir, model_name="moirai-1.1-small")
print(f"Zero-shot baselines: {len(zs_results)} rows")
if not zs_results.empty:
    print(f"  Mean MAE (ZS): {zs_results['mae'].mean():.4f}")
else:
    print(f"  No zero-shot results found in {zs_dir}. Run notebook 02 first.")

In [None]:
# === Comparison: Fine-Tuned vs Zero-Shot ===
# `combined` is defined on every path because the export cell reads it.
if ft_eval_df.empty:
    combined = pd.DataFrame()
    print("No FT results to compare.")
elif zs_results.empty:
    combined = ft_eval_df.copy()
    print("No zero-shot baselines to compare against.")
    display(ft_eval_df)
else:
    combined = compare_ft_vs_zero_shot(ft_eval_df, zs_results)

    # --- Per-experiment improvement vs best ZS ---
    # For each machine: the lowest zero-shot MAE and the context length of the
    # row that achieved it.
    zs_per_machine = zs_results.groupby("machine")["mae"].agg(["min", "idxmin"])
    best_row_labels = zs_per_machine["idxmin"]
    best_zs = zs_per_machine.rename(columns={"min": "best_zs_mae"}).drop(
        columns=["idxmin"]
    )
    best_zs["best_zs_ctx"] = zs_results.loc[best_row_labels, "context_length"].values

    experiment_names = [spec["name"] for spec in EXPERIMENTS]

    summary_rows = []
    for machine in ft_eval_df["machine"].unique():
        # Machines without any zero-shot baseline cannot be compared.
        if machine not in best_zs.index:
            continue
        bzs_mae = best_zs.loc[machine, "best_zs_mae"]
        bzs_ctx = int(best_zs.loc[machine, "best_zs_ctx"])

        for exp_name in experiment_names:
            # Rows of this experiment (matched on the model name) for this machine.
            selected = ft_eval_df["model"].str.contains(exp_name, na=False) & (
                ft_eval_df["machine"] == machine
            )
            if not selected.any():
                continue
            ft_mae = ft_eval_df.loc[selected, "mae"].mean()

            # A relative improvement needs a positive baseline.
            if bzs_mae > 0:
                summary_rows.append(
                    {
                        "machine": machine,
                        "experiment": exp_name,
                        "ft_mae": ft_mae,
                        "best_zs_ctx": bzs_ctx,
                        "best_zs_mae": bzs_mae,
                        "vs_best_zs_pct": (bzs_mae - ft_mae) / bzs_mae * 100,
                    }
                )

    if not summary_rows:
        print("Could not compute improvement.")
    else:
        summary_df = pd.DataFrame(summary_rows)
        print("=== FT vs Best Zero-Shot ===")
        display(summary_df.round(4))

        print("\n=== Per-Experiment Summary ===")
        for exp_name in experiment_names:
            exp_rows = summary_df[summary_df["experiment"] == exp_name]
            if exp_rows.empty:
                continue
            mean_ft = exp_rows["ft_mae"].mean()
            mean_zs = exp_rows["best_zs_mae"].mean()
            mean_gain = exp_rows["vs_best_zs_pct"].mean()
            print(
                f"  {exp_name}: FT={mean_ft:.4f}, ZS={mean_zs:.4f},"
                f" improvement={mean_gain:+.1f}%"
            )

    print(f"\nCombined results: {len(combined)} rows")
    display(combined)

In [None]:
# === Visualizations ===
results_dir = Path(RESULTS_DIR)
fig_dir = results_dir / "figures"
fig_dir.mkdir(parents=True, exist_ok=True)

if ft_eval_df.empty:
    print("No results to visualize.")
else:
    # --- 1. MAE Comparison Bar Chart (multi-experiment) ---
    fig, ax = plt.subplots(figsize=(14, 5))

    def _mean_mae_rows(frame, variant):
        """Return one plot row per machine (first-seen order) with its mean MAE."""
        return [
            {
                "machine": machine_id,
                "variant": variant,
                "mae": frame[frame["machine"] == machine_id]["mae"].mean(),
            }
            for machine_id in frame["machine"].unique()
        ]

    plot_rows = []

    # Add ZS baseline
    if not zs_results.empty:
        plot_rows.extend(_mean_mae_rows(zs_results, "Zero-Shot"))

    # Add FT experiments
    for spec in EXPERIMENTS:
        variant_name = spec["name"]
        variant_rows = ft_eval_df[
            ft_eval_df["model"].str.contains(variant_name, na=False)
        ]
        plot_rows.extend(_mean_mae_rows(variant_rows, variant_name))

    if not plot_rows:
        # Nothing to draw: discard the empty figure created above.
        plt.close(fig)
    else:
        plot_df = pd.DataFrame(plot_rows)
        sns.barplot(data=plot_df, x="machine", y="mae", hue="variant", ax=ax)
        ax.set_ylabel("MAE (ppm)")
        ax.set_title("Moirai 1.1: FT vs Zero-Shot MAE by Machine")
        # Legend placed outside the axes.
        ax.legend(title="Variant", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        fig.savefig(fig_dir / "moirai_ft_vs_zs_mae.png", dpi=150, bbox_inches="tight")
        plt.show()

    # --- 2. Coverage Comparison (if available) ---
    coverage_known = ft_eval_df["coverage"].notna()
    if coverage_known.any():
        cov_data = ft_eval_df[coverage_known]
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.barplot(data=cov_data, x="machine", y="coverage", hue="model", ax=ax)
        # Reference line at 0.8 coverage.
        ax.axhline(0.8, color="red", linestyle="--", alpha=0.5, label="80% target")
        ax.set_ylabel("Coverage")
        ax.set_title("Prediction Interval Coverage")
        ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        fig.savefig(fig_dir / "moirai_ft_coverage.png", dpi=150, bbox_inches="tight")
        plt.show()

    print(f"Saved figures to: {fig_dir}")

In [None]:
# === Export Results ===
from tick2.benchmark.reporting import results_to_latex, save_results

# Save combined FT results (all experiments in one CSV, under the device directory).
have_ft_rows = not ft_eval_df.empty
if have_ft_rows:
    ft_csv = device_results_dir / f"moirai-1.1-ft-all_{TRAINING_MODE}.csv"
    ft_eval_df.to_csv(ft_csv, index=False)
    print(f"FT results CSV: {ft_csv}")

# Export the FT-vs-zero-shot comparison table as CSV and LaTeX.
if combined.empty:
    print("No results to export.")
else:
    export_prefix = f"moirai_ft_{TRAINING_MODE}"
    csv_path, latex_path = save_results(combined, results_dir, prefix=export_prefix)
    print(f"Comparison CSV:   {csv_path}")
    print(f"Comparison LaTeX: {latex_path}")

    # Also print the LaTeX table inline.
    latex = results_to_latex(
        combined,
        caption=f"Moirai 1.1 fine-tuning vs zero-shot ({TRAINING_MODE})",
        label="tab:moirai-ft",
    )
    print(f"\n{latex}")

In [None]:
# === Final Push ===
# Commit everything under the notebook-03 output directory (figures, combined
# CSVs) and push it when a token is available.
if IN_COLAB:
    os.chdir(REPO_DIR)

    subprocess.run(
        ["git", "add", "tick2/notebooks/output/03/"],
        check=True,
    )

    status = subprocess.run(
        ["git", "status", "--porcelain", "tick2/notebooks/output/03/"],
        capture_output=True,
        text=True,
    )
    if status.stdout.strip():
        msg = f"results: notebook 03c moirai-ft figures and combined ({device_label})"
        subprocess.run(
            ["git", "commit", "-m", msg],
            check=True,
        )
        if GITHUB_TOKEN:
            subprocess.run(
                ["git", "fetch", "-q", "origin"],
                capture_output=True,
                timeout=30,
            )
            rebase = subprocess.run(
                ["git", "rebase", "origin/main"],
                capture_output=True,
                timeout=30,
            )
            if rebase.returncode != 0:
                # Do not leave the checkout stuck mid-rebase; restore the branch
                # and keep the commit local so it can be pushed by hand.
                subprocess.run(
                    ["git", "rebase", "--abort"],
                    capture_output=True,
                    timeout=30,
                )
                print(
                    "Rebase onto origin/main failed and was aborted; "
                    "outputs are committed locally but were NOT pushed."
                )
            else:
                subprocess.run(["git", "push"], check=True)
                print("Pushed final outputs to GitHub.")
        else:
            print("Committed locally (no token for push).")
    else:
        print("No new outputs to commit.")
else:
    print(f"Local run. Outputs saved to: {results_dir}")
    print(
        "Run 'git add tick2/notebooks/output/03/ && git commit && git push' to share."
    )