# ChronoTick 2: Granite TTM Fine-Tuning

Fine-tune IBM Granite TTM (1-5M params) with a channel-mix decoder so the
model can make use of multivariate covariates. Training uses the HF Trainer
with a frozen backbone and early stopping.

## Experiments
- E1: Univariate FT (channel-independent, baseline)
- E2: Channel-mix FT with top-10 SHAP features
- E3: Channel-mix FT with top-30 SHAP features

## Training Mode
Set `TRAINING_MODE` in Cell 3 to choose how the training data is grouped:
- `"combined"`: train on all 4 machines (default)
- `"per_machine"`: train separately per machine

In [None]:
# === Environment Setup ===
# Detects Colab vs. local execution and defines the globals the rest of the
# notebook relies on: IN_COLAB, GITHUB_TOKEN, DATA_DIR, RESULTS_DIR and (on
# Colab only) REPO_DIR / REPO_URL.
import os
import subprocess
import sys

IN_COLAB = "COLAB_GPU" in os.environ or os.path.exists("/content")

if IN_COLAB:
    REPO_DIR = "/content/sensor-collector"
    REPO_URL = "https://github.com/JaimeCernuda/sensor-collector.git"

    # Read GitHub token from Colab secrets (set via sidebar key icon).
    # Required for git push; without it the notebook runs but cannot push.
    GITHUB_TOKEN = None
    try:
        from google.colab import userdata
        GITHUB_TOKEN = userdata.get("GITHUB_TOKEN")
    except Exception:
        # Best-effort lookup: any failure simply means "no token".
        GITHUB_TOKEN = None

    # Warn for every "no usable token" case, including a secret that exists
    # but is empty, since the code below treats a falsy token as missing.
    if not GITHUB_TOKEN:
        print("WARNING: GITHUB_TOKEN secret not available. Git push will be skipped.")
        print("  To enable: run from Colab UI with Secrets > GITHUB_TOKEN set.")

    # Build authenticated URL if token available (derived from REPO_URL so the
    # repository location is spelled out in exactly one place).
    # NOTE: auth_url embeds the token, and CalledProcessError messages include
    # the failing command, so a failure of the git calls below would show it.
    if GITHUB_TOKEN:
        auth_url = REPO_URL.replace("https://", f"https://{GITHUB_TOKEN}@", 1)
    else:
        auth_url = REPO_URL

    # Clone or pull latest tick2 code from GitHub
    if os.path.exists(REPO_DIR):
        # Update remote URL in case token was added after initial clone
        subprocess.run(["git", "-C", REPO_DIR, "remote", "set-url", "origin", auth_url], check=True)
        # Reset to remote HEAD to avoid divergence from previous Colab commits.
        # Colab outputs are regenerated each run, so local commits are disposable.
        subprocess.run(["git", "-C", REPO_DIR, "fetch", "-q", "origin"], check=True)
        subprocess.run(["git", "-C", REPO_DIR, "reset", "--hard", "origin/main"], check=True)
    else:
        subprocess.run(["git", "clone", "-q", auth_url, REPO_DIR], check=True)

    # Configure git identity (Colab has no global config)
    subprocess.run(["git", "-C", REPO_DIR, "config", "user.name", "Colab Runner"], check=True)
    subprocess.run(["git", "-C", REPO_DIR, "config", "user.email", "colab@chronotick.dev"], check=True)

    # Install tick2 package in editable mode. Run pip through the kernel's own
    # interpreter so the package lands in the environment this notebook imports
    # from, rather than whichever `pip` happens to be first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "-e", f"{REPO_DIR}/tick2/"],
        check=True,
    )

    # Ensure tick2 is importable (pip install via subprocess doesn't always
    # update sys.path in the running kernel)
    tick2_src = f"{REPO_DIR}/tick2/src"
    if tick2_src not in sys.path:
        sys.path.insert(0, tick2_src)

    # Always mount Drive: needed for checkpoint persistence (models too large for git)
    from google.colab import drive
    drive.mount("/content/drive")

    # Data: prefer repo copy, fall back to Drive
    DATA_DIR = f"{REPO_DIR}/sensors/data"
    if not os.path.isdir(f"{DATA_DIR}/24h_snapshot"):
        DATA_DIR = "/content/drive/MyDrive/chronotick2/data"

    # Output directory inside the repo (will be git-pushed)
    RESULTS_DIR = f"{REPO_DIR}/tick2/notebooks/output/03"
else:
    # Local run: no token, no explicit data directory, and outputs go next to
    # this file when __file__ is defined, otherwise under the working directory.
    GITHUB_TOKEN = None
    DATA_DIR = None
    RESULTS_DIR = os.path.join(os.path.dirname(__file__) if "__file__" in dir() else ".", "output", "03")

print(f"Environment: {'Colab' if IN_COLAB else 'Local'}")
print(f"Data dir:    {DATA_DIR or '(default)'}")
print(f"Results dir: {RESULTS_DIR}")

In [None]:
# === Granite TTM Dependencies ===
# granite-tsfm pins torch<2.9; install with --no-deps to keep CUDA torch
import subprocess
import sys

if IN_COLAB:
    # Run pip through the kernel's own interpreter so the packages are
    # installed into the environment the imports below resolve against.
    pip_install = [sys.executable, "-m", "pip", "install", "-q"]
    subprocess.run([*pip_install, "granite-tsfm>=0.3.3", "--no-deps"], check=True)
    # granite-tsfm was installed without its dependencies, so the ones this
    # notebook needs are installed explicitly here.
    subprocess.run([*pip_install, "transformers>=4.56,<5", "datasets", "deprecated"], check=True)

# Deep verify: importing the concrete entry points fails fast if the install
# is missing or broken.
from tsfm_public.models.tinytimemixer import TinyTimeMixerForPrediction
from tsfm_public import TimeSeriesPreprocessor, get_datasets
from tsfm_public.toolkit.get_model import get_model
print("granite-tsfm ready")

In [None]:
# === Imports, Config & TRAINING_MODE ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from pathlib import Path

from tick2.data.preprocessing import TARGET_COL, load_all, get_feature_cols
from tick2.data.splits import temporal_split, extract_samples
from tick2.finetuning.base import FineTuneConfig
from tick2.finetuning.data_prep import prepare_datasets, select_top_features
from tick2.finetuning.granite_ft import finetune_granite
from tick2.finetuning.evaluate import evaluate_finetuned, load_zero_shot_baselines, compare_ft_vs_zero_shot
from tick2.utils.gpu import clear_gpu_memory

sns.set_theme(style="whitegrid", font_scale=1.1)

# --- Switches read by the later cells ---
TRAINING_MODE = "combined"  # "combined" or "per_machine"
DEVICE_OVERRIDE = None
FORCE_RETRAIN = False

# An explicit override wins; otherwise use CUDA whenever torch reports it.
if DEVICE_OVERRIDE:
    device = DEVICE_OVERRIDE
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Directory label for per-device outputs; devices missing from the map are
# used as their own label.
DEVICE_DIR_MAP = {"cuda": "gpu", "cpu": "cpu"}
device_label = DEVICE_DIR_MAP[device] if device in DEVICE_DIR_MAP else device

# Shared base configuration for data preparation and evaluation.
config = FineTuneConfig(
    context_length=512,
    prediction_length=96,
    max_covariates=30,
    seed=42,
)

for summary_line in (
    f"Device: {device}",
    f"Training mode: {TRAINING_MODE}",
    f"Context: {config.context_length}, Horizon: {config.prediction_length}",
):
    print(summary_line)

In [None]:
# === Load Data + Temporal Split ===
# A falsy DATA_DIR is forwarded as None rather than as a Path.
if DATA_DIR:
    data_dir = Path(DATA_DIR)
else:
    data_dir = None

prepared = prepare_datasets(config, data_dir=data_dir)

# One summary line per prepared entry: split sizes and feature count.
for name in prepared:
    p = prepared[name]
    print(f"  {name:16s}: train={len(p.split.train)}, val={len(p.split.val)}, test={len(p.split.test)}, features={len(p.feature_cols)}")

In [None]:
# === Fine-Tune Granite TTM ===
import os
import subprocess

from tick2.utils.colab import save_checkpoint_to_drive, load_checkpoint_from_drive, setup_training_log

# Everything this notebook writes lives under RESULTS_DIR: fine-tuning
# checkpoints are grouped by training mode, and a per-device directory is
# created up front.
output_base = Path(RESULTS_DIR)

device_results_dir = output_base.joinpath(device_label)
device_results_dir.mkdir(parents=True, exist_ok=True)

ft_output_dir = output_base.joinpath("granite_ttm_ft", TRAINING_MODE)

# Persist training logs to disk (epoch losses, early stopping, errors)
log_path = setup_training_log(ft_output_dir)
print(f"Training log: {log_path}")


def checkpoint_push(label: str) -> None:
    """Stage, commit and (when a token is available) push notebook-03 outputs.

    Called after a training/evaluation step so progress survives Colab
    disconnects. The operation is best-effort: every failure is reported as a
    warning and never raised, so a git problem cannot abort the calling cell.
    Outside Colab this is a no-op.

    Args:
        label: Short step identifier embedded in the commit message and in
            the progress line printed after a successful push.
    """
    if not IN_COLAB:
        return

    output_path = "tick2/notebooks/output/03/"
    git = ["git", "-C", REPO_DIR]

    try:
        subprocess.run([*git, "add", output_path], check=True, capture_output=True)
        # check=True so a failing `git status` is reported below instead of
        # being mistaken for "nothing to commit".
        status = subprocess.run(
            [*git, "status", "--porcelain", output_path],
            check=True, capture_output=True, text=True,
        )
        if not status.stdout.strip():
            return  # nothing new to commit
        subprocess.run(
            [*git, "commit", "-m",
             f"results: notebook 03 granite-ttm-ft {label} ({device_label})"],
            check=True, capture_output=True,
        )
        if GITHUB_TOKEN:
            subprocess.run(
                [*git, "push"],
                check=True, capture_output=True, timeout=60,
            )
            print(f"  [CHECKPOINT] Pushed {label} results")
        else:
            print("  [CHECKPOINT] Committed (no token for push)")
    except subprocess.CalledProcessError as e:
        print(f"  [CHECKPOINT WARNING] {e}")
        # The output was captured, so git's own explanation is only visible if
        # it is printed here. stderr is bytes unless the call used text=True.
        detail = e.stderr
        if isinstance(detail, bytes):
            detail = detail.decode("utf-8", errors="replace")
        detail = (detail or "").strip()
        if detail and GITHUB_TOKEN:
            # Never echo the token should git mention the remote URL.
            detail = detail.replace(GITHUB_TOKEN, "***")
        if detail:
            print(f"    git stderr: {detail}")
    except Exception as e:
        # Anything else (git missing, push timeout, ...) is also non-fatal.
        print(f"  [CHECKPOINT WARNING] {e}")


# --- Experiment definitions ---
# Each row below is (name, decoder_mode, max_covariates):
#   E1: Univariate FT (channel-independent decoder, no covariates)
#   E2: Channel-mix FT with top-10 SHAP features
#   E3: Channel-mix FT with top-30 SHAP features (default max_covariates)
EXPERIMENTS = [
    {"name": name, "decoder_mode": decoder_mode, "max_covariates": max_covariates}
    for name, decoder_mode, max_covariates in (
        ("E1_univariate", "common_channel", 0),
        ("E2_mix10", "mix_channel", 10),
        ("E3_mix30", "mix_channel", 30),
    )
]

import traceback

from tick2.finetuning.base import FineTuneResult

all_ft_results = []  # FineTuneResult objects across experiments
experiment_labels = {}  # experiment name -> list of FineTuneResult

# Run every experiment in EXPERIMENTS: reuse a cached checkpoint when one is
# available (locally or on Drive), otherwise fine-tune, persist and push.
for exp in EXPERIMENTS:
    exp_name = exp["name"]
    print(f"\n{'='*60}")
    print(f"  {exp_name}  (decoder={exp['decoder_mode']}, max_cov={exp['max_covariates']})")
    print(f"{'='*60}")

    # Check for cached checkpoint (local first, then Drive). A checkpoint
    # counts as cached once best/config.json exists locally.
    exp_checkpoint_dir = ft_output_dir / exp_name
    cached_flag = exp_checkpoint_dir / "best" / "config.json"
    drive_model_name = f"granite_ttm_ft/{TRAINING_MODE}/{exp_name}"

    if not cached_flag.exists() and not FORCE_RETRAIN:
        # Try restoring from Drive
        resumed = load_checkpoint_from_drive(
            model_name=drive_model_name,
            local_path=str(exp_checkpoint_dir),
        )
        if resumed:
            print(f"  [RESUMED] Loaded checkpoint from Drive: {resumed}")

    if cached_flag.exists() and not FORCE_RETRAIN:
        print(f"  [CACHED] Checkpoint exists at {exp_checkpoint_dir / 'best'}")
        # Create a stub result for tracking, so the experiment still appears
        # in all_ft_results / experiment_labels without being retrained.
        stub = FineTuneResult(
            model_name=f"granite-ttm-ft-{exp_name}",
            machine=TRAINING_MODE,
            checkpoint_path=str(exp_checkpoint_dir / "best"),
            config=exp,
        )
        all_ft_results.append(stub)
        experiment_labels[exp_name] = [stub]
        continue

    # Experiment-specific config. `prepared` itself is shared by all
    # experiments; only the config handed to finetune_granite differs.
    # NOTE(review): a max_covariates of 0 (E1) falls back to
    # config.max_covariates rather than 0 -- confirm finetune_granite ignores
    # covariates for the "common_channel" decoder.
    exp_config = FineTuneConfig(
        context_length=config.context_length,
        prediction_length=config.prediction_length,
        max_covariates=exp["max_covariates"] if exp["max_covariates"] > 0 else config.max_covariates,
        seed=config.seed,
    )

    clear_gpu_memory()

    try:
        ft_results = finetune_granite(
            prepared=prepared,
            config=exp_config,
            output_dir=str(exp_checkpoint_dir),
            training_mode=TRAINING_MODE,
            decoder_mode=exp["decoder_mode"],
            freeze_backbone=True,
            learning_rate=0.001,
            num_epochs=50,
            batch_size=64,
            early_stopping_patience=10,
        )

        # Register the results before any reporting, so a problem while
        # printing the summary cannot discard a completed training run.
        all_ft_results.extend(ft_results)
        experiment_labels[exp_name] = ft_results

        for r in ft_results:
            r.model_name = f"granite-ttm-ft-{exp_name}"
            print(f"  {r.machine}: {r.training_time_s:.1f}s, best_epoch={r.best_epoch}")
            if r.val_loss:
                # best_epoch is used as an index into val_loss; tolerate a
                # missing or out-of-range value instead of failing the step.
                try:
                    print(f"    val_loss: {r.val_loss[r.best_epoch]:.6f}")
                except (IndexError, TypeError):
                    print(f"    val_loss: unavailable (best_epoch={r.best_epoch})")

        # Save checkpoint to Drive for persistence
        save_checkpoint_to_drive(
            local_path=exp_checkpoint_dir,
            model_name=drive_model_name,
        )

        # Checkpoint push after each experiment
        checkpoint_push(exp_name)

    except Exception as e:
        # One failed experiment must not stop the remaining ones.
        print(f"  [FAIL] {exp_name}: {e}")
        traceback.print_exc()
    finally:
        clear_gpu_memory()

print(f"\n{'='*60}")
print(f"  Completed experiments: {list(experiment_labels.keys())}")
print(f"{'='*60}")

In [None]:
# === Evaluate FT Models on Test Set ===
from tick2.finetuning.granite_ft import load_finetuned_granite
from tick2.models.granite import GraniteTTMWrapper

import traceback

eval_dfs = []

# Evaluate each experiment's fine-tuned checkpoint, reusing a cached
# evaluation CSV when one exists and FORCE_RETRAIN is off.
for exp in EXPERIMENTS:
    exp_name = exp["name"]
    print(f"\n--- Evaluating {exp_name} ---")

    # Check for cached evaluation results
    cached_eval_path = device_results_dir / f"granite-ttm-ft-{exp_name}_{TRAINING_MODE}.csv"
    if cached_eval_path.exists() and not FORCE_RETRAIN:
        print(f"  [CACHED] Loading from {cached_eval_path}")
        eval_dfs.append(pd.read_csv(cached_eval_path))
        continue

    # Find the checkpoint
    results_for_exp = experiment_labels.get(exp_name, [])
    if not results_for_exp:
        print(f"  [SKIP] No training results for {exp_name}")
        continue
    ft_result = results_for_exp[0]

    checkpoint_path = ft_result.checkpoint_path
    if not checkpoint_path or not Path(checkpoint_path).exists():
        # Try default path: the per-mode location first, then the flat one.
        checkpoint_path = str(ft_output_dir / exp_name / TRAINING_MODE / "best")
        if not Path(checkpoint_path).exists():
            checkpoint_path = str(ft_output_dir / exp_name / "best")

    if not Path(checkpoint_path).exists():
        print(f"  [SKIP] Checkpoint not found for {exp_name}")
        continue

    clear_gpu_memory()

    # Bind both names before the try block so the cleanup in `finally` is
    # valid even when loading or wrapping the model fails part-way.
    wrapper = None
    ft_model_raw = None

    try:
        # Load fine-tuned model and wrap it in a GraniteTTMWrapper-like interface
        ft_model_raw = load_finetuned_granite(
            checkpoint_path,
            context_length=config.context_length,
            prediction_length=config.prediction_length,
        )

        # Create a wrapper that conforms to ModelWrapper protocol
        wrapper = GraniteTTMWrapper(
            model_name=f"granite-ttm-ft-{exp_name}",
            context_length=config.context_length,
            prediction_length=config.prediction_length,
        )
        # Replace the internal model with our fine-tuned one
        wrapper._model = ft_model_raw
        wrapper._device = device
        wrapper._model.to(device)

        eval_df = evaluate_finetuned(
            model=wrapper,
            prepared=prepared,
            config=config,
            training_mode=f"ft_{TRAINING_MODE}",
            # Training metadata taken from the first result of the experiment
            ft_epochs=ft_result.best_epoch,
            ft_time_s=ft_result.training_time_s,
            ft_train_machines=ft_result.machine,
            context_lengths=[config.context_length],
            horizons=[60, 96],
            n_samples=25,
            progress=True,
        )

        if not eval_df.empty:
            # Tag with experiment name
            eval_df["experiment"] = exp_name
            eval_df.to_csv(cached_eval_path, index=False)
            eval_dfs.append(eval_df)
            print(f"  Mean MAE: {eval_df['mae'].mean():.4f}")
            print(f"  Saved: {cached_eval_path}")
        else:
            print(f"  [WARN] No evaluation results for {exp_name}")

        checkpoint_push(f"eval-{exp_name}")

    except Exception as e:
        # One failed evaluation must not stop the remaining experiments.
        print(f"  [FAIL] Evaluation {exp_name}: {e}")
        traceback.print_exc()
    finally:
        # Drop the model references by rebinding rather than `del`: `del` on a
        # name that was never bound raises NameError, which would escape this
        # `finally` and abort the whole loop.
        wrapper = None
        ft_model_raw = None
        clear_gpu_memory()

# Combine all evaluation results
if eval_dfs:
    ft_results_df = pd.concat(eval_dfs, ignore_index=True)
    print(f"\nTotal FT evaluation rows: {len(ft_results_df)}")
    display(ft_results_df)
else:
    ft_results_df = pd.DataFrame()
    print("No evaluation results collected.")

In [None]:
# === Load Zero-Shot Baselines from Notebook 02 ===
# The baselines are read from the "02" directory next to this notebook's
# results directory.
zs_dir = Path(RESULTS_DIR).parent.joinpath("02")
zs_results = load_zero_shot_baselines(zs_dir, model_name="granite-ttm")

if zs_results.empty:
    print("No zero-shot baselines found in output/02/. Run notebook 02 first.")
    print(f"  Searched: {zs_dir}")
else:
    zs_machines = zs_results["machine"].unique().tolist()
    zs_mean_mae = zs_results["mae"].mean()
    print(f"Zero-shot baselines: {len(zs_results)} rows")
    print(f"  Machines: {zs_machines}")
    print(f"  Mean MAE: {zs_mean_mae:.4f}")

In [None]:
# === Comparison Table: FT vs Zero-Shot ===
# The degenerate cases are handled first; the full comparison needs both
# fine-tuned results and zero-shot baselines.
if ft_results_df.empty:
    print("No FT results to compare.")
elif zs_results.empty:
    print("No zero-shot baselines to compare against.")
    print("\n=== FT Results (standalone) ===")
    display(ft_results_df)
else:
    comparison = compare_ft_vs_zero_shot(ft_results_df, zs_results)
    experiment_names = [e["name"] for e in EXPERIMENTS]

    # Compute improvement percentage per machine
    summary_rows = []
    for machine in comparison["machine"].unique():
        m_data = comparison[comparison["machine"] == machine]
        zs_mae = m_data.loc[m_data["training_mode"] == "zero_shot", "mae"].mean()

        for exp_name in experiment_names:
            # Rows belonging to an experiment are found by its name appearing
            # in the model column.
            ft_mask = m_data["model"].str.contains(exp_name, na=False)
            if not ft_mask.any():
                continue
            ft_mae = m_data.loc[ft_mask, "mae"].mean()

            # Only a strictly positive baseline gives a meaningful percentage
            # (this also skips a NaN baseline).
            if not zs_mae > 0:
                continue
            summary_rows.append({
                "machine": machine,
                "experiment": exp_name,
                "zs_mae": zs_mae,
                "ft_mae": ft_mae,
                "improvement_pct": (zs_mae - ft_mae) / zs_mae * 100,
            })

    if not summary_rows:
        print("Could not compute improvement (check data alignment).")
    else:
        summary_df = pd.DataFrame(summary_rows)
        print("\n=== FT vs Zero-Shot Improvement ===")
        display(summary_df.round(4))

        print("\n=== Overall ===")
        for exp_name in experiment_names:
            exp_data = summary_df[summary_df["experiment"] == exp_name]
            if exp_data.empty:
                continue
            mean_imp = exp_data["improvement_pct"].mean()
            mean_ft_mae = exp_data["ft_mae"].mean()
            print(f"  {exp_name}: mean MAE={mean_ft_mae:.4f}, mean improvement={mean_imp:+.1f}%")

    print("\n=== Full Comparison ===")
    display(comparison)

In [None]:
# === Visualizations ===
results_dir = Path(RESULTS_DIR)
results_dir.mkdir(parents=True, exist_ok=True)

# --- 1. MAE Comparison Bar Chart: FT vs Zero-Shot per Machine ---
if not ft_results_df.empty:
    fig, ax = plt.subplots(figsize=(12, 5))

    # One row per (machine, variant) holding the mean MAE: zero-shot rows
    # first (when baselines exist), then one group per FT experiment.
    plot_rows = []

    if not zs_results.empty:
        plot_rows += [
            {
                "machine": machine,
                "variant": "Zero-Shot",
                "mae": zs_results.loc[zs_results["machine"] == machine, "mae"].mean(),
            }
            for machine in zs_results["machine"].unique()
        ]

    for exp in EXPERIMENTS:
        exp_name = exp["name"]
        exp_data = ft_results_df[ft_results_df["model"].str.contains(exp_name, na=False)]
        plot_rows += [
            {
                "machine": machine,
                "variant": exp_name,
                "mae": exp_data.loc[exp_data["machine"] == machine, "mae"].mean(),
            }
            for machine in exp_data["machine"].unique()
        ]

    if not plot_rows:
        # Nothing to draw: discard the empty figure.
        plt.close(fig)
        print("No data for MAE comparison plot.")
    else:
        plot_df = pd.DataFrame(plot_rows)
        sns.barplot(data=plot_df, x="machine", y="mae", hue="variant", ax=ax)
        ax.set_ylabel("MAE (ppm)")
        ax.set_title("Granite TTM: Fine-Tuned vs Zero-Shot MAE by Machine")
        ax.legend(title="Variant", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        fig.savefig(results_dir / "ft_vs_zs_mae_comparison.png", dpi=150, bbox_inches="tight")
        plt.show()

# --- 2. Training Loss Curves ---
# One panel per experiment. Results with no recorded losses get a placeholder
# note instead of a curve.
if all_ft_results:
    n_panels = len(EXPERIMENTS)
    fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

    for ax, exp in zip(axes[0], EXPERIMENTS):
        exp_name = exp["name"]
        has_data = False

        for r in experiment_labels.get(exp_name, []):
            if r.train_loss:
                ax.plot(r.train_loss, label="Train", alpha=0.7)
                has_data = True
            if r.val_loss:
                ax.plot(r.val_loss, label="Validation", alpha=0.7)
                # Mark best epoch
                ax.axvline(r.best_epoch, color="red", linestyle="--", alpha=0.5, label=f"Best (epoch {r.best_epoch})")
                has_data = True

        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.set_title(exp_name)
        if not has_data:
            ax.text(0.5, 0.5, "(cached, no loss history)", transform=ax.transAxes, ha="center")
        else:
            ax.legend(fontsize=8)

    plt.suptitle("Granite TTM Fine-Tuning Loss Curves", fontsize=14)
    plt.tight_layout()
    fig.savefig(results_dir / "ft_training_loss_curves.png", dpi=150, bbox_inches="tight")
    plt.show()

# --- 3. Coverage Comparison (if applicable) ---
# Drawn only when the results carry a "coverage" column with at least one
# non-missing value.
has_coverage_column = (not ft_results_df.empty) and ("coverage" in ft_results_df.columns)
if has_coverage_column:
    cov_data = ft_results_df.loc[ft_results_df["coverage"].notna()]
    if not cov_data.empty:
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.barplot(data=cov_data, x="machine", y="coverage", hue="model", ax=ax)
        # Horizontal reference line at the 0.8 coverage level
        ax.axhline(0.8, color="red", linestyle="--", alpha=0.5, label="80% target")
        ax.set_ylabel("Coverage")
        ax.set_title("Prediction Interval Coverage by Machine")
        ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        fig.savefig(results_dir / "ft_coverage_comparison.png", dpi=150, bbox_inches="tight")
        plt.show()

print(f"Saved figures to: {results_dir}")

In [None]:
# === Export Results CSV + LaTeX ===
from tick2.benchmark.reporting import save_results, results_to_latex

results_dir = Path(RESULTS_DIR)
device_results_dir = results_dir / device_label
device_results_dir.mkdir(parents=True, exist_ok=True)

if ft_results_df.empty:
    print("No results to export.")
else:
    # Save FT results
    ft_csv_path = device_results_dir / f"granite-ttm-ft_{TRAINING_MODE}.csv"
    ft_results_df.to_csv(ft_csv_path, index=False)
    print(f"FT results CSV: {ft_csv_path}")

    if zs_results.empty:
        # No baselines available: export the fine-tuning results on their own.
        csv_path, latex_path = save_results(
            ft_results_df, results_dir, prefix="granite_ttm_ft"
        )
        print(f"FT-only CSV:   {csv_path}")
        print(f"FT-only LaTeX: {latex_path}")
        print(f"\n{results_to_latex(ft_results_df, caption='Granite TTM fine-tuning results', label='tab:granite-ft')}")
    else:
        # Save combined comparison
        comparison = compare_ft_vs_zero_shot(ft_results_df, zs_results)
        csv_path, latex_path = save_results(
            comparison, results_dir, prefix="granite_ttm_ft_comparison"
        )
        print(f"\nComparison CSV:   {csv_path}")
        print(f"Comparison LaTeX: {latex_path}")
        print(f"\n{results_to_latex(comparison, caption='Granite TTM fine-tuning vs zero-shot', label='tab:granite-ft')}")

# Save training metadata: one row per result, with the result's config
# entries spread into additional columns.
if all_ft_results:
    meta_rows = [
        {
            "model": r.model_name,
            "machine": r.machine,
            "training_time_s": r.training_time_s,
            "best_epoch": r.best_epoch,
            "checkpoint_path": r.checkpoint_path,
            **r.config,
        }
        for r in all_ft_results
    ]
    meta_df = pd.DataFrame(meta_rows)
    meta_path = results_dir / f"granite_ttm_ft_training_meta_{TRAINING_MODE}.csv"
    meta_df.to_csv(meta_path, index=False)
    print(f"\nTraining metadata: {meta_path}")
    display(meta_df)

In [None]:
# === Save & Push Results ===
# Final push: figures, combined CSVs, and LaTeX tables.
# Per-experiment results are normally pushed incrementally by checkpoint_push(),
# but a failed or timed-out push there leaves commits that exist only locally.
# This cell therefore pushes whenever a token is available, even if it has
# nothing new to commit itself.
if IN_COLAB:
    os.chdir(REPO_DIR)
    output_pathspec = "tick2/notebooks/output/03/"

    # Stage all fine-tuning outputs (figures, combined files, any stragglers)
    subprocess.run(["git", "add", output_pathspec], check=True)

    # Check if there's anything new to commit. check=True so a failing
    # `git status` is not mistaken for "nothing to commit".
    status = subprocess.run(
        ["git", "status", "--porcelain", output_pathspec],
        capture_output=True, text=True, check=True,
    )
    has_new_outputs = bool(status.stdout.strip())

    if has_new_outputs:
        subprocess.run(
            ["git", "commit", "-m",
             f"results: notebook 03 granite-ttm-ft figures and combined results ({device_label})"],
            check=True,
        )
    else:
        print("No new outputs to commit.")

    if GITHUB_TOKEN:
        # With nothing to send, `git push` is a harmless no-op.
        subprocess.run(["git", "push"], check=True)
        if has_new_outputs:
            print("Pushed final outputs (figures + combined CSV) to GitHub.")
        else:
            print("Pushed any pending per-experiment checkpoint commits to GitHub.")
    else:
        if has_new_outputs:
            print("Committed locally but GITHUB_TOKEN not set -- skipping push.")
        else:
            print("GITHUB_TOKEN not set -- skipping push.")
        print("Set the secret in Colab sidebar > Secrets > GITHUB_TOKEN")
else:
    print(f"Local run. Outputs saved to: {results_dir}")
    print("Run 'git add tick2/notebooks/output/03/ && git commit && git push' to share.")