# Full Soil Pipeline (Stable) - Colab GPU

This notebook runs the full pipeline with stability guards:
- Normalized training data + coordinates
- GA Bare Earth extraction
- Official pretrained SpectralGPT embeddings
- Alpha and fused ResNet training
- CV summary + independent validation
- Output export to Drive

Key stability fix for normalized targets:
- `esp_consistency_weight = 0.0` (avoids NaN collapse on normalized target space)

## 1) Runtime setup

Set runtime to GPU in Colab before running.

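This notebook writes live progress to `results/metrics/colab_progress.json` (a bootstrap file at `/content/colab_progress_bootstrap.json` is used until the repository has been fetched) and prints stage progress in each cell.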


In [None]:
from google.colab import drive
import os
import json
import shutil
import subprocess
from datetime import datetime, timezone
from pathlib import Path

# Repository source: clone from GitHub when USE_GIT_CLONE is True, otherwise
# copy the checkout stored at DRIVE_REPO_DIR. Either way it lands in PROJECT_DIR.
USE_GIT_CLONE = True
REPO_GIT_URL = "https://github.com/JackOnThePaddock/soil-resnet-model.git"
DRIVE_REPO_DIR = "/content/drive/MyDrive/soil-resnet-model"
PROJECT_DIR = "/content/soil-resnet-model"

# Ordered (key, label) pairs, one per pipeline stage. The position in this
# list defines the 1-based step number shown in progress output.
PROGRESS_STEPS = [
    ("runtime_setup", "Runtime setup"),
    ("deps_install", "Install dependencies"),
    ("gpu_check", "GPU check"),
    ("run_config", "Run config"),
    ("normalized_points", "Prepare normalized points"),
    ("bare_earth_sgpt", "Pull Bare Earth + SpectralGPT"),
    ("fused_table", "Build fused feature table"),
    ("write_configs", "Write training configs"),
    ("train_alpha", "Train alpha model"),
    ("train_fused", "Train fused model"),
    ("checkpoint_validation", "Checkpoint validation"),
    ("cv_summary", "CV summary"),
    ("independent_validation", "Independent validation"),
    ("save_outputs", "Save outputs"),
]
# step key -> 1-based position, and step key -> human-readable label.
PROGRESS_INDEX = {
    step_key: position
    for position, (step_key, _label) in enumerate(PROGRESS_STEPS, start=1)
}
PROGRESS_LABELS = dict(PROGRESS_STEPS)
# Progress is written to a bootstrap path until the repository has been
# fetched; set_progress_file() later switches to the project-local file.
PROGRESS_FILE = Path("/content/colab_progress_bootstrap.json")
# step key -> latest record written by progress_update().
PROGRESS_STATE = {}


def _save_progress():
    """Write the current progress state to ``PROGRESS_FILE`` as indented JSON.

    The parent directory is created when missing. The document holds an
    ``updated_at`` UTC timestamp (ISO 8601 with a ``Z`` suffix) and the
    ``PROGRESS_STATE`` mapping under ``steps``.
    """
    written_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    snapshot = {"updated_at": written_at, "steps": PROGRESS_STATE}

    destination = PROGRESS_FILE
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(snapshot, indent=2), encoding="utf-8")


def set_progress_file(path):
    """Point progress output at *path* and immediately write the state there.

    *path* is converted with ``Path`` and stored in the module-level
    ``PROGRESS_FILE``; the current ``PROGRESS_STATE`` is then saved to it.
    """
    global PROGRESS_FILE
    destination = Path(path)
    PROGRESS_FILE = destination
    _save_progress()


def progress_update(step_key, status, message=""):
    """Record a status for one pipeline step, persist it, and print a summary.

    Args:
        step_key: Key of a step listed in ``PROGRESS_STEPS``.
        status: Status text stored for the step; ``"done"`` and ``"skipped"``
            count towards the overall percentage.
        message: Optional detail stored with the record and printed below the
            summary line when non-empty.

    Raises:
        KeyError: If *step_key* is not a known step.
    """
    if step_key not in PROGRESS_INDEX:
        raise KeyError(f"Unknown progress step: {step_key}")

    position = PROGRESS_INDEX[step_key]
    title = PROGRESS_LABELS[step_key]
    recorded_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    PROGRESS_STATE[step_key] = dict(
        order=position,
        label=title,
        status=status,
        message=message,
        timestamp_utc=recorded_at,
    )
    _save_progress()

    # Overall completion counts steps that are finished or deliberately skipped.
    total_steps = len(PROGRESS_STEPS)
    finished = 0
    for record in PROGRESS_STATE.values():
        if record["status"] in {"done", "skipped"}:
            finished += 1
    percent = int((finished / total_steps) * 100)

    print(f"[{position:02d}/{total_steps:02d}] {title} -> {status} | overall {percent}%")
    if message:
        print("   ", message)


def progress_start(step_key, message=""):
    """Record *step_key* with status ``running`` via ``progress_update``."""
    progress_update(step_key, status="running", message=message)


def progress_done(step_key, message=""):
    """Record *step_key* with status ``done`` via ``progress_update``."""
    progress_update(step_key, status="done", message=message)


def progress_skip(step_key, message=""):
    """Record *step_key* with status ``skipped`` via ``progress_update``."""
    progress_update(step_key, status="skipped", message=message)


def progress_fail(step_key, message=""):
    """Record *step_key* with status ``failed`` via ``progress_update``."""
    progress_update(step_key, status="failed", message=message)


def show_progress():
    """Display a table with one row per pipeline step and its latest status.

    Steps without a record in ``PROGRESS_STATE`` are shown as ``pending``
    with an empty message and timestamp.
    """
    import pandas as _pd

    not_started = {"status": "pending", "message": "", "timestamp_utc": ""}

    def _as_row(step_key, step_label):
        # Fall back to the pending placeholder for steps that never reported.
        record = PROGRESS_STATE.get(step_key, not_started)
        return {
            "step": PROGRESS_INDEX[step_key],
            "label": step_label,
            "status": record["status"],
            "message": record.get("message", ""),
            "timestamp_utc": record.get("timestamp_utc", ""),
        }

    table = [_as_row(step_key, step_label) for step_key, step_label in PROGRESS_STEPS]
    display(_pd.DataFrame(table))


# Runtime setup: mount Drive, fetch the repository into PROJECT_DIR, make it
# the working directory, and switch progress output to the project-local file.
try:
    progress_start("runtime_setup", "Mount Drive and fetch repository")

    # Leave PROJECT_DIR before it is deleted. When this cell is re-run the
    # kernel's working directory is PROJECT_DIR (see os.chdir below); removing
    # the current directory leaves the process in a deleted cwd, which makes
    # the subsequent `git clone` fail.
    os.chdir(Path(PROJECT_DIR).parent)

    drive.mount('/content/drive', force_remount=True)

    # Validate the Drive source before discarding the existing working copy,
    # so a missing source does not also cost us the previous checkout.
    if not USE_GIT_CLONE and not os.path.exists(DRIVE_REPO_DIR):
        raise FileNotFoundError(f"Repo not found at {DRIVE_REPO_DIR}")

    if os.path.exists(PROJECT_DIR):
        shutil.rmtree(PROJECT_DIR)

    if USE_GIT_CLONE:
        subprocess.run(["git", "clone", REPO_GIT_URL, PROJECT_DIR], check=True)
    else:
        shutil.copytree(DRIVE_REPO_DIR, PROJECT_DIR)

    os.chdir(PROJECT_DIR)
    # From here on progress is written inside the project tree.
    set_progress_file(Path(PROJECT_DIR) / "results/metrics/colab_progress.json")
    print("Project dir:", os.getcwd())
    progress_done("runtime_setup", f"Project dir set to {os.getcwd()}")
except Exception as e:
    # Record the failure for the progress file, then surface the original error.
    progress_fail("runtime_setup", str(e))
    raise


In [None]:
# Install the project (editable) and its dependencies into this kernel.
progress_start("deps_install", "Install notebook dependencies")

try:
    import sys

    # Run pip through the interpreter that executes this kernel so packages
    # land in the environment the notebook later imports from; a bare `pip`
    # on PATH may belong to a different Python installation.
    python_exe = sys.executable or "python"
    commands = [
        [python_exe, "-V"],
        [python_exe, "-m", "pip", "install", "--upgrade", "pip"],
        [python_exe, "-m", "pip", "install", "-e", "."],
    ]
    for cmd in commands:
        print("Running:", " ".join(cmd))
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, check=True)

    progress_done("deps_install", "Dependencies installed")
except Exception as e:
    progress_fail("deps_install", str(e))
    raise


In [None]:
# Report the installed torch version and whether a CUDA device is visible.
progress_start("gpu_check", "Inspect runtime accelerator")

try:
    import torch

    print("Torch:", torch.__version__)
    cuda_ready = torch.cuda.is_available()
    print("CUDA available:", cuda_ready)

    # The step is marked done in both cases; a missing GPU is reported, not fatal.
    if not cuda_ready:
        gpu_summary = "No CUDA GPU detected"
    else:
        gpu_name = torch.cuda.get_device_name(0)
        print("GPU:", gpu_name)
        gpu_summary = f"CUDA GPU detected: {gpu_name}"
    progress_done("gpu_check", gpu_summary)
except Exception as e:
    progress_fail("gpu_check", str(e))
    raise


## 2) Run config

In [None]:
# Define run flags, training hyper-parameters and every file path used by the
# later cells, then verify that the two required input tables exist.
progress_start("run_config", "Load run flags and paths")

try:
    import json
    import yaml
    import numpy as np
    import pandas as pd

    # Stage toggles read by the later cells.
    FAST_MODE = False
    RUN_BARE_EARTH_PULL = True
    RUN_ALPHA = True
    RUN_FUSED = True
    RUN_INDEPENDENT_VALIDATION = True

    def _by_mode(fast_value, full_value):
        """Return *fast_value* when FAST_MODE is on, otherwise *full_value*."""
        return fast_value if FAST_MODE else full_value

    TRAINING = {
        "ensemble_size": _by_mode(3, 5),
        "n_splits": _by_mode(3, 5),
        "epochs_alpha": _by_mode(60, 300),
        "final_epochs_alpha": _by_mode(40, 200),
        "epochs_fused": _by_mode(80, 350),
        "final_epochs_fused": _by_mode(60, 220),
        "batch_size": 32,
        "learning_rate": _by_mode(5e-5, 8e-5),
        "weight_decay": 1e-4,
        "patience": _by_mode(20, 35),
        "lr_patience": _by_mode(10, 20),
        "lr_factor": 0.5,
        "grad_clip": 1.0,
        "random_seed": 42,
    }

    # Options forwarded to scripts/pull_bare_earth_embeddings.py.
    WCS_WORKERS = 16
    WCS_TIMEOUT = 120
    WCS_RETRIES = 3
    SPECTRAL_DIM = 16

    # All paths are relative to the project directory (the current cwd).
    _processed_dir = Path("data/processed")
    _config_dir = Path("configs")
    _model_root = Path("models")
    paths = {
        "raw": _processed_dir / "features.csv",
        "normalized": _processed_dir / "features_normalized.csv",
        "normalized_points": _processed_dir / "features_normalized_points.csv",
        "be_sgpt": _processed_dir / "features_normalized_bareearth_sgpt.csv",
        "sgpt_only": _processed_dir / "features_normalized_sgpt_embeddings.csv",
        "sgpt_raw": _processed_dir / "features_normalized_sgpt_official_raw.csv",
        "fused_feat": _processed_dir / "features_normalized_fused_feat.csv",
        "fused_meta": _processed_dir / "features_normalized_fused_meta.json",
        "cfg_alpha": _config_dir / "colab_resnet_alpha_normalized_stable.yaml",
        "cfg_fused": _config_dir / "colab_resnet_fused_normalized_stable.yaml",
        "model_alpha": _model_root / "colab_resnet_alpha_norm_stable",
        "model_fused": _model_root / "colab_resnet_fused_norm_stable",
        "validation": Path("data/validation/national_independent_1368.csv"),
    }

    # Only the raw and normalized tables must exist up front; everything else
    # is produced by later cells.
    for _required_key in ("raw", "normalized"):
        _required_path = paths[_required_key]
        if not _required_path.exists():
            raise FileNotFoundError(f"Missing required file: {_required_path}")

    print("Config ready")
    progress_done("run_config", "Config loaded")
except Exception as e:
    progress_fail("run_config", str(e))
    raise


## 3) Prepare normalized training table with coordinates

In [None]:
# Attach id/lat/lon from the raw table to the normalized table, row by row,
# and save the combined table.
progress_start("normalized_points", "Build normalized training points table")

try:
    points_out = paths["normalized_points"]

    raw_meta = pd.read_csv(paths["raw"], usecols=["id", "lat", "lon"])
    norm_df = pd.read_csv(paths["normalized"])

    # The two tables are joined positionally, so their lengths must agree.
    n_raw, n_norm = len(raw_meta), len(norm_df)
    if n_raw != n_norm:
        raise ValueError(f"Row mismatch raw={n_raw} normalized={n_norm}")

    aligned_frames = [frame.reset_index(drop=True) for frame in (raw_meta, norm_df)]
    norm_points = pd.concat(aligned_frames, axis=1)

    points_out.parent.mkdir(parents=True, exist_ok=True)
    norm_points.to_csv(points_out, index=False)
    print("Saved", points_out, norm_points.shape)

    progress_done("normalized_points", f"Saved {points_out.name} with {len(norm_points)} rows")
except Exception as e:
    progress_fail("normalized_points", str(e))
    raise


## 4) Pull Bare Earth + official SpectralGPT embeddings

In [None]:
import subprocess

# Run the Bare Earth / SpectralGPT extraction script, or verify that its
# outputs already exist when the pull is disabled.
progress_start("bare_earth_sgpt", "Pull Bare Earth and create official SpectralGPT embeddings")

try:
    if not RUN_BARE_EARTH_PULL:
        # Pull disabled: the two tables used downstream must already be present.
        for p in (paths["be_sgpt"], paths["sgpt_only"]):
            if not p.exists():
                raise FileNotFoundError(f"Missing expected file: {p}")
        progress_skip("bare_earth_sgpt", "Using existing Bare Earth + SpectralGPT files")
    else:
        # (flag, value) pairs in the order they are passed to the script.
        script_options = [
            ("--normalized-csv", paths["normalized"]),
            ("--points-csv", paths["raw"]),
            ("--output-csv", paths["be_sgpt"]),
            ("--output-embeddings-csv", paths["sgpt_only"]),
            ("--output-official-raw-csv", paths["sgpt_raw"]),
            ("--workers", WCS_WORKERS),
            ("--timeout", WCS_TIMEOUT),
            ("--retries", WCS_RETRIES),
            ("--spectral-backend", "official_pretrained"),
            ("--official-request-chunk-size", "64"),
            ("--spectral-dim", SPECTRAL_DIM),
            ("--seed", TRAINING["random_seed"]),
        ]
        cmd = ["python", "scripts/pull_bare_earth_embeddings.py"]
        for flag, value in script_options:
            cmd += [flag, str(value)]

        print("Running:", " ".join(cmd))
        subprocess.run(cmd, check=True)
        progress_done("bare_earth_sgpt", "Bare Earth + SpectralGPT features created")

    print("be+sgpt exists:", paths["be_sgpt"].exists())
    print("sgpt exists:", paths["sgpt_only"].exists())
except Exception as e:
    progress_fail("bare_earth_sgpt", str(e))
    raise


## 5) Build fused feature table (`feat_*`)

In [None]:
import re

# Copy the band_*, be_* and sgpt_* columns (in that order) into consecutive
# feat_NNN columns, then save the widened table plus a JSON column manifest.
progress_start("fused_table", "Build fused feat_* table")

try:
    fused = pd.read_csv(paths["be_sgpt"])

    def sort_numeric_suffix(cols):
        """Sort names by trailing integer; names without one sort last by name."""
        def rank(name):
            found = re.search(r"(\d+)$", name)
            if found is None:
                return (1, -1, name)
            return (0, int(found.group(1)), name)
        return sorted(cols, key=rank)

    def _columns_with_prefix(prefix):
        # Prefix match is case-insensitive on the column name.
        return sort_numeric_suffix([c for c in fused.columns if c.lower().startswith(prefix)])

    band_cols = _columns_with_prefix("band_")
    be_cols = _columns_with_prefix("be_")
    sgpt_cols = _columns_with_prefix("sgpt_")

    if len(band_cols) != 64:
        raise ValueError(f"Expected 64 band columns, got {len(band_cols)}")
    if len(sgpt_cols) == 0:
        raise ValueError("No sgpt columns found")

    source_cols = band_cols + be_cols + sgpt_cols
    feat_cols = [f"feat_{i:03d}" for i in range(len(source_cols))]
    for feat_name, source_name in zip(feat_cols, source_cols):
        fused[feat_name] = fused[source_name]

    fused_out = paths["fused_feat"]
    fused_out.parent.mkdir(parents=True, exist_ok=True)
    fused.to_csv(fused_out, index=False)

    # Manifest recording which source columns feed the feat_* columns.
    meta = {
        "alpha_cols": band_cols,
        "bareearth_cols": be_cols,
        "spectral_cols": sgpt_cols,
        "feat_cols": feat_cols,
    }
    paths["fused_meta"].write_text(json.dumps(meta, indent=2), encoding="utf-8")

    print("Saved", fused_out, fused.shape)
    print({"band": len(band_cols), "be": len(be_cols), "sgpt": len(sgpt_cols), "feat": len(meta["feat_cols"])})
    progress_done("fused_table", f"Saved {len(meta['feat_cols'])} fused features")
except Exception as e:
    progress_fail("fused_table", str(e))
    raise


## 6) Write stable training configs (normalized targets)

In [None]:
# Write the YAML training configs for the alpha (band_* only) and fused
# (feat_*) models and clear stale checkpoints of models about to be retrained.
progress_start("write_configs", "Write stable training configs")

try:
    import shutil

    # Clear only the model directories that are about to be retrained. Wiping
    # a directory whose training stage is disabled would discard checkpoints
    # that later cells still read (independent validation loads the alpha model).
    for retrain, model_dir in ((RUN_ALPHA, paths["model_alpha"]), (RUN_FUSED, paths["model_fused"])):
        if retrain and model_dir.exists():
            shutil.rmtree(model_dir)

    TARGETS = ["ph", "cec", "esp", "soc", "ca", "mg", "na"]

    # Only the header is needed to count the feat_* columns.
    fused_cols_df = pd.read_csv(paths["fused_feat"], nrows=1)
    n_fused_features = len([c for c in fused_cols_df.columns if c.startswith("feat_")])

    def _training_section(epochs, final_epochs, specialist_epochs, specialist_patience):
        """Build the `training` mapping; only the four arguments differ per model.

        Key order is kept stable because the configs are dumped with
        ``sort_keys=False``.
        """
        return {
            "ensemble_size": TRAINING["ensemble_size"],
            "n_splits": TRAINING["n_splits"],
            "epochs": epochs,
            "final_epochs": final_epochs,
            "batch_size": TRAINING["batch_size"],
            "learning_rate": TRAINING["learning_rate"],
            "weight_decay": TRAINING["weight_decay"],
            "patience": TRAINING["patience"],
            "lr_patience": TRAINING["lr_patience"],
            "lr_factor": TRAINING["lr_factor"],
            "grad_clip": TRAINING["grad_clip"],
            "cv_strategy": "group_kfold",
            "loss_name": "weighted_huber",
            "huber_delta": 1.0,
            # IMPORTANT: esp_consistency_weight=0.0 for normalized targets
            "esp_consistency_weight": 0.0,
            "target_weight_mode": "inverse_frequency",
            "sample_weight_mode": "rare_target_average",
            "auto_target_transforms": False,
            "target_transforms": {t: "identity" for t in TARGETS},
            "specialist_targets": ["cec", "esp", "soc"],
            "specialist_epochs": specialist_epochs,
            "specialist_patience": specialist_patience,
            "specialist_val_fraction": 0.2,
            "specialist_blend_weight": 0.4,
            "random_seed": TRAINING["random_seed"],
        }

    alpha_cfg = {
        "model": {"input_dim": 64, "hidden_dim": 128, "num_blocks": 2, "dropout": 0.2, "activation": "silu"},
        "targets": ["pH", "CEC", "ESP", "SOC", "Ca", "Mg", "Na"],
        "training": _training_section(
            epochs=TRAINING["epochs_alpha"],
            final_epochs=TRAINING["final_epochs_alpha"],
            specialist_epochs=60 if FAST_MODE else 120,
            specialist_patience=15 if FAST_MODE else 20,
        ),
        "data": {"feature_prefix": "band_", "n_features": 64, "group_by": "latlon", "group_round": 4, "reference_data": None},
        "output": {"model_dir": str(paths["model_alpha"]), "metrics_dir": "results/metrics", "scaler_file": "scaler.pkl"},
    }

    fused_cfg = {
        "model": {"input_dim": n_fused_features, "hidden_dim": 192, "num_blocks": 3, "dropout": 0.2, "activation": "silu"},
        "targets": ["pH", "CEC", "ESP", "SOC", "Ca", "Mg", "Na"],
        "training": _training_section(
            epochs=TRAINING["epochs_fused"],
            final_epochs=TRAINING["final_epochs_fused"],
            specialist_epochs=80 if FAST_MODE else 150,
            specialist_patience=15 if FAST_MODE else 25,
        ),
        "data": {"feature_prefix": "feat_", "n_features": None, "group_by": "latlon", "group_round": 4, "reference_data": None},
        "output": {"model_dir": str(paths["model_fused"]), "metrics_dir": "results/metrics", "scaler_file": "scaler.pkl"},
    }

    for cfg_path, cfg in ((paths["cfg_alpha"], alpha_cfg), (paths["cfg_fused"], fused_cfg)):
        cfg_path.parent.mkdir(parents=True, exist_ok=True)
        cfg_path.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
    print("Wrote", paths["cfg_alpha"])
    print("Wrote", paths["cfg_fused"])

    progress_done("write_configs", "Training configs written")
except Exception as e:
    progress_fail("write_configs", str(e))
    raise


## 7) Train alpha model

In [None]:
import subprocess

# Train the alpha ensemble with the training script, unless disabled.
progress_start("train_alpha", "Train alpha model ensemble")

try:
    if not RUN_ALPHA:
        print("Skipped alpha training")
        progress_skip("train_alpha", "RUN_ALPHA=False")
    else:
        # (flag, value) pairs in the order they are passed to the script.
        alpha_options = [
            ("--data", paths["normalized_points"]),
            ("--config", paths["cfg_alpha"]),
            ("--output", paths["model_alpha"]),
            ("--cv-strategy", "group_kfold"),
            ("--seed", TRAINING["random_seed"]),
        ]
        cmd = ["python", "scripts/train_resnet_ensemble.py"]
        for flag, value in alpha_options:
            cmd += [flag, str(value)]

        print("Running:", " ".join(cmd))
        subprocess.run(cmd, check=True)
        progress_done("train_alpha", "Alpha ensemble training finished")
except Exception as e:
    progress_fail("train_alpha", str(e))
    raise


## 8) Train fused model

In [None]:
# Train the fused ensemble with the training script, unless disabled.
progress_start("train_fused", "Train fused model ensemble")

try:
    if not RUN_FUSED:
        print("Skipped fused training")
        progress_skip("train_fused", "RUN_FUSED=False")
    else:
        # (flag, value) pairs in the order they are passed to the script.
        fused_options = [
            ("--data", paths["fused_feat"]),
            ("--config", paths["cfg_fused"]),
            ("--output", paths["model_fused"]),
            ("--cv-strategy", "group_kfold"),
            ("--seed", TRAINING["random_seed"]),
        ]
        cmd = ["python", "scripts/train_resnet_ensemble.py"]
        for flag, value in fused_options:
            cmd += [flag, str(value)]

        print("Running:", " ".join(cmd))
        subprocess.run(cmd, check=True)
        progress_done("train_fused", "Fused ensemble training finished")
except Exception as e:
    progress_fail("train_fused", str(e))
    raise


## 9) Check model checkpoints for NaN/Inf (fail fast)

In [None]:
import torch

# Fail fast when any trained checkpoint contains NaN or Inf weights.
progress_start("checkpoint_validation", "Validate checkpoint tensors for NaN/Inf")

try:
    def check_model_dir(model_dir: Path):
        """Raise if any ``model_*.pth`` in *model_dir* holds non-finite tensors.

        Raises:
            FileNotFoundError: No ``model_*.pth`` file exists in *model_dir*.
            RuntimeError: A checkpoint has NaN/Inf entries; the message lists
                up to five offending state-dict keys.
        """
        checkpoints = sorted(model_dir.glob("model_*.pth"))
        if not checkpoints:
            raise FileNotFoundError(f"No model_*.pth found in {model_dir}")

        print("Checking", model_dir)
        for ckpt_path in checkpoints:
            # NOTE(review): weights_only=False unpickles arbitrary objects;
            # only load checkpoints produced by this pipeline.
            loaded = torch.load(ckpt_path, map_location="cpu", weights_only=False)
            weights = loaded["model_state_dict"]
            non_finite = [
                name
                for name, tensor in weights.items()
                if torch.is_tensor(tensor)
                and (torch.isnan(tensor).any() or torch.isinf(tensor).any())
            ]
            if non_finite:
                raise RuntimeError(f"Invalid weights in {ckpt_path.name}: {non_finite[:5]}")
            print("  OK", ckpt_path.name)

    # Only the models whose training stage is enabled are inspected.
    for enabled, trained_dir in ((RUN_ALPHA, paths["model_alpha"]), (RUN_FUSED, paths["model_fused"])):
        if enabled:
            check_model_dir(trained_dir)

    print("All checkpoints finite")
    progress_done("checkpoint_validation", "All checkpoints finite")
except Exception as e:
    progress_fail("checkpoint_validation", str(e))
    raise


## 10) CV summary from `ensemble_metrics.csv`

In [None]:
import re

# Collapse each model's ensemble_metrics.csv into per-target mean/std rows and
# save the combined summary.
progress_start("cv_summary", "Summarize cross-validation metrics")

try:
    def summarize_ensemble_metrics(csv_path: Path, model_label: str) -> pd.DataFrame:
        """Return mean/std for every ``<target>_(r2|rmse|mae)`` column of a CSV.

        A single placeholder row with NaN statistics is returned when the file
        is missing or contains no matching column.
        """
        def _placeholder(target_text):
            return pd.DataFrame([{"model": model_label, "target": target_text, "metric": "", "mean": np.nan, "std": np.nan}])

        if not csv_path.exists():
            return _placeholder("(missing)")

        df = pd.read_csv(csv_path)
        metric_column = re.compile(r"^(.*)_(r2|rmse|mae)$")
        matches = ((col, metric_column.match(col)) for col in df.columns)
        rows = [
            {
                "model": model_label,
                "target": hit.group(1).lower(),
                "metric": hit.group(2),
                "mean": float(df[col].mean()),
                # Population standard deviation (ddof=0).
                "std": float(df[col].std(ddof=0)),
            }
            for col, hit in matches
            if hit
        ]
        if not rows:
            return _placeholder("(no per-target metrics)")
        return pd.DataFrame(rows)

    alpha_cv = summarize_ensemble_metrics(paths["model_alpha"] / "ensemble_metrics.csv", "alpha_norm_stable")
    fused_cv = summarize_ensemble_metrics(paths["model_fused"] / "ensemble_metrics.csv", "fused_norm_stable")
    cv_summary = pd.concat([alpha_cv, fused_cv], ignore_index=True)

    display(cv_summary)

    cv_out = Path("results/metrics/colab_cv_summary_stable.csv")
    cv_out.parent.mkdir(parents=True, exist_ok=True)
    cv_summary.to_csv(cv_out, index=False)
    print("Saved", cv_out)
    progress_done("cv_summary", "Saved CV summary CSV")
except Exception as e:
    progress_fail("cv_summary", str(e))
    raise


## 11) Independent validation (alpha model)

In [None]:
# Score the alpha ensemble on the independent validation table: predict in
# normalized space, map predictions back to raw units with statistics of the
# raw training table, and compare against the truth columns that are present.
progress_start("independent_validation", "Run independent validation set")

try:
    if RUN_INDEPENDENT_VALIDATION:
        from src.models.ensemble import SoilEnsemble
        from src.evaluation.metrics import compute_metrics

        val_path = paths["validation"]
        if not val_path.exists():
            raise FileNotFoundError(f"Missing validation file: {val_path}")

        val_df = pd.read_csv(val_path)
        # Mirror A00..A63 columns, where present, to the band_0..band_63 names
        # used as model features.
        for i in range(64):
            src = f"A{i:02d}"
            if src in val_df.columns:
                val_df[f"band_{i}"] = val_df[src]

        feature_cols = [f"band_{i}" for i in range(64)]
        missing = [c for c in feature_cols if c not in val_df.columns]
        if missing:
            raise ValueError(f"Validation missing feature columns: {missing[:5]}")

        model = SoilEnsemble(paths["model_alpha"])
        pred_norm, _ = model.predict_batch(val_df[feature_cols].values.astype(np.float32))

        # Mean and population std (ddof=0) of the raw targets; a zero std is
        # replaced by 1.0 so the rescaling below stays finite.
        # NOTE(review): assumes the normalized table was built with these same
        # statistics - confirm against the normalization step.
        targets = ["ph", "cec", "esp", "soc", "ca", "mg", "na"]
        train_raw = pd.read_csv(paths["raw"], usecols=targets)
        mu = train_raw.mean(axis=0)
        sigma = train_raw.std(axis=0, ddof=0).replace(0.0, 1.0)

        # The statistics and truth_map are keyed by lower-case names while the
        # training config declares mixed-case targets ("pH", "CEC", ...), so
        # model target names are matched case-insensitively.
        target_names = list(model.target_names)
        stat_key = {t: str(t).lower() for t in target_names}

        pred_norm_df = pd.DataFrame(pred_norm, columns=target_names)
        pred_raw_df = pred_norm_df.copy()
        for t in target_names:
            pred_raw_df[t] = pred_norm_df[t] * float(sigma[stat_key[t]]) + float(mu[stat_key[t]])

        # Targets without a truth column in the validation table stay NaN.
        y_true_raw = pd.DataFrame(np.nan, index=val_df.index, columns=target_names)
        truth_map = {"ph": "ph", "cec": "cec_cmolkg", "esp": "esp_pct", "na": "na_cmolkg"}
        for t in target_names:
            src_col = truth_map.get(stat_key[t])
            if src_col is not None and src_col in val_df.columns:
                y_true_raw[t] = pd.to_numeric(val_df[src_col], errors="coerce")

        metrics = compute_metrics(
            y_true=y_true_raw[target_names].values,
            y_pred=pred_raw_df[target_names].values,
            target_names=model.target_names,
        )

        indep_df = pd.DataFrame(metrics).T.reset_index().rename(columns={"index": "target"}) if metrics else pd.DataFrame()
        display(indep_df)

        indep_out = Path("results/metrics/colab_independent_validation_alpha_stable.csv")
        indep_out.parent.mkdir(parents=True, exist_ok=True)
        indep_df.to_csv(indep_out, index=False)
        print("Saved", indep_out)
        progress_done("independent_validation", "Saved independent validation CSV")
    else:
        print("Skipped independent validation")
        progress_skip("independent_validation", "RUN_INDEPENDENT_VALIDATION=False")
except Exception as e:
    progress_fail("independent_validation", str(e))
    raise


## 12) Save outputs to Drive

In [None]:
from datetime import datetime, timezone

# Copy generated tables, configs, metrics, models and the progress file into a
# fresh timestamped folder on Google Drive.
progress_start("save_outputs", "Copy outputs to Google Drive")

try:
    # UTC stamp, consistent with the timestamps written to the progress file.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    drive_out = Path(f"/content/drive/MyDrive/soil-resnet-outputs/full_pipeline_stable_{stamp}")
    drive_out.mkdir(parents=True, exist_ok=True)

    copy_targets = [
        paths["normalized_points"],
        paths["be_sgpt"],
        paths["sgpt_only"],
        paths["sgpt_raw"],
        paths["fused_feat"],
        paths["fused_meta"],
        paths["cfg_alpha"],
        paths["cfg_fused"],
        Path("results/metrics/colab_cv_summary_stable.csv"),
        Path("results/metrics/colab_independent_validation_alpha_stable.csv"),
    ]

    # Files that were never produced (e.g. skipped stages) are silently omitted.
    for p in copy_targets:
        if p.exists():
            shutil.copy2(p, drive_out / p.name)

    for model_key in ("model_alpha", "model_fused"):
        model_dir = paths[model_key]
        if model_dir.exists():
            shutil.copytree(model_dir, drive_out / model_dir.name)

    print("Saved outputs to:", drive_out)
    progress_done("save_outputs", f"Outputs copied to {drive_out}")

    # Copy the progress file only after this step is marked done; copying it
    # earlier would leave the Drive snapshot showing save_outputs as running.
    if PROGRESS_FILE.exists():
        shutil.copy2(PROGRESS_FILE, drive_out / PROGRESS_FILE.name)

    print("\nProgress snapshot:")
    show_progress()
except Exception as e:
    progress_fail("save_outputs", str(e))
    raise
