# 03 — FNO Surrogate Fidelity Validation

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SharathSPhD/RLpower/blob/main/notebooks/03_surrogate_validation.ipynb)

Load and inspect fidelity metrics for the FNO (Fourier Neural Operator) surrogate: per-variable RMSE and R² measured against held-out FMU (Functional Mock-up Unit) roll-outs.

**On Google Colab**: fidelity metrics are loaded from `data/surrogate_fidelity_report.json` in the repository. The FNO model weights (`best_fno.pt`, ~50 MB) are not committed to the repo, so a model architecture sanity check with randomly initialised weights is run instead. To use the trained weights, download `best_fno.pt` from the Releases page and place it at `artifacts/surrogate/best_fno.pt` under the repository root.

**Key finding**: The first FNO training run failed the fidelity gates (overall R² = −77.15) because of dataset upsampling artefacts. Remediation requires ≥100,000 unique FMU trajectories generated by Latin hypercube sampling (LHS).

In [1]:
# ── Environment Setup (runs on Colab or locally) ──────────────────────────────
# Detects the runtime, makes sure a checkout of the repository is available,
# puts <repo>/src on sys.path, switches the working directory to the repo root
# and (on Colab only) installs the third-party packages the notebook needs.
import os
import subprocess
import sys

# The import attempt alone decides the flag: it succeeds exactly when the
# google.colab package is available, which also covers the case where it is
# already present in sys.modules.
try:
    import google.colab  # noqa: F401
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

REPO_URL = "https://github.com/SharathSPhD/RLpower.git"
# Colab uses a fixed clone location; elsewhere WORKSPACE_DIR selects the
# checkout and defaults to /workspace.
REPO_DIR = "/content/RLpower" if IN_COLAB else os.environ.get("WORKSPACE_DIR", "/workspace")

# Shallow-clone only once; re-running the cell reuses the existing checkout.
if IN_COLAB and not os.path.exists(REPO_DIR):
    subprocess.run(["git", "clone", "--depth=1", REPO_URL, REPO_DIR], check=True)

# Same handling for both runtimes: the membership guard keeps repeated runs of
# this cell from stacking duplicate sys.path entries (previously only the
# local branch had this guard).
_src = os.path.join(REPO_DIR, "src")
if _src not in sys.path:
    sys.path.insert(0, _src)
os.chdir(REPO_DIR)

if IN_COLAB:
    # Install with the kernel's own interpreter so the packages land in the
    # environment this notebook is actually running in.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q",
         "matplotlib", "numpy", "pyyaml", "torch"],
        check=True,
    )

# Select the non-interactive Agg backend. This has to happen before
# matplotlib.pyplot is imported below, which is why the imports are split.
# NOTE(review): Agg only renders to files/buffers, so plt.show() will
# presumably not display figures inline in the notebook — confirm this is
# intended (figures are still written to disk via plt.savefig).
import matplotlib
matplotlib.use("Agg")

from pathlib import Path
import json
import matplotlib.pyplot as plt
import numpy as np
import torch
import yaml

# Project-local import; it relies on <repo>/src having been added to sys.path
# by the environment-setup code earlier in this cell.
from sco2rl.surrogate.fno_model import FNO1d

# Banner confirming which runtime was detected and that every import resolved.
print(f"Environment: {'Google Colab' if IN_COLAB else 'Local/Docker'}")
print("Imports OK")

Imports OK


In [2]:
# Load the surrogate fidelity report, print the overall and per-variable
# metrics, and plot normalized RMSE and R² per variable against their gate
# thresholds.
ROOT = Path('.').resolve()

# Candidate report locations in priority order: the copy committed under
# data/ (works on Colab and locally), then the full artifact tree that only
# exists on a local run.
_report_candidates = (
    ROOT / 'data/surrogate_fidelity_report.json',
    ROOT / 'artifacts/surrogate/fidelity_report.json',
)
report_path = next((p for p in _report_candidates if p.exists()), None)
if report_path is None:
    # Fail with every location that was tried instead of an error that only
    # mentions the fallback path.
    raise FileNotFoundError(
        'Surrogate fidelity report not found; looked in: '
        + ', '.join(str(p) for p in _report_candidates)
    )

# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
report = json.loads(report_path.read_text(encoding='utf-8'))
per_variable = report['per_variable']

print(f'overall_rmse_normalized: {report["overall_rmse_normalized"]:.4f}')
print(f'overall_r2:              {report["overall_r2"]:.4f}')
print(f'passed:                  {report["passed"]}')
print()
print('Per-variable metrics:')
for var, m in per_variable.items():
    print(f'  {var:<30} RMSE={m["rmse"]:.4f}  R2={m["r2"]:.4f}')

# Parallel lists in the report's own variable order, one entry per bar.
names = list(per_variable)
rmse = [m['rmse'] for m in per_variable.values()]
r2   = [m['r2'] for m in per_variable.values()]

fig, axs = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: normalized RMSE with its gate threshold marked in red.
axs[0].barh(names, rmse)
axs[0].axvline(0.05, color='red', linestyle='--', label='Gate threshold (0.05)')
axs[0].set_title('Normalized RMSE by Variable')
axs[0].set_xlabel('Norm. RMSE')
axs[0].legend()
axs[0].grid(True, axis='x')

# Right panel: R² with its gate threshold in green; the thin black line at 0
# marks the mean-predictor baseline, so bars left of it are worse than that.
axs[1].barh(names, r2)
axs[1].axvline(0.80, color='green', linestyle='--', label='Gate threshold (0.80)')
axs[1].axvline(0.00, color='black', linewidth=0.8)
axs[1].set_title('R² by Variable')
axs[1].set_xlabel('R²')
axs[1].legend()
axs[1].grid(True, axis='x')

plt.tight_layout()
plt.savefig('/tmp/surrogate_fidelity.png', dpi=90, bbox_inches='tight')
plt.show()
print('\nNote: Negative R² indicates the surrogate is worse than a mean predictor.')
print('Root cause: 75k-sample dataset was built by upsampling a smaller collection.')

overall_rmse_normalized: 0.19725745653488658
overall_r2: -77.15335956641606
passed: False


In [3]:
# FNO model architecture sanity check
# The trained weights (best_fno.pt ~50 MB) are not committed to the repo.
# Load from artifacts/ if running locally; skip weight loading on Colab.
fno_cfg = yaml.safe_load(
    (ROOT / 'configs/surrogate/fno_surrogate.yaml').read_text(encoding='utf-8')
)['fno']

# Convert the dimensions once; they are reused for the model and for the
# dummy tensors below.
state_dim = int(fno_cfg['output_dim'])
# The action width is whatever remains of the model input after the state
# (presumably the input is state and action concatenated — confirm against
# FNO1d.predict_next_state).
action_dim = int(fno_cfg['input_dim']) - state_dim

model = FNO1d(
    modes=int(fno_cfg['modes']),
    width=int(fno_cfg['width']),
    n_layers=int(fno_cfg['n_layers']),
    input_dim=int(fno_cfg['input_dim']),
    output_dim=state_dim,
    activation=fno_cfg.get('activation', 'gelu'),
    padding=int(fno_cfg.get('padding', 8)),
)

weights_path = ROOT / 'artifacts/surrogate/best_fno.pt'
if weights_path.exists():
    # The checkpoint is a file downloaded separately from the repo, so
    # restrict unpickling to tensors and plain containers. A state_dict needs
    # nothing more, and this prevents arbitrary code execution from a
    # tampered file.
    state_dict = torch.load(weights_path, map_location='cpu', weights_only=True)
    model.load_state_dict(state_dict)
    print('Loaded trained FNO weights from', weights_path)
else:
    print('Trained weights not available — running architecture check only (random weights).')

# Inference mode (equivalent to model.train(False)).
model.eval()

# Shape sanity check (works with or without trained weights)
state  = torch.zeros((1, state_dim), dtype=torch.float32)
action = torch.zeros((1, action_dim), dtype=torch.float32)
with torch.no_grad():
    pred = model.predict_next_state(state, action)
print('Input state dim :', state.shape[1])
print('Input action dim:', action.shape[1])
print('Output (next state) shape:', tuple(pred.shape))
print()
param_count = sum(p.numel() for p in model.parameters())
print(f'Model parameters: {param_count:,}')

prediction shape: (1, 14)
