
# PIEL-NET Pipeline (Notebook)
This notebook mirrors `main.py` and runs the end-to-end PIEL-NET pipeline for city–region temperature forecasting.
- Physics prior (advection–diffusion) → LMS baseline → hybrid alignment  
- ConvLSTM baseline (V4) and RAFL-specialized expert (V5)  
- edRVFL-SC ensemble fusion → save final metrics and predictions

> **Note:** Ensure your environment can import `ed_rvfl_sc`. If you package it as a module, place it on `PYTHONPATH` or next to this notebook.



## 0) (Optional) Install dependencies
If you already set up a virtualenv with `requirements.txt`, skip this cell.


In [None]:
# !pip install -r requirements.txt
# Or install manually:
# !pip install numpy pandas scipy matplotlib scikit-learn tensorflow keras tqdm



## 1) Imports and path setup


In [None]:

import os, json, sys, numpy as np

# Make sure local src/ is importable if you keep modules there
sys.path.insert(0, './src')

from data_loader import load_data
from advection import get_optimized_physics_predictions
from matric import calculate_metrics
from data_transform import transform_X
from error_compute import calculate_combined_errors
from extract_target_column import extract_target_column
from fuzzy_mem import compute_fuzzy_memberships
from PIEL_NET import HybridModel

# Required by the final ensemble stage: ensure `ed_rvfl_sc` is installed or available on the path
from ed_rvfl_sc import edRVFL_SC



## 2) Parameters (replaces argparse)
Adjust the paths and hyperparameters below. The defaults match `main.py`.


In [None]:

from types import SimpleNamespace

# Notebook stand-in for the command-line namespace: every setting is a plain
# attribute on `args`, read by the pipeline cell further down.
args = SimpleNamespace(
    # --- Data locations and naming ---
    data_dir="./Data",                # folder scanned for *.csv datasets
    results_root="Results/Ablation",  # per-dataset result folders go here
    output_name="C_T2M",              # target column name
    # --- Sequence windowing ---
    T=48,  # input window
    S=12,  # stride
    H=12,  # horizon
    # --- Training loop ---
    epochs=1000,
    batch_size=32,
    patience=10,
    # --- Loss configuration; loss types are one of ["MAE", "MSE", "focal"] ---
    loss_type_v4="MAE",
    loss_type_v5="focal",
    alpha_w=0.2,
    beta_w=2.0,
    gamma_w=5.0,
    eta=0.1,
    focal_gamma=5.0,
    # --- edRVFL-SC ensemble ---
    rvfl_units=256,
    rvfl_lambda=1e-3,
    rvfl_Lmax=7,
    rvfl_deep_boost=0.9,
    # --- Miscellaneous ---
    seed=42,
)
# NOTE(review): only NumPy's global RNG is seeded here; any other random source
# used by the imported models is not touched by this cell — confirm if exact
# reproducibility is required.
np.random.seed(args.seed)



## 3) Utilities


In [None]:

def ensure_dir(path: str):
    """Create directory *path* if it is missing and hand the same path back.

    Missing parent directories are created as well, and an already existing
    directory is not treated as an error. Returning the path lets callers
    create and bind a directory in a single expression.
    """
    os.makedirs(name=path, exist_ok=True)
    return path



## 4) Run the pipeline
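This cell processes all `.csv` files in `args.data_dir` and saves only the **final** ensemble results under `<results_root>/<dataset>/PIELNET/` (`Results/Ablation/<dataset>/PIELNET/` with the default settings), plus a compact `metrics.json` in each dataset folder.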


In [None]:

# Create results root (including any missing parent folders) before looping.
ensure_dir(args.results_root)

# Every *.csv entry in args.data_dir is handled as an independent dataset.
# Sorting the listing makes the processing order deterministic.
for csv_file in sorted(os.listdir(args.data_dir)):
    # The suffix check is case-sensitive; anything else in the folder is skipped.
    if not csv_file.endswith(".csv"):
        continue

    # Per-dataset layout: <results_root>/<dataset>/ holds metrics.json and the
    # checkpoint paths handed to the models; <results_root>/<dataset>/PIELNET/
    # is the folder passed to calculate_metrics for the final results.
    csv_path = os.path.join(args.data_dir, csv_file)
    dataset = os.path.splitext(csv_file)[0]
    run_dir = ensure_dir(os.path.join(args.results_root, dataset))
    final_dir = ensure_dir(os.path.join(run_dir, "PIELNET"))  # unified folder name

    print(f"\n=== Processing dataset: {dataset} ===")

    # ---- Load & window ----
    # load_data returns train/val/test splits plus normalisation statistics and
    # a column map; both extras are passed on unchanged to later helpers.
    X_train, Y_train, X_val, Y_val, X_test, Y_test, norm_stats, col_map = load_data(
        csv_path=csv_path, T=args.T, S=args.S, H=args.H, output_dir=None
    )

    # ---- Physics predictions ----
    # The fourth return value is discarded here.
    Y_train_phy, Y_val_phy, Y_test_phy, _ = get_optimized_physics_predictions(
        X_train, Y_train, X_val, X_test, norm_stats, col_map, args.H
    )

    # Align physics predictions via linear map
    # A is the least-squares solution of Y_train_phy @ A ≈ Y_train (via the
    # pseudo-inverse). It is fitted on the training split only and then applied
    # unchanged to the validation and test splits.
    A = np.linalg.pinv(Y_train_phy) @ Y_train
    Y_train_phy = Y_train_phy @ A
    Y_val_phy   = Y_val_phy   @ A
    Y_test_phy  = Y_test_phy  @ A

    # ---- LMS baseline over raw features ----
    # Each sample is flattened to a single feature vector, then a linear
    # least-squares regressor is fitted on the training split.
    Xtr = X_train.reshape(X_train.shape[0], -1)
    Xva = X_val.reshape(X_val.shape[0], -1)
    Xte = X_test.reshape(X_test.shape[0], -1)
    W_lms, *_ = np.linalg.lstsq(Xtr, Y_train, rcond=None)
    Y_est_train = Xtr @ W_lms
    Y_est_val   = Xva @ W_lms
    Y_est_test  = Xte @ W_lms

    # ---- Physics + LMS hybrid projection ----
    # Stack the LMS and aligned physics predictions column-wise, then project
    # the stacked predictions back onto the target with a second least-squares
    # map (A2), again fitted on the training split only.
    Y_train_H = np.hstack([Y_est_train, Y_train_phy])
    Y_val_H   = np.hstack([Y_est_val,   Y_val_phy])
    Y_test_H  = np.hstack([Y_est_test,  Y_test_phy])

    A2 = np.linalg.pinv(Y_train_H) @ Y_train
    Y_train_H = Y_train_H @ A2
    Y_val_H   = Y_val_H   @ A2
    Y_test_H  = Y_test_H  @ A2

    # ---- Error signals & memberships ----
    # Errors of the hybrid prediction are computed for the training and
    # validation splits only; no test-split error is computed in this cell.
    Err_train = calculate_combined_errors(Y_train_H, Y_train)
    Err_val   = calculate_combined_errors(Y_val_H,   Y_val)

    # Not used later but available if needed (kept for parity with main.py)
    _ = extract_target_column(X_train, col_map, target_col=args.output_name)

    train_memberships, val_memberships = compute_fuzzy_memberships(
        Err_train, Err_val, mf_type="triangle"
    )

    # ---- Data transform for ConvLSTM model ----
    # Only the first element returned by transform_X is used; a trailing axis
    # of size 1 is appended to it.
    X_train_P = np.expand_dims(transform_X(X_train, col_map)[0], axis=-1)
    X_val_P   = np.expand_dims(transform_X(X_val,   col_map)[0], axis=-1)
    X_test_P  = np.expand_dims(transform_X(X_test,  col_map)[0], axis=-1)

    # Pack labels with error + memberships
    # Column order is [targets | errors | memberships].
    # NOTE(review): presumably HybridModel's loss unpacks these columns by
    # position — confirm against PIEL_NET before reordering.
    YY_train = np.column_stack((Y_train, Err_train, train_memberships))
    YY_val   = np.column_stack((Y_val,   Err_val,   val_memberships))

    # ---- Data model V4 ----
    # Same constructor arguments as V5 below except for loss_type and the
    # checkpoint path.
    data_model = HybridModel(
        input_shape=X_train_P.shape[1:],
        pi_dim=Y_train.shape[1],
        checkpoint_path=os.path.join(run_dir, "_tmp_ignore_v4"),
        loss_type=args.loss_type_v4,
        alpha=args.alpha_w, beta=args.beta_w, gamma=args.gamma_w,
        eta=args.eta, focal_gamma=args.focal_gamma
    )
    data_model.fit(
        X_train_P, YY_train,
        validation_data=(X_val_P, YY_val),
        epochs=args.epochs, batch_size=args.batch_size, patience=args.patience
    )
    Y_train_D = data_model.predict(X_train_P)
    Y_val_D   = data_model.predict(X_val_P)
    Y_test_D  = data_model.predict(X_test_P)

    # ---- Data model V5 (RAFL) ----
    # Trained on the same inputs and packed labels as V4; only the loss type
    # (args.loss_type_v5) and checkpoint path differ.
    data_model_F = HybridModel(
        input_shape=X_train_P.shape[1:],
        pi_dim=Y_train.shape[1],
        checkpoint_path=os.path.join(run_dir, "_tmp_ignore_v5"),
        loss_type=args.loss_type_v5,
        alpha=args.alpha_w, beta=args.beta_w, gamma=args.gamma_w,
        eta=args.eta, focal_gamma=args.focal_gamma
    )
    data_model_F.fit(
        X_train_P, YY_train,
        validation_data=(X_val_P, YY_val),
        epochs=args.epochs, batch_size=args.batch_size, patience=args.patience
    )
    Y_train_FD = data_model_F.predict(X_train_P)
    Y_val_FD   = data_model_F.predict(X_val_P)
    Y_test_FD  = data_model_F.predict(X_test_P)

    # ---- Final ensemble (edRVFL-SC) ----
    # Ensemble inputs are the column-wise stack [V4 | V5 | hybrid] predictions.
    # Z_val is assembled for completeness but is not consumed in this cell:
    # the ensemble is trained on Z_train and evaluated on Z_test only.
    Z_train = np.hstack([Y_train_D, Y_train_FD, Y_train_H])
    Z_val   = np.hstack([Y_val_D,   Y_val_FD,   Y_val_H])
    Z_test  = np.hstack([Y_test_D,  Y_test_FD,  Y_test_H])

    MOE = edRVFL_SC(
        num_units=args.rvfl_units,
        activation="relu",
        lambda_=args.rvfl_lambda,
        Lmax=args.rvfl_Lmax,
        deep_boosting=args.rvfl_deep_boost
    )
    MOE.train(Z_train, Y_train)
    Y_test_final = MOE.predict(Z_test)

    # ---- Persist only final metrics under PIELNET ----
    # calculate_metrics receives final_dir as its output location.
    # NOTE(review): presumably it writes its artefacts there — confirm in matric.
    metrics_all = {}
    metrics_all["PIELNET"] = calculate_metrics(
        Y_test, Y_test_final, args.output_name, norm_stats, final_dir
    )

    # Also save a compact metrics.json at dataset root for quick scan
    # NOTE(review): json.dump requires the returned metrics to be
    # JSON-serialisable (plain Python numbers/containers) — confirm.
    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
        json.dump(metrics_all, f, indent=4)

    print(f"✓ Completed {dataset} → {final_dir}")
