
# PIEL-NET Pipeline (Notebook)
This notebook mirrors `main.py` and runs the end-to-end PIEL-NET pipeline for city–region temperature forecasting.
- Physics prior (advection–diffusion) → LMS baseline → hybrid alignment  
- ConvLSTM baseline (V4) and RAFL-specialized expert (V5)  
- edRVFL-SC ensemble fusion → save final metrics and predictions

> **Note:** Ensure your environment can import `ed_rvfl_sc`. If you package it as a module, place it on `PYTHONPATH` or next to this notebook.



## 0) (Optional) Install dependencies
If you already set up a virtualenv with `requirements.txt`, skip this cell.


In [1]:
!pip install -r requirements.txt
#

Collecting ed_rvfl_sc==0.1.1 (from -r requirements.txt (line 1))
  Using cached ed_rvfl_sc-0.1.1-py3-none-any.whl.metadata (6.8 kB)
Collecting protobuf>=3.20.2 (from onnx==1.17.0->-r requirements.txt (line 4))
  Using cached protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (679 bytes)
Using cached ed_rvfl_sc-0.1.1-py3-none-any.whl (5.4 kB)
Using cached protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
Installing collected packages: protobuf, ed_rvfl_sc
[2K  Attempting uninstall: protobuf
[2K    Found existing installation: protobuf 4.21.12
[2K    Uninstalling protobuf-4.21.12:
[2K      Successfully uninstalled protobuf-4.21.12
[2K  Attempting uninstall: ed_rvfl_sc
[2K    Found existing installation: ed-rvfl-sc 0.1.0
[2K    Uninstalling ed-rvfl-sc-0.1.0:
[2K      Successfully uninstalled ed-rvfl-sc-0.1.038;5;237m╺[0m[38;5;237m━━━━━━━━━━━━━━━━━━━[0m [32m1/2[0m [ed_rvfl_sc]
[2K   [38;2;114;156;31m━━━━━━━━━


## 1) Imports and path setup


In [2]:

import os, json, sys, numpy as np

# Make sure local src/ is importable if you keep modules there
sys.path.insert(0, './src')

from data_loader import load_data
from advection import get_optimized_physics_predictions
from matric import calculate_metrics
from data_transform import transform_X
from error_compute import calculate_combined_errors
from extract_target_column import extract_target_column
from fuzzy_mem import compute_fuzzy_memberships
from PIEL_NET import HybridModel

# Ensemble expert used by the final fusion step: must be installed or available on the path
from ed_rvfl_sc import edRVFL_SC


2025-10-03 22:52:55.591713: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759503175.613544 4116207 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759503175.620407 4116207 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1759503175.637229 4116207 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759503175.637245 4116207 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759503175.637247 4116207 computation_placer.cc:177] computation placer alr


## 2) Parameters (replaces argparse)
Adjust paths and hyperparameters below. The defaults match `main.py`.


In [3]:

from types import SimpleNamespace

# Run configuration exposed as attributes (args.T, args.epochs, ...), standing
# in for the command-line arguments of the script version.
args = SimpleNamespace(**{
    # --- Data locations and output naming ---
    "data_dir": "./Data",                # folder scanned for input .csv files
    "results_root": "Results/Ablation",  # per-dataset result folders are created below this
    "output_name": "C_T2M",              # name of the target column
    # --- Sequence windowing ---
    "T": 48,   # input window
    "S": 12,   # stride
    "H": 12,   # horizon
    # --- Training loop ---
    "epochs": 1000,
    "batch_size": 32,
    "patience": 10,
    # --- Loss configuration; each loss type is one of "MAE", "MSE", "focal" ---
    "loss_type_v4": "MAE",
    "loss_type_v5": "focal",
    "alpha_w": 0.2,
    "beta_w": 2.0,
    "gamma_w": 5.0,
    "eta": 0.1,
    "focal_gamma": 5.0,
    # --- edRVFL-SC ensemble ---
    "rvfl_units": 256,
    "rvfl_lambda": 1e-3,
    "rvfl_Lmax": 7,
    "rvfl_deep_boost": 0.9,
    # --- Miscellaneous ---
    "seed": 42,
})

# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded here; any other random source used by the
# models is not covered by this call.
np.random.seed(args.seed)



## 3) Utilities


In [4]:

def ensure_dir(path: str):
    """Make sure the directory `path` exists, then return `path` unchanged.

    Missing parent directories are created as well. An already existing
    directory is left untouched, so the call can be repeated safely.
    """
    if not os.path.isdir(path):
        # exist_ok keeps this safe if the directory appears in the meantime.
        os.makedirs(path, exist_ok=True)
    return path



## 4) Run the pipeline
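This cell processes all `.csv` files in `args.data_dir` and saves only the **final** ensemble results under `<args.results_root>/<dataset>/PIELNET/` (with the defaults above: `Results/Ablation/<dataset>/PIELNET/`), plus a compact `metrics.json` in `<args.results_root>/<dataset>/`.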


In [5]:

def _train_expert(loss_type, checkpoint_name, run_dir, pi_dim,
                  X_train_P, YY_train, X_val_P, YY_val, X_test_P):
    """Build, fit and apply one HybridModel expert.

    The two experts trained per dataset differ only in their loss type and
    checkpoint location, so both go through this helper. Shared
    hyperparameters are read from the notebook-level `args` namespace.

    Parameters:
        loss_type: loss identifier forwarded to HybridModel.
        checkpoint_name: file/folder name joined onto `run_dir` for checkpoints.
        run_dir: per-dataset results directory.
        pi_dim: forwarded to HybridModel as `pi_dim` (callers pass the number
            of target columns, Y_train.shape[1]).
        X_train_P, X_val_P, X_test_P: transformed model inputs.
        YY_train, YY_val: packed labels (targets, error signal, memberships).

    Returns:
        (model, train_predictions, val_predictions, test_predictions);
        predictions are computed in that order.
    """
    model = HybridModel(
        input_shape=X_train_P.shape[1:],
        pi_dim=pi_dim,
        checkpoint_path=os.path.join(run_dir, checkpoint_name),
        loss_type=loss_type,
        alpha=args.alpha_w, beta=args.beta_w, gamma=args.gamma_w,
        eta=args.eta, focal_gamma=args.focal_gamma
    )
    model.fit(
        X_train_P, YY_train,
        validation_data=(X_val_P, YY_val),
        epochs=args.epochs, batch_size=args.batch_size, patience=args.patience
    )
    # Tuple elements are evaluated left to right: train, then val, then test.
    return (
        model,
        model.predict(X_train_P),
        model.predict(X_val_P),
        model.predict(X_test_P),
    )


# Create results root
ensure_dir(args.results_root)

# One full pipeline run per .csv file, in sorted (deterministic) order.
for csv_file in sorted(os.listdir(args.data_dir)):
    if not csv_file.endswith(".csv"):
        continue

    csv_path = os.path.join(args.data_dir, csv_file)
    dataset = os.path.splitext(csv_file)[0]  # file name without extension
    run_dir = ensure_dir(os.path.join(args.results_root, dataset))
    final_dir = ensure_dir(os.path.join(run_dir, "PIELNET"))  # unified folder name

    print(f"\n=== Processing dataset: {dataset} ===")

    # ---- Load & window ----
    X_train, Y_train, X_val, Y_val, X_test, Y_test, norm_stats, col_map = load_data(
        csv_path=csv_path, T=args.T, S=args.S, H=args.H, output_dir=None
    )

    # ---- Physics predictions ----
    Y_train_phy, Y_val_phy, Y_test_phy, _ = get_optimized_physics_predictions(
        X_train, Y_train, X_val, X_test, norm_stats, col_map, args.H
    )

    # Align physics predictions via a linear map: A is the least-squares
    # solution of Y_train_phy @ A ~= Y_train, fitted on the training split only
    # and then applied unchanged to every split.
    A = np.linalg.pinv(Y_train_phy) @ Y_train
    Y_train_phy = Y_train_phy @ A
    Y_val_phy   = Y_val_phy   @ A
    Y_test_phy  = Y_test_phy  @ A

    # ---- LMS baseline over raw features ----
    # Flatten every sample to one row, then fit a single least-squares weight
    # matrix on the training split.
    Xtr = X_train.reshape(X_train.shape[0], -1)
    Xva = X_val.reshape(X_val.shape[0], -1)
    Xte = X_test.reshape(X_test.shape[0], -1)
    W_lms, *_ = np.linalg.lstsq(Xtr, Y_train, rcond=None)
    Y_est_train = Xtr @ W_lms
    Y_est_val   = Xva @ W_lms
    Y_est_test  = Xte @ W_lms

    # ---- Physics + LMS hybrid projection ----
    # Stack both prediction sets column-wise and project them onto the targets
    # with a second least-squares map (again fitted on the training split only).
    Y_train_H = np.hstack([Y_est_train, Y_train_phy])
    Y_val_H   = np.hstack([Y_est_val,   Y_val_phy])
    Y_test_H  = np.hstack([Y_est_test,  Y_test_phy])

    A2 = np.linalg.pinv(Y_train_H) @ Y_train
    Y_train_H = Y_train_H @ A2
    Y_val_H   = Y_val_H   @ A2
    Y_test_H  = Y_test_H  @ A2

    # ---- Error signals & memberships ----
    Err_train = calculate_combined_errors(Y_train_H, Y_train)
    Err_val   = calculate_combined_errors(Y_val_H,   Y_val)

    # Result not used later; the call is kept for parity with main.py. It is a
    # bare call (not `_ = ...`) because assigning `_` in a notebook shadows
    # IPython's last-output variable.
    extract_target_column(X_train, col_map, target_col=args.output_name)

    train_memberships, val_memberships = compute_fuzzy_memberships(
        Err_train, Err_val, mf_type="triangle"
    )

    # ---- Data transform for ConvLSTM model ----
    # Only the first element returned by transform_X is used; a trailing
    # axis of size 1 is appended to it.
    X_train_P = np.expand_dims(transform_X(X_train, col_map)[0], axis=-1)
    X_val_P   = np.expand_dims(transform_X(X_val,   col_map)[0], axis=-1)
    X_test_P  = np.expand_dims(transform_X(X_test,  col_map)[0], axis=-1)

    # Pack labels with error + memberships
    YY_train = np.column_stack((Y_train, Err_train, train_memberships))
    YY_val   = np.column_stack((Y_val,   Err_val,   val_memberships))

    # ---- Data model V4 ----
    data_model, Y_train_D, Y_val_D, Y_test_D = _train_expert(
        args.loss_type_v4, "_tmp_ignore_v4", run_dir, Y_train.shape[1],
        X_train_P, YY_train, X_val_P, YY_val, X_test_P
    )

    # ---- Data model V5 (RAFL) ----
    data_model_F, Y_train_FD, Y_val_FD, Y_test_FD = _train_expert(
        args.loss_type_v5, "_tmp_ignore_v5", run_dir, Y_train.shape[1],
        X_train_P, YY_train, X_val_P, YY_val, X_test_P
    )

    # ---- Final ensemble (edRVFL-SC) ----
    # Fusion inputs: predictions of both experts plus the hybrid projection.
    # Z_val is assembled for symmetry/inspection; it is not consumed further
    # in this cell.
    Z_train = np.hstack([Y_train_D, Y_train_FD, Y_train_H])
    Z_val   = np.hstack([Y_val_D,   Y_val_FD,   Y_val_H])
    Z_test  = np.hstack([Y_test_D,  Y_test_FD,  Y_test_H])

    MOE = edRVFL_SC(
        num_units=args.rvfl_units,
        activation="relu",
        lambda_=args.rvfl_lambda,
        Lmax=args.rvfl_Lmax,
        deep_boosting=args.rvfl_deep_boost
    )
    MOE.train(Z_train, Y_train)
    Y_test_final = MOE.predict(Z_test)

    # ---- Persist only final metrics under PIELNET ----
    metrics_all = {}
    metrics_all["PIELNET"] = calculate_metrics(
        Y_test, Y_test_final, args.output_name, norm_stats, final_dir
    )

    # Also save a compact metrics.json at dataset root for quick scan
    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
        json.dump(metrics_all, f, indent=4)

    print(f"✓ Completed {dataset} → {final_dir}")



=== Processing dataset: Calgary ===

Training Statistics:
X shape: (5110, 48, 49)
X min: -4.9660, X max: 6.5691, X mean: 0.0006
Y shape: (5110, 12)
Y min: -3.2547, Y max: 2.7794, Y mean: 0.0008

Validation Statistics:
X shape: (1091, 48, 49)
X min: -5.2414, X max: 5.1212, X mean: 0.0242
Y shape: (1091, 12)
Y min: -3.3688, Y max: 2.4174, Y mean: -0.0218

Test Statistics:
X shape: (1092, 48, 49)
X min: -4.2970, X max: 5.0762, X mean: 0.1159
Y shape: (1092, 12)
Y min: -3.8539, Y max: 2.6135, Y mean: 0.1349

Column Mapping:
 0: YEAR
 1: MO
 2: DY
 3: HR
 4: C_WS50M
 5: C_WD50M
 6: C_PS
 7: C_QV2M
 8: C_T2M
 9: N_WS50M
10: N_WD50M
11: N_PS
12: N_QV2M
13: N_T2M
14: NE_WS50M
15: NE_WD50M
16: NE_PS
17: NE_QV2M
18: NE_T2M
19: E_WS50M
20: E_WD50M
21: E_PS
22: E_QV2M
23: E_T2M
24: SE_WS50M
25: SE_WD50M
26: SE_PS
27: SE_QV2M
28: SE_T2M
29: S_WS50M
30: S_WD50M
31: S_PS
32: S_QV2M
33: S_T2M
34: SW_WS50M
35: SW_WD50M
36: SW_PS
37: SW_QV2M
38: SW_T2M
39: W_WS50M
40: W_WD50M
41: W_PS
42: W_QV2M
43: W_

I0000 00:00:1759503203.247305 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:03:00.0, compute capability: 8.6


Epoch 1/1000


I0000 00:00:1759503211.961492 4116487 service.cc:152] XLA service 0x597e49dc7570 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759503211.961518 4116487 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3080 Ti, Compute Capability 8.6
2025-10-03 22:53:32.253378: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1759503213.589485 4116487 cuda_dnn.cc:529] Loaded cuDNN version 90701


[1m  1/160[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m41:23[0m 16s/step - custom_mae: 0.5978 - loss: 0.5978

I0000 00:00:1759503220.251385 4116487 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 180ms/step - custom_mae: 0.4903 - loss: 0.4903 - val_custom_mae: 0.2898 - val_loss: 0.2898
Epoch 2/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 108ms/step - custom_mae: 0.1591 - loss: 0.1591 - val_custom_mae: 0.2923 - val_loss: 0.2923
Epoch 3/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 110ms/step - custom_mae: 0.1284 - loss: 0.1284 - val_custom_mae: 0.2881 - val_loss: 0.2881
Epoch 4/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 107ms/step - custom_mae: 0.1192 - loss: 0.1192 - val_custom_mae: 0.2734 - val_loss: 0.2734
Epoch 5/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 105ms/step - custom_mae: 0.1115 - loss: 0.1115 - val_custom_mae: 0.2843 - val_loss: 0.2843
Epoch 6/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 109ms/step - custom_mae: 0.1105 - loss: 0.1105 - val_custom_mae: 0.2751 - val_

Expected: main_input
Received: inputs=['Tensor(shape=(None, 48, 5, 10, 1))']
I0000 00:00:1759503614.976124 4116207 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
I0000 00:00:1759503614.976281 4116207 single_machine.cc:374] Starting new session
I0000 00:00:1759503614.977179 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:03:00.0, compute capability: 8.6
I0000 00:00:1759503615.312005 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:03:00.0, compute capability: 8.6
I0000 00:00:1759503615.355056 4116207 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
I0000 00:00:1759503615.355318 4116207 single_machine.cc:374] Starting new session
I0000 00:00:1759503615.356639 411

Epoch 1/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 165ms/step - custom_mae: 0.5681 - loss: 0.1340 - val_custom_mae: 0.3772 - val_loss: 0.0042
Epoch 2/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 112ms/step - custom_mae: 0.3344 - loss: 0.0260 - val_custom_mae: 0.3596 - val_loss: 0.0021
Epoch 3/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 112ms/step - custom_mae: 0.2811 - loss: 0.0017 - val_custom_mae: 0.4732 - val_loss: 0.0184
Epoch 4/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 114ms/step - custom_mae: 0.3384 - loss: 0.0111 - val_custom_mae: 0.3192 - val_loss: 7.4943e-04
Epoch 5/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 112ms/step - custom_mae: 0.2541 - loss: 0.0024 - val_custom_mae: 0.4031 - val_loss: 0.0023
Epoch 6/1000
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 113ms/step - custom_mae: 0.2798 - loss: 0.0041 - val_custom_m

I0000 00:00:1759504698.820367 4116207 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
I0000 00:00:1759504698.820563 4116207 single_machine.cc:374] Starting new session
I0000 00:00:1759504698.821544 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:03:00.0, compute capability: 8.6
I0000 00:00:1759504699.164062 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8092 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:03:00.0, compute capability: 8.6
I0000 00:00:1759504699.207925 4116207 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
I0000 00:00:1759504699.208126 4116207 single_machine.cc:374] Starting new session
I0000 00:00:1759504699.209500 4116207 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/devic

✓ Completed Calgary → Results/Ablation/Calgary/PIELNET
