In [1]:
#!pip install -r requirements.txt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

from IMV_LSTM.networks import IMVTensorMultiStepLSTM
from IMV_LSTM.model_prep import prepare_multistep_data
from models.PatchTST_Attention import Model
from data_provider.data_loader import Dataset_Custom
from evaluation.performance import stepwise_errors, get_preds_truths, evaluate
from evaluation.randomize_patchtst import (
    restore_original_attention,
    enable_attention_randomization,
    disable_attention_randomization,
    patchtst_randomization_check
)
from evaluation.randomize_imvlstm import imvlstm_attention_randomization_check


# Model Training

## PatchTST

In [None]:
# Execute the PatchTST training script inside this kernel; its console log is
# captured as the cell output below.
%run scripts/PatchTST/weather_int.py


Running experiment with pred_len=144, logging to ./logs/LongForecasting/PatchTST_Attention_weather_int_576_144.log ...
Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='weather_int_576_144', model='PatchTST_Attention', data='custom', root_path='C:/Users/miche/Documents/PatchTST/PatchTST_supervised/dataset/', data_path='weather_int.csv', features='MS', target='T (degC)', freq='10T', checkpoints='./checkpoints/', seq_len=576, label_len=144, pred_len=144, fc_dropout=0.2, head_dropout=0.0, patch_len=24, stride=12, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=1, embed_type=0, enc_in=7, dec_in=7, c_out=1, d_model=70, n_heads=7, e_layers=3, d_layers=1, d_ff=280, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=True, do_predict=True, num_workers=4, itr=1, train_epochs=100, batch_size=128, patience=25, learning_rate=0.0003, des='Exp', loss='mse', lradj='type3', pct

## IMV-LSTM

In [None]:
# Train the IMV-LSTM on weather_int.csv with a 576-step input window and a
# 144-step forecast horizon.
# The dataset location is given relative to the notebook's working directory
# (the same "dataset/" folder the evaluation cells read from) instead of a
# machine-specific absolute path, so the cell runs on any checkout.
%run scripts/IMV_LSTM/weather_int_IMV_LSTM.py \
  --root_path ./dataset/ \
  --data_path weather_int.csv \
  --input_window 576 \
  --forecast_horizon 144 \
  --batch_size 128 \
  --epochs 100 \
  --lr 3e-4 \
  --patience 25 \
  --save_dir ./logs/IMV_weather


# Model Evaluation

## Loading Models from Path

### Prediction Length 10 - PatchTST

In [3]:
class Args:
    """Static hyper-parameter namespace for the short-horizon PatchTST model.

    Stands in for the argparse namespace that is handed to ``Model``:
    336 input steps, 10 forecast steps, ``is_training = 0``.
    """

    # --- run identity / reproducibility ---
    model = 'PatchTST_Attention'
    model_id = 'test'
    is_training = 0
    random_seed = 2021

    # --- dataset location and column selection ---
    root_path = './dataset'
    data_path = 'weather_int.csv'
    data = 'weather'
    target = 'T (degC)'
    features = 'MS'
    freq = '10T'
    checkpoints = './checkpoints/'

    # --- window sizes, in time steps ---
    seq_len = 336
    label_len = pred_len = 10

    # --- patching and PatchTST head ---
    patch_len, stride = 16, 8
    padding_patch = 'end'
    fc_dropout = 0.2
    head_dropout = 0.0
    revin = 1
    affine = subtract_last = decomposition = 0
    kernel_size = 25
    individual = 1  # integer flag; the original note reads "True"

    # --- settings shared with the other Transformer-style models ---
    enc_in = dec_in = 7
    c_out = 1
    d_model = 70
    n_heads = 7
    d_ff = 280
    e_layers = 3
    d_layers = 1
    embed_type = 0
    embed = 'timeF'
    activation = 'gelu'
    dropout = 0.2
    moving_avg = 25
    factor = 1
    distil = True
    output_attention = True
    do_predict = False

    # --- optimisation ---
    train_epochs = 100
    batch_size = 128
    learning_rate = 1e-4
    patience = 5
    lradj = 'type3'
    pct_start = 0.3
    loss = 'mse'
    des = 'Exp'
    itr = 1
    num_workers = 4
    use_amp = False

    # --- hardware ---
    use_gpu = True
    gpu = 0
    use_multi_gpu = False
    devices = '0,1,2,3'
    test_flop = False


# Single shared instance consumed by the model-construction cell.
args = Args()



In [5]:
# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Checkpoint of the 336 -> 10 PatchTST run.
ckpt_path = (
    "scripts/PatchTST/checkpoints/"
    "weather_int_336_10_PatchTST_Attention_custom_ftMS_sl336_ll10_pl10_dm70_nh7_el3_dl1_df280_fc1_"
    "ebtimeF_dtTrue_Exp_0/checkpoint.pth"
)
ckpt_patch = torch.load(ckpt_path, map_location=device)

# Accept both a wrapped checkpoint ({"model_state_dict": ...}) and a bare state dict.
if "model_state_dict" in ckpt_patch:
    state_dict_patch = ckpt_patch["model_state_dict"]
else:
    state_dict_patch = ckpt_patch

# Build the network from `args`, move it to the device, then restore the weights.
model_patch = Model(args).float().to(device)
model_patch.load_state_dict(state_dict_patch)
model_patch.eval()


Model(
  (model): PatchTST_backbone(
    (revin_layer): RevIN()
    (padding_patch_layer): ReplicationPad1d((0, 8))
    (backbone): TSTiEncoder(
      (W_P): Linear(in_features=16, out_features=70, bias=True)
      (dropout): Dropout(p=0.2, inplace=False)
      (encoder): TSTEncoder(
        (layers): ModuleList(
          (0): TSTEncoderLayer(
            (self_attn): _MultiheadAttention(
              (W_Q): Linear(in_features=70, out_features=70, bias=True)
              (W_K): Linear(in_features=70, out_features=70, bias=True)
              (W_V): Linear(in_features=70, out_features=70, bias=True)
              (sdp_attn): _ScaledDotProductAttention(
                (attn_dropout): Dropout(p=0.0, inplace=False)
              )
              (to_out): Sequential(
                (0): Linear(in_features=70, out_features=70, bias=True)
                (1): Dropout(p=0.2, inplace=False)
              )
            )
            (dropout_attn): Dropout(p=0.2, inplace=False)
            

In [9]:
# Short-horizon PatchTST test split.
# `size` is [seq_len, label_len, pred_len] and has to agree with `args`.
# NOTE(review): the original comment says this mirrors the training setup;
# confirm that `timeenc=0` is what the training data loader used.
dataset_test = Dataset_Custom(
    root_path="dataset/",
    data_path="weather_int.csv",
    flag="test",
    size=[336, 10, 10],
    features="MS",
    target="T (degC)",
    scale=True,
    timeenc=0,
    freq="10T",
)

# Unshuffled batches so predictions stay in dataset order.
test_loader_patch = DataLoader(
    dataset_test,
    shuffle=False,
    batch_size=128,
    num_workers=4,
)


### Prediction Length 10 - IMV-LSTM

In [10]:
# 1) Rebuild the short-horizon IMV-LSTM with the hyper-parameters of the
#    checkpoint: 7 input variables, 1 output, 140 units, 10 forecast steps.
model_IMV = IMVTensorMultiStepLSTM(
    input_dim     = 7,
    output_dim    = 1,
    n_units       = 140,
    forecast_steps= 10
).float().to(device)

# 2) Restore the trained weights (map_location handles CPU/GPU placement).
# NOTE(review): the saved output of this cell is a FileNotFoundError for this
# path. The training cell passes `--save_dir ./logs/IMV_weather`, so the
# checkpoint may live there — confirm the actual file name and update the path.
ckpt_path_IMV = "./imv_best.pth"
state_IMV = torch.load(ckpt_path_IMV, map_location=device)
model_IMV.load_state_dict(state_IMV)
# Put the network in inference mode, consistent with the PatchTST cells, so any
# train-only layers are disabled during evaluation.
model_IMV.eval()

# 3) Build the sliding-window splits for the 336 -> 10 task.
df = pd.read_csv("dataset/weather_int.csv")
target_weather= 'T (degC)'
cols_weather_multi=['p (mbar)', 'Tdew (degC)', 'sh (g/kg)', 'wv (m/s)', 'max. wv (m/s)',
       'wd (deg)', 'T (degC)']
input_window = 336   # instead of 40
forecast_horizon = 10
batch_size_weather=128
X_train_multi, y_train_multi, \
X_val_multi, y_val_multi, \
X_test_multi, y_test_multi, \
input_scaler_multi, target_scaler_multi = prepare_multistep_data(
    df=df,
    input_columns=cols_weather_multi,
    target_column=target_weather,
    input_window=input_window,
    forecast_horizon=forecast_horizon,
    scale_data=True
)

# 4) Convert every split to float32 tensors.
X_train_t_multi = torch.tensor(X_train_multi, dtype=torch.float32)
X_val_t_multi   = torch.tensor(X_val_multi, dtype=torch.float32)
X_test_t_multi  = torch.tensor(X_test_multi, dtype=torch.float32)

y_train_t_multi = torch.tensor(y_train_multi, dtype=torch.float32)
y_val_t_multi   = torch.tensor(y_val_multi, dtype=torch.float32)
y_test_t_multi  = torch.tensor(y_test_multi, dtype=torch.float32)

# 5) Unshuffled test loader so predictions stay in dataset order.
test_loader_IMV = DataLoader(
    TensorDataset(X_test_t_multi, y_test_t_multi),
    shuffle=False,
    batch_size=batch_size_weather
)

FileNotFoundError: [Errno 2] No such file or directory: './imv_best.pth'

### Prediction Length 144 - PatchTST

In [None]:
class Args:
    """Static hyper-parameter namespace for the long-horizon PatchTST model.

    Same fields as the training namespace; configured for 576 input steps
    and 144 forecast steps with ``is_training = 0``.
    """

    # --- run identity / reproducibility ---
    model = 'PatchTST_Attention'
    model_id = 'weather_int_576_144'
    is_training = 0
    random_seed = 2021

    # --- dataset location and column selection ---
    root_path = './dataset'
    data_path = 'weather_int.csv'
    data = 'custom'
    target = 'T (degC)'
    features = 'MS'
    freq = '10T'
    checkpoints = './checkpoints/'

    # --- window sizes, in 10-minute steps ---
    seq_len = 576                 # 4 days of history
    label_len = pred_len = 144    # 1 day each

    # --- patching and PatchTST head ---
    patch_len, stride = 24, 12    # 4-hour patches, half a patch apart
    padding_patch = 'end'
    fc_dropout = 0.2
    head_dropout = 0.0
    revin = 1
    affine = subtract_last = decomposition = 0
    kernel_size = 25
    individual = 1  # integer flag; the original note reads "True"

    # --- Transformer backbone ---
    enc_in = dec_in = 7
    c_out = 1
    d_model = 70
    n_heads = 7
    d_ff = 280
    e_layers = 3
    d_layers = 1
    embed_type = 0
    embed = 'timeF'
    activation = 'gelu'
    dropout = 0.2
    moving_avg = 25
    factor = 1
    distil = True
    output_attention = True
    do_predict = False

    # --- optimisation ---
    train_epochs = 100
    batch_size = 128
    learning_rate = 0.0001
    patience = 25
    lradj = 'type3'
    pct_start = 0.3
    loss = 'mse'
    des = 'Exp'
    itr = 1
    num_workers = 4
    use_amp = False

    # --- hardware ---
    use_gpu = True
    gpu = 0
    use_multi_gpu = False
    devices = '0'
    test_flop = False


# Instance consumed by the long-horizon model-construction cell.
args_long = Args()


In [None]:
# Checkpoint of the 576 -> 144 PatchTST run.
# The directory name follows the same pattern as the short-horizon checkpoint
# (model_id, model, data, ft, sl/ll/pl, ...), with the long-run values reported
# in the training log: model_id='weather_int_576_144', seq_len=576,
# label_len=144, pred_len=144.
# Two fixes compared with the previous version of this cell:
#   * `map_location=device` was placed inside the parenthesised string, which
#     is a SyntaxError; it belongs to `torch.load` only.
#   * the path pointed at the 336/10 checkpoint, whose weights do not belong to
#     a model built from `args_long`.
ckpt_path_long = (
    "scripts/PatchTST/checkpoints/"
    "weather_int_576_144_PatchTST_Attention_custom_ftMS_sl576_ll144_pl144_dm70_nh7_el3_dl1_df280_fc1_"
    "ebtimeF_dtTrue_Exp_0/checkpoint.pth"
)
ckpt_patch_long = torch.load(ckpt_path_long, map_location=device)

# Accept both a wrapped checkpoint ({"model_state_dict": ...}) and a bare state dict.
state_dict_patch_long = ckpt_patch_long.get("model_state_dict", ckpt_patch_long)
model_patch_long = Model(args_long)
model_patch_long = model_patch_long.float().to(device)
model_patch_long.load_state_dict(state_dict_patch_long)
model_patch_long.eval()


In [None]:
# Long-horizon PatchTST test split.
# `size` is [seq_len, label_len, pred_len] and has to agree with `args_long`.
# NOTE(review): the original comment says this mirrors the training setup;
# confirm that `timeenc=0` is what the training data loader used.
dataset_test_patch_long = Dataset_Custom(
    root_path="dataset/",          # resolved against the current working directory
    data_path="weather_int.csv",
    flag="test",
    size=[576, 144, 144],
    features="MS",
    target="T (degC)",
    scale=True,
    timeenc=0,
    freq="10T",
)

# Unshuffled batches so predictions stay in dataset order.
test_loader_patch_long = DataLoader(
    dataset_test_patch_long,
    shuffle=False,
    batch_size=128,
    num_workers=4,
)


### Prediction Length 144 - IMV-LSTM

In [None]:
# 1) Rebuild the long-horizon IMV-LSTM: 7 input variables, 1 output,
#    140 units, 144 forecast steps.
model_IMV_long = IMVTensorMultiStepLSTM(
    input_dim      = 7,
    output_dim     = 1,
    n_units        = 140,
    forecast_steps = 144        # forecast_horizon_long
).float().to(device)

# 2) Restore the trained weights (map_location handles CPU/GPU placement).
# NOTE(review): the short-horizon cell failed to find its checkpoint at a
# similar relative path; confirm this file exists where the training script
# saved it.
ckpt_path_IMV_long = "./imv_best_long.pth"
state_IMV_long     = torch.load(ckpt_path_IMV_long, map_location=device)
model_IMV_long.load_state_dict(state_IMV_long)
# Put the network in inference mode, consistent with the PatchTST cells, so any
# train-only layers are disabled during evaluation.
model_IMV_long.eval()

# 3) Build the sliding-window splits for the 576 -> 144 task.
df = pd.read_csv("dataset/weather_int.csv")
target_weather      = 'T (degC)'
cols_weather_multi  = [
    'p (mbar)', 'Tdew (degC)', 'sh (g/kg)',
    'wv (m/s)', 'max. wv (m/s)', 'wd (deg)', 'T (degC)'
]
input_window_long     = 576   # 4 days of history (576 x 10 min)
forecast_horizon_long = 144   # 1 day ahead (144 x 10 min)
batch_size_weather    = 128

X_train_long, y_train_long, \
X_val_long,   y_val_long,   \
X_test_long,  y_test_long,  \
in_scaler_long, out_scaler_long = prepare_multistep_data(
    df=df,
    input_columns = cols_weather_multi,
    target_column = target_weather,
    input_window  = input_window_long,
    forecast_horizon = forecast_horizon_long,
    scale_data    = True
)

# 4) Only the test split is needed for evaluation; convert it to float32 tensors.
X_test_t_long = torch.tensor(X_test_long, dtype=torch.float32)
y_test_t_long = torch.tensor(y_test_long, dtype=torch.float32)

# 5) Unshuffled test loader so predictions stay in dataset order.
test_loader_IMV_long = DataLoader(
    TensorDataset(X_test_t_long, y_test_t_long),
    shuffle     = False,
    batch_size  = batch_size_weather,
    num_workers = 4
)


### Scaler

In [None]:
# Keep the target column as a 2-D array (one column), the layout a scaler fits on.
target = 'T (degC)'
data = df.loc[:, [target]].to_numpy()

# First 70% of the rows form the training portion.
# NOTE(review): assumed to match the split used by the dataset classes — confirm.
num_train = int(0.7 * len(data))
train_data = data[:num_train]

# Fit a fresh standardiser on the training rows only.
scaler = StandardScaler()
scaler.fit(train_data)


## Results

### MSE, RMSE, MAE

In [None]:
# ── 8) RUN & DISPLAY ─────────────────────────────────────────────────────────
# Evaluate every (model, loader) pair and collect one metrics row per model.
# `scaler` is the target-column scaler fitted in the "Scaler" cell; the name
# `scaler_y` used previously is not defined anywhere in the notebook.
# NOTE(review): the same scaler is applied to all four models — confirm that the
# IMV-LSTM data preparation scales the target the same way.
results = []
for name, mdl, loader in [
    ("PatchTST",           model_patch,      test_loader_patch),
    ("PatchTST-Long",      model_patch_long, test_loader_patch_long),
    ("IMV-LSTM",           model_IMV,        test_loader_IMV),
    ("IMV-LSTM-Long",      model_IMV_long,   test_loader_IMV_long),
]:
    metrics = evaluate(mdl, loader, target_channel=-1, scaler=scaler)
    metrics["Model"] = name
    results.append(metrics)

# Stored under its own name so the raw weather frame `df` is not overwritten;
# the "Scaler" cell reads `df` and would break if re-run after this cell.
results_df = pd.DataFrame(results)
print(results_df.to_markdown(index=False))

In [None]:
def _plot_stepwise(metric_name, curves):
    """Draw one comparison figure of a per-step error metric.

    Parameters
    ----------
    metric_name : str
        Label for the y-axis and the title (e.g. ``'RMSE'``).
    curves : iterable of (label, values, marker, linestyle)
        One entry per model. Each curve is plotted against its own
        1-based step axis, so series of different lengths (short and long
        horizons) can share a figure.
    """
    plt.figure(figsize=(10, 4))
    for label, values, marker, linestyle in curves:
        # Per-series x-axis: a single shared axis raises in plt.plot as soon as
        # the short and long horizons have different lengths.
        steps = np.arange(1, len(values) + 1)
        plt.plot(steps, values, marker=marker, linestyle=linestyle, label=label)
    plt.xlabel('Forecast Step')
    plt.ylabel(metric_name)
    plt.title(f'Stepwise {metric_name} Comparison')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# 1) Collect predictions & truths for both short and long horizons
patch_preds,        patch_trues        = get_preds_truths(model_patch,      test_loader_patch,      device)
patch_preds_long,   patch_trues_long   = get_preds_truths(model_patch_long, test_loader_patch_long, device)
imv_preds,          imv_trues          = get_preds_truths(model_IMV,        test_loader_IMV,        device)
imv_preds_long,     imv_trues_long     = get_preds_truths(model_IMV_long,   test_loader_IMV_long,   device)

# 2) Compute per-step error
patch_rmse,      patch_mae      = stepwise_errors(patch_preds,      patch_trues)
patch_long_rmse, patch_long_mae = stepwise_errors(patch_preds_long, patch_trues_long)
imv_rmse,        imv_mae        = stepwise_errors(imv_preds,        imv_trues)
imv_long_rmse,   imv_long_mae   = stepwise_errors(imv_preds_long,   imv_trues_long)

# 3) Plot RMSE comparison (circles = PatchTST, squares = IMV-LSTM, dashed = long horizon)
_plot_stepwise('RMSE', [
    ('PatchTST',      patch_rmse,      'o', '-'),
    ('PatchTST-Long', patch_long_rmse, 'o', '--'),
    ('IMV-LSTM',      imv_rmse,        's', '-'),
    ('IMV-LSTM-Long', imv_long_rmse,   's', '--'),
])

# 4) Plot MAE comparison with the same styling
_plot_stepwise('MAE', [
    ('PatchTST',      patch_mae,      'o', '-'),
    ('PatchTST-Long', patch_long_mae, 'o', '--'),
    ('IMV-LSTM',      imv_mae,        's', '-'),
    ('IMV-LSTM-Long', imv_long_mae,   's', '--'),
])


### Attention Plots

In [None]:
# Plot parameters for the short- and long-horizon runs.
# `feature_names` uses `cols_weather_multi`, the column list defined in the
# IMV-LSTM data cells; the name `cols_weather` used previously is not defined
# anywhere in the notebook.
runs = {
    "short": dict(
        attn_dir       = "attn/patchtst_short",
        seq_len        = 336,
        pred_len       = 10,
        num_layers     = 3,
        num_batches    = 10,
        batch_size     = 128,
        num_vars       = 7,
        num_heads      = 7,
        feature_names  = cols_weather_multi,
    ),
    "long": dict(
        attn_dir       = "attn/patchtst_long",
        seq_len        = 576,
        pred_len       = 144,
        num_layers     = 3,
        num_batches    = 10,
        batch_size     = 128,
        num_vars       = 7,
        num_heads      = 7,
        feature_names  = cols_weather_multi,
    ),
}

# NOTE(review): `plot_patchtst_temporal` and `plot_imv_saved_attention` are not
# imported in the visible notebook — add the import from the module that
# defines them.
# NOTE(review): the IMV-LSTM plot receives the PatchTST `attn_dir` and head/layer
# counts via **params — confirm that this is intended.
for run_name, params in runs.items():
    plot_patchtst_temporal(
        model_name  = "PatchTST",
        run_name    = run_name,
        **params
    )
    plot_imv_saved_attention(
        model_name  = "IMV-LSTM",
        run_name    = run_name,
        **params
    )


### Randomization PatchTST

In [None]:
# Reset the attention implementation before measuring anything.
# NOTE(review): presumably undoes any patching left over from an earlier run —
# confirm against evaluation.randomize_patchtst.
restore_original_attention()

# 1) quick smoke check on one batch of the short model
xb, yb, *_ = next(iter(test_loader_patch))
xb = xb.to(device).float()
with torch.no_grad():
    out = model_patch(xb)
print("baseline short out shape:", out.shape)

# Randomized-attention forward pass. The switch is turned off in `finally` so a
# failure here cannot leave randomization enabled for the cells that follow.
enable_attention_randomization()
try:
    with torch.no_grad():
        out_r = model_patch(xb)
    print("rand-attn short out shape:", out_r.shape)
finally:
    disable_attention_randomization()

# 2) run full randomization tests on both
for name, m, loader, pred_len in [
    ("PatchTST-Short", model_patch,      test_loader_patch,      10),
    ("PatchTST-Long",  model_patch_long, test_loader_patch_long, 144),
]:
    print(f"\n=== {name} ===")
    orig_mse, rand_mse = patchtst_randomization_check(
        m, loader, device, pred_len=pred_len
    )
    print(f"orig MSE = {orig_mse:.4f}, rand MSE = {rand_mse:.4f}")

### Randomization IMV-LSTM

In [None]:
# (display name, model, test loader, forecast length) for each IMV-LSTM run.
# The short-horizon objects are named `model_IMV` / `test_loader_IMV` where they
# are created; the `*_short` names used previously are not defined anywhere.
runs = [
    ("IMV-LSTM-Short", model_IMV,      test_loader_IMV,      10),
    ("IMV-LSTM-Long",  model_IMV_long, test_loader_IMV_long, 144),
]

for name, model, loader, pred_len in runs:
    # Make sure the model is float32, on the right device and in inference mode.
    model = model.float().to(device).eval()
    print(f"\n=== {name} (pred_len={pred_len}) ===")
    orig_mse, rand_mse = imvlstm_attention_randomization_check(
        model,
        loader,
        device,
        target_channel=-1,
        metric=None  # or pass your metric function
    )
    print(f"orig MSE = {orig_mse:.4f}   rand MSE = {rand_mse:.4f}")

In [None]:
import torch
# NOTE(review): the imports cell takes `imvlstm_attention_randomization_check`
# from `evaluation.randomize_imvlstm`; confirm that `utils` really exports both
# helpers, or import `imvlstm_beta_randomization_check` from the same module.
from utils import (
    imvlstm_attention_randomization_check,
    imvlstm_beta_randomization_check
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Each run names the check to execute explicitly instead of inferring it from
# the display label: (label, check function, model, test loader, pred_len).
# The short-horizon objects are named `model_IMV` / `test_loader_IMV` where they
# are created; the `*_short` names used previously are not defined anywhere.
runs = [
    ("IMV-Short α-rand", imvlstm_attention_randomization_check, model_IMV,      test_loader_IMV,      10),
    ("IMV-Long  α-rand", imvlstm_attention_randomization_check, model_IMV_long, test_loader_IMV_long, 144),
    ("IMV-Short β-rand", imvlstm_beta_randomization_check,      model_IMV,      test_loader_IMV,      10),
    ("IMV-Long  β-rand", imvlstm_beta_randomization_check,      model_IMV_long, test_loader_IMV_long, 144),
]

for name, check_fn, model, loader, pred_len in runs:
    # Make sure the model is float32, on the right device and in inference mode.
    model = model.float().to(device).eval()
    print(f"\n=== {name} (pred_len={pred_len}) ===")
    orig, rand = check_fn(model, loader, device, target_channel=-1)
    print(f"orig MSE = {orig:.4f}, rand MSE = {rand:.4f}")
