# data

In [2]:
import torch 
import importlib
import monotonic
import data_manager
import metrics
import utils
importlib.reload(utils)
import train as Train
from train import execute_model_evaluation
import train_config
from data_manager import DatasetManager
from train_config import FlatACLConfig, FlatDLinearConfig, FlatNaiveConfig, FlatPatchTSTConfig, FlatTimeMixerConfig
from dataclasses import replace

%load_ext autoreload
%autoreload 2
# Project modules gathered for hot-reloading (presumably handed to
# utils.reload_modules when needed -- confirm against its callers).
modules_to_reload_list = [data_manager, Train, train_config, monotonic, utils, metrics]

# Dataset manager bound to the GPU.
data_mgr = DatasetManager(device='cuda')

# Generate the synthetic Lorenz trajectory (24999 steps at dt=0.01).
# Other step counts noted by the author: 51999, 36999.
# Kept as the last expression so the cell displays the call's return value.
data_mgr.load_trajectory('lorenz', steps=24999, dt=1e-2)
# SCALE = False

LorenzSystem initialized with method: rk4 on device: cuda

Dataset: lorenz (synthetic)
Shape: torch.Size([25000, 3])
Channels: 3
Length: 25000
Parameters: {'steps': 24999, 'dt': 0.01}

Sample data (first 2 rows):
tensor([[1.0000, 0.9800, 1.1000],
        [1.0106, 1.2389, 1.0820]], device='cuda:0')


<data_manager.DatasetManager at 0x248f39945c0>

# AB SWD

## FRIREN
Note: these runs use 'convex' mixing, which was previously shown to make little difference compared with the original baseline.

### huber

In [3]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: Huber-only objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 74.2999, mae: 6.4853, huber: 6.0056, swd: 42.3418, ept: 32.1924
Epoch [1/50], Val Losses: mse: 59.6500, mae: 5.8156, huber: 5.3402, swd: 25.8090, ept: 35.5563
Epoch [1/50], Test Losses: mse: 55

### huber + 0.1 SWD

In [6]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: Huber + 0.1 * SWD objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 141.0142, mae: 8.4018, huber: 7.9166, swd: 14.0622, ept: 10.8112
Epoch [1/50], Val Losses: mse: 110.9817, mae: 7.1460, huber: 6.6629, swd: 1.2670, ept: 16.2459
Epoch [1/50], Test Losses: mse: 1

### huber + 0.5 SWD

In [20]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: Huber + 0.5 * SWD objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 236.0685, mae: 10.9817, huber: 10.4934, swd: 13.4349, ept: 4.3301
Epoch [1/50], Val Losses: mse: 154.7652, mae: 9.0832, huber: 8.5961, swd: 1.0262, ept: 6.9456
Epoch [1/50], Test Losses: mse: 1

### MSE

In [21]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: MSE-only objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 76.2142, mae: 6.7053, huber: 6.2235, swd: 44.6189, ept: 27.1548
Epoch [1/50], Val Losses: mse: 61.2947, mae: 6.0596, huber: 5.5797, swd: 31.3272, ept: 29.1181
Epoch [1/50], Test Losses: mse: 57

### MSE + 0.1 SWD

In [10]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: MSE + 0.1 * SWD objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 75.8453, mae: 6.6967, huber: 6.2146, swd: 37.6889, ept: 28.3173
Epoch [1/50], Val Losses: mse: 62.1306, mae: 6.1229, huber: 5.6424, swd: 26.0276, ept: 27.0911
Epoch [1/50], Test Losses: mse: 58

### MSE + 0.5 SWD

In [22]:
# FRIREN (FlatACLConfig) on Lorenz, 336 -> 336 steps: MSE + 0.5 * SWD objective.
import torch.nn as nn

# Reload first, import second: importlib.reload() does not rebind names that
# were previously pulled in with `from monotonic import ...`, so importing
# afterwards keeps DynamicTanh pointing at the freshly reloaded class.
importlib.reload(monotonic)
importlib.reload(train_config)
from monotonic import DynamicTanh

cfg = train_config.FlatACLConfig(  # original householder
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    dim_hidden=128,
    dim_augment=128,
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='convex',  # mixing strategy used for this ablation
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
    ablate_deterministic_y0=False,
)

# All five sub-configs receive the same settings. The list literals are built
# inside the loop so that no two sub-configs share a list object.
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    sub_cfg.enable_magnitudes = [False, True]
    sub_cfg.spectral_flags_scale_shift = [True, False]
    sub_cfg.spectral_flags_magnitudes = [False, True]
    sub_cfg.spectral_flags_hidden_layers = [False, False]
    sub_cfg.activations_scale_shift = ["relu6", "dynamic_tanh"]
    sub_cfg.activations_hidden_layers = [nn.ELU, nn.LogSigmoid]

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 82.0629, mae: 7.0258, huber: 6.5421, swd: 23.9779, ept: 17.9927
Epoch [1/50], Val Losses: mse: 70.0927, mae: 6.6505, huber: 6.1668, swd: 10.5825, ept: 21.2040
Epoch [1/50], Test Losses: mse: 65

## TimeMixer

### huber

In [8]:
# TimeMixer baseline on Lorenz, 336 -> 336 steps: Huber-only objective.
utils.reload_modules([utils])

# The dataset's channel count feeds every channel-sized argument below.
n_channels = data_mgr.datasets['lorenz']['channels']

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336,
    pred_len=336,
    channels=n_channels,
    enc_in=n_channels,
    dec_in=n_channels,
    c_out=n_channels,
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
)

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 68.5378, mae: 6.0891, huber: 5.6115, swd: 14.7714, ept: 56.2274
Epoch [1/50], Val Losses: mse: 73.1194, mae: 6.2198, huber: 5.7449

### huber + 0.1 SWD

In [7]:
# TimeMixer baseline on Lorenz, 336 -> 336 steps: Huber + 0.1 * SWD objective.
utils.reload_modules([utils])

# The dataset's channel count feeds every channel-sized argument below.
n_channels = data_mgr.datasets['lorenz']['channels']

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336,
    pred_len=336,
    channels=n_channels,
    enc_in=n_channels,
    dec_in=n_channels,
    c_out=n_channels,
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
)

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 78.7088, mae: 6.4985, huber: 6.0192, swd: 2.9340, ept: 44.5411
Epoch [1/50], Val Losses: mse: 86.5853, mae: 6.6705, huber: 6.1934,

### huber + 0.5 SWD

In [9]:
# TimeMixer baseline on Lorenz, 336 -> 336 steps: Huber + 0.5 * SWD objective.
utils.reload_modules([utils])

# The dataset's channel count feeds every channel-sized argument below.
n_channels = data_mgr.datasets['lorenz']['channels']

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336,
    pred_len=336,
    channels=n_channels,
    enc_in=n_channels,
    dec_in=n_channels,
    c_out=n_channels,
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
)

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 89.1695, mae: 7.0887, huber: 6.6066, swd: 1.5919, ept: 30.0389
Epoch [1/50], Val Losses: mse: 89.7280, mae: 6.8923, huber: 6.4136,

### MSE

In [10]:
# TimeMixer baseline on Lorenz, 336 -> 336 steps: MSE-only objective.
utils.reload_modules([utils])

# The dataset's channel count feeds every channel-sized argument below.
n_channels = data_mgr.datasets['lorenz']['channels']

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336,
    pred_len=336,
    channels=n_channels,
    enc_in=n_channels,
    dec_in=n_channels,
    c_out=n_channels,
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
)

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 66.8765, mae: 6.2210, huber: 5.7406, swd: 17.2746, ept: 46.3937
Epoch [1/50], Val Losses: mse: 67.6510, mae: 6.2287, huber: 5.7502

### MSE + 0.1 SWD

In [11]:
# TimeMixer baseline on Lorenz, 336 -> 336 steps: MSE + 0.1 * SWD objective.
utils.reload_modules([utils])

# The dataset's channel count feeds every channel-sized argument below.
n_channels = data_mgr.datasets['lorenz']['channels']

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336,
    pred_len=336,
    channels=n_channels,
    enc_in=n_channels,
    dec_in=n_channels,
    c_out=n_channels,
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Loss weights: presumably ordered (mse, mae, huber, swd, ept) like the
    # logged metrics -- confirm against train_config.
    loss_backward_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
)

# Run the experiment on the raw (unscaled) series.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 67.1197, mae: 6.2273, huber: 5.7468, swd: 15.5230, ept: 46.2257
Epoch [1/50], Val Losses: mse: 69.4403, mae: 6.3172, huber: 5.8381

### MSE + 0.5 SWD

In [11]:
# Experiment cell: FlatTimeMixerConfig on 'lorenz' with loss weights [1.0, 0, 0, 0.5, 0].
# Per the section heading, slot 0 weights the MSE term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatTimeMixerConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)


Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 69.3558, mae: 6.3330, huber: 5.8518, swd: 9.2611, ept: 41.5565
Epoch [1/50], Val Losses: mse: 75.0004, mae: 6.5830, huber: 6.1028,

## PatchTST 

### huber  

In [12]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [0, 0, 1.0, 0, 0].
# Per the section heading, slot 2 weights the Huber term; every other term is off.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 71.3631, mae: 6.3279, huber: 5.8480, swd: 10.6537, ept: 36.1925
Epoch [1/50], Val Losses: mse: 64.9526, mae: 5.9413, huber: 5.4652

### huber + 0.1 SWD

In [8]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [0, 0, 1.0, 0.1, 0].
# Per the section heading, slot 2 weights the Huber term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 83.0614, mae: 6.7965, huber: 6.3152, swd: 3.2402, ept: 29.4681
Epoch [1/50], Val Losses: mse: 77.8420, mae: 6.4988, huber: 6.0194,

### huber + 0.5 SWD

In [13]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [0, 0, 1.0, 0.5, 0].
# Per the section heading, slot 2 weights the Huber term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 99.4185, mae: 7.6075, huber: 7.1231, swd: 3.4429, ept: 19.6252
Epoch [1/50], Val Losses: mse: 93.5616, mae: 7.1875, huber: 6.7060,

### MSE

In [14]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [1.0, 0, 0, 0, 0].
# Per the section heading, slot 0 weights the MSE term; every other term is off.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 69.8124, mae: 6.4039, huber: 5.9226, swd: 12.5485, ept: 34.6618
Epoch [1/50], Val Losses: mse: 63.2514, mae: 6.0390, huber: 5.5608

### MSE + 0.1 SWD

In [12]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [1.0, 0, 0, 0.1, 0].
# Per the section heading, slot 0 weights the MSE term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 70.1795, mae: 6.4190, huber: 5.9375, swd: 11.3057, ept: 34.0583
Epoch [1/50], Val Losses: mse: 62.4589, mae: 5.9263, huber: 5.4496

### MSE + 0.5 SWD

In [4]:
# Experiment cell: FlatPatchTSTConfig on 'lorenz' with loss weights [1.0, 0, 0, 0.5, 0].
# Per the section heading, slot 0 weights the MSE term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg_patch_tst = train_config.FlatPatchTSTConfig(
    seq_len=336, pred_len=336,
    # channels / enc_in / dec_in / c_out all receive the dataset's channel count
    **dict.fromkeys(
        ('channels', 'enc_in', 'dec_in', 'c_out'),
        data_mgr.datasets['lorenz']['channels'],
    ),
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    task_name='long_term_forecast', factor=3,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
)
exp_patch_tst = execute_model_evaluation(
    'lorenz', cfg_patch_tst, data_mgr, scale=False
)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 72.3810, mae: 6.5229, huber: 6.0410, swd: 7.3717, ept: 30.2300
Epoch [1/50], Val Losses: mse: 66.5961, mae: 6.2285, huber: 5.7486,

## DLinear

### huber  

In [16]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [0, 0, 1.0, 0, 0].
# Per the section heading, slot 2 weights the Huber term; every other term is off.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 72.2890, mae: 6.3031, huber: 5.8260, swd: 22.7382, ept: 42.5628
Epoch [1/50], Val Losses: mse: 67.3290, mae: 6.1486, huber: 5.6718

### huber + 0.1 SWD

In [9]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [0, 0, 1.0, 0.1, 0].
# Per the section heading, slot 2 weights the Huber term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.1, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 97.3154, mae: 6.9975, huber: 6.5188, swd: 2.9809, ept: 26.7117
Epoch [1/50], Val Losses: mse: 82.1644, mae: 6.5239, huber: 6.0477,

### huber + 0.5 SWD

In [5]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [0, 0, 1.0, 0.5, 0].
# Per the section heading, slot 2 weights the Huber term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.5, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 139.4832, mae: 8.2891, huber: 7.8061, swd: 2.5330, ept: 9.9655
Epoch [1/50], Val Losses: mse: 85.6057, mae: 6.8084, huber: 6.3296,

### MSE

In [18]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [1.0, 0, 0, 0, 0].
# Per the section heading, slot 0 weights the MSE term; every other term is off.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.0, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 71.1245, mae: 6.4317, huber: 5.9511, swd: 28.6219, ept: 34.9660
Epoch [1/50], Val Losses: mse: 63.2887, mae: 6.1536, huber: 5.6736

### MSE + 0.1 SWD

In [13]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [1.0, 0, 0, 0.1, 0].
# Per the section heading, slot 0 weights the MSE term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.1, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 71.6243, mae: 6.4569, huber: 5.9759, swd: 22.5411, ept: 34.3911
Epoch [1/50], Val Losses: mse: 64.2846, mae: 6.2107, huber: 5.7300

### MSE + 0.5 SWD

In [19]:
# Experiment cell: FlatDLinearConfig on 'lorenz' with loss weights [1.0, 0, 0, 0.5, 0].
# Per the section heading, slot 0 weights the MSE term and slot 3 the SWD term.
# Identical weights are passed as `loss_backward_weights` and `loss_validate_weights`.
utils.reload_modules([utils])  # prints a reload report for `utils`

cfg = train_config.FlatDLinearConfig(
    seq_len=336, pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],
    batch_size=128, learning_rate=9e-4,
    seeds=[1955, 7, 20], epochs=50,
    loss_backward_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
    loss_validate_weights=[1.0, 0.0, 0.0, 0.5, 0.0],
)
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Reloading modules...
  Reloaded: utils
Module reload complete.
Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 75.4739, mae: 6.6581, huber: 6.1759, swd: 9.4787, ept: 26.9658
Epoch [1/50], Val Losses: mse: 68.6949, mae: 6.4402, huber: 5.9588,