# data

In [1]:
import torch 
import importlib
import monotonic
import data_manager
import metrics
import utils
importlib.reload(utils)
import train as Train
from train import execute_model_evaluation
import train_config
from data_manager import DatasetManager
from train_config import FlatACLConfig, FlatDLinearConfig, FlatNaiveConfig, FlatPatchTSTConfig, FlatTimeMixerConfig
from dataclasses import replace

%load_ext autoreload
%autoreload 2
modules_to_reload_list = [
    data_manager,
    Train,
    train_config,
    monotonic,
    # data_manager, # Reloaded only once even if listed twice
    utils,
    # train_config, # Reloaded only once even if listed twice
    metrics
]

# Initialize the data manager
data_mgr = DatasetManager(device='cuda')

# Load a synthetic dataset
data_mgr.load_trajectory('lorenz', steps=24999, dt=1e-2, ) # 51999 36999
# SCALE = False

LorenzSystem initialized with method: rk4 on device: cuda

Dataset: lorenz (synthetic)
Shape: torch.Size([25000, 3])
Channels: 3
Length: 25000
Parameters: {'steps': 24999, 'dt': 0.01}

Sample data (first 2 rows):
tensor([[1.0000, 0.9800, 1.1000],
        [1.0106, 1.2389, 1.0820]], device='cuda:0')


<data_manager.DatasetManager at 0x223795d38c0>

# Exp - Lorenz - 25000 - ablation

## baseline

In [3]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# Baseline ACL configuration (original Householder variant) for the Lorenz run.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 74.4717, mae: 6.5322, huber: 6.0521, swd: 42.9372, ept: 32.6197
Epoch [1/50], Val Losses: mse: 59.6087, mae: 5.8431, huber: 5.3661, swd: 26.8070, ept: 38.4709
Epoch [1/50], Test Losses: mse: 55

## Ab data lengths: 

Length 25000 -> 52000: the longer trajectory yields better geometry.

Experiment Summary (ACL_lorenz_seq336_pred336_20250514_0254)
Number of runs: 3
Seeds: [1955, 7, 20]

Test Performance at Best Validation (mean ± std):
  mse: 11.4145 ± 1.2282
  mae: 1.6146 ± 0.1313
  huber: 1.2538 ± 0.1206
  swd: 0.9656 ± 0.1587
  ept: 274.5805 ± 7.4334
  count: 36.0000 ± 0.0000

# Ab core components

## Ab: no rotation

In [4]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# Baseline ACL configuration plus the "no rotation" ablation flag.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches (same values as the baseline cell)
    ablate_no_koopman=False,
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablation under study
    ablate_no_rotation=True,  ### HERE
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 124.7099, mae: 8.2677, huber: 7.7850, swd: 60.7525, ept: 13.6469
Epoch [1/50], Val Losses: mse: 97.9352, mae: 7.2435, huber: 6.7647, swd: 34.8426, ept: 15.1067
Epoch [1/50], Test Losses: mse: 9

## AB: Koopman Components

### AB: No Koopman

In [2]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# ACL configuration with Koopman disabled and no shift in z_push.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablations under study
    ablate_no_koopman=True,  ### HERE
    ablate_no_shift_in_z_push=True,  ### HERE
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 76.7488, mae: 6.6491, huber: 6.1683, swd: 45.4359, ept: 30.7262
Epoch [1/50], Val Losses: mse: 61.5423, mae: 5.9935, huber: 5.5153, swd: 29.4883, ept: 32.3976
Epoch [1/50], Test Losses: mse: 57

### AB: No Koopman but shift in z_push
The shift term in z_push alone *can* provide the performance gain of the full Koopman model.

In [16]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# ACL configuration with Koopman disabled but the shift in z_push kept.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablations under study
    ablate_no_koopman=True,  ### HERE
    ablate_no_shift_in_z_push=False,  ### HERE
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 74.8636, mae: 6.5581, huber: 6.0778, swd: 43.2033, ept: 32.2648
Epoch [1/50], Val Losses: mse: 60.1865, mae: 5.8990, huber: 5.4218, swd: 28.1588, ept: 35.8416
Epoch [1/50], Test Losses: mse: 55

### AB: Koopman but with no shift in z_push
Yet Koopman alone *is* useful; the shift term simply overrides it.
In effect, Koopman gives a free interpretability enhancement
without reducing the performance gain from the shift term.

In [18]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# ACL configuration with Koopman kept but the shift in z_push removed.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_rotate_back_Koopman=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablations under study
    ablate_no_koopman=False,  ### HERE
    ablate_no_shift_in_z_push=True,  ### HERE
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 76.5795, mae: 6.6441, huber: 6.1634, swd: 45.2190, ept: 31.0791
Epoch [1/50], Val Losses: mse: 61.1763, mae: 5.9559, huber: 5.4783, swd: 27.7987, ept: 34.6733
Epoch [1/50], Test Losses: mse: 56

### AB: Koopman, but do not rotate back
The actual transformation on z-space is no longer a normal operator of the form $U A U^{T}(z)$.
A general theme is that the more structure the model has, the better the performance.
A general K is worse than a complex- or real-valued fixed parameterization. (Not tested here.)

In [3]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# ACL configuration for the "do not rotate back" Koopman ablation.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    use_complex_eigenvalues=True,
    second_delay_use_shift=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablation-related flags
    ablate_no_koopman=False,  ### HERE
    ablate_no_shift_in_z_push=False,  ### HERE
    # This is the setting that differs from the baseline cell, which passes True.
    ablate_rotate_back_Koopman=False,
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 72.6958, mae: 6.3962, huber: 5.9170, swd: 40.6140, ept: 34.8023
Epoch [1/50], Val Losses: mse: 59.5880, mae: 5.7859, huber: 5.3100, swd: 23.9736, ept: 43.3834
Epoch [1/50], Test Losses: mse: 55

### AB: Koopman, but with real valued eigenvalues
Real-valued eigenvalues are slightly better here; yet in previous ablation studies
(not presented here) they had a negative impact. Worth investigating further.

In [4]:
from monotonic import DynamicTanh
import torch.nn as nn

# Re-import the project modules so this cell sees their current on-disk state.
importlib.reload(monotonic)
importlib.reload(train_config)

# ACL configuration with Koopman kept but real-valued eigenvalues.
cfg = train_config.FlatACLConfig(
    # Input / forecast window lengths and feature count
    seq_len=336,
    pred_len=336,
    channels=data_mgr.datasets['lorenz']['channels'],  # number of features in the data
    # Optimisation
    batch_size=128,
    learning_rate=9e-4,
    seeds=[1955, 7, 20],
    epochs=50,
    # Model size
    dim_hidden=128,
    dim_augment=128,
    # Architecture switches
    second_delay_use_shift=True,
    ablate_shift_inside_scale=False,
    householder_reflects_latent=2,
    householder_reflects_data=4,
    mixing_strategy='delay_only',
    # Loss weights: five entries each; presumably ordered like the logged
    # metrics (mse, mae, huber, swd, ept), i.e. huber only — TODO confirm.
    loss_backward_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    loss_validate_weights=[0.0, 0.0, 1.0, 0.0, 0.0],
    ablate_deterministic_y0=False,
    # Ablation-related flags
    ablate_no_koopman=False,  ### HERE
    ablate_no_shift_in_z_push=False,  ### HERE
    ablate_rotate_back_Koopman=True,  ### HERE
    use_complex_eigenvalues=False,  ### HERE
)

# Every sub-config receives the same six two-element settings.
shared_sub_settings = {
    "enable_magnitudes": [False, True],
    "spectral_flags_scale_shift": [True, False],
    "spectral_flags_magnitudes": [False, True],
    "spectral_flags_hidden_layers": [False, False],
    "activations_scale_shift": ["relu6", "dynamic_tanh"],
    "activations_hidden_layers": [nn.ELU, nn.LogSigmoid],
}
for sub_name in ("x_to_z_delay", "x_to_z_deri", "z_to_x_main", "z_push_to_z", "z_to_y_main"):
    sub_cfg = getattr(cfg, sub_name)
    for attr, value in shared_sub_settings.items():
        # Assign a fresh copy so no two sub-configs share one mutable list.
        setattr(sub_cfg, attr, list(value))

# Run the evaluation on the Lorenz dataset without scaling.
exp = execute_model_evaluation('lorenz', cfg, data_mgr, scale=False)

Shape of training data: torch.Size([17500, 3])
Shape of validation data: torch.Size([2500, 3])
Shape of testing data: torch.Size([5000, 3])
global_std.shape: torch.Size([3])
Global Std for lorenz: tensor([7.9152, 9.0134, 8.6069], device='cuda:0')
Train set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Validation set sample shapes: torch.Size([336, 3]), torch.Size([336, 3])
Test set data shapes: torch.Size([5000, 3]), torch.Size([5000, 3])
Number of batches in train_loader: 132
Batch 0: Data shape torch.Size([128, 336, 3]), Target shape torch.Size([128, 336, 3])

Data Preparation: lorenz
Sequence Length: 336
Prediction Length: 336
Batch Size: 128
Scaling: No
Train Split: 0.7
Val Split: 0.8
Training Batches: 132
Validation Batches: 15
Test Batches: 34


Epoch [1/50], Train Losses: mse: 74.3266, mae: 6.5198, huber: 6.0397, swd: 42.9020, ept: 32.9033
Epoch [1/50], Val Losses: mse: 60.8405, mae: 5.9287, huber: 5.4517, swd: 27.2381, ept: 34.2182
Epoch [1/50], Test Losses: mse: 55