In [1]:
import torch
from torch.utils.data import DataLoader
from state_quantization.dataset import load_dataset
from state_quantization.dataset import DynamicsModelDataset
from state_quantization.forcasting_models import LSTMForcasting
from state_quantization.quantization_models import DiscAutoEncoder
from state_quantization.forcasting_quantization_models import ForcastingQuant, EmbeddedAEForcastingQuant, ForcastingQuantInferenceWrapper
from state_quantization.trainer import ForcastingQuantTrainer
from torch.optim.lr_scheduler import MultiStepLR
from state_quantization.train import train_model, test_step
from state_quantization.eval import eval_model, compare_models

In [2]:
# Seed PyTorch's global random number generator so that everything drawing
# from it later in the notebook is repeatable after "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
print(f"Using Device: {device}")
# Enable the cuDNN auto-tuner.
# NOTE(review): benchmark mode may pick different kernels between runs, so
# GPU results may still not be bit-for-bit identical despite the seed above.
torch.backends.cudnn.benchmark = True

Using Device: cuda:0


## Load Dataset

In [3]:
# Dataset configuration: keys inside the sample file, where the samples live,
# and where the normalisation parameters are written.
dataset_input_key = 'merged_input'
dataset_output_key = 'merged_output'
dataset_file_path = 'tmp/ib-out/ib-samples-la.npy'
normalized_data_params_save_path = 'state_quantization/NormalizeInputConfigs.pkl'
# Keep the dataset tensors on the CPU; batches are moved to the GPU later.
dataset_device = 'cpu'
# Passed to load_dataset as `y_clip_range`.
# NOTE(review): presumably a [start, stop) range over the output features -
# confirm against load_dataset.
y_indexes = [4, 6]

train_dataset, val_dataset = load_dataset(file_path=dataset_file_path, input_key=dataset_input_key,
                                          output_key=dataset_output_key, dataset_class=DynamicsModelDataset,
                                          normalize=True, device=dataset_device, y_clip_range=y_indexes,
                                          normalized_data_params_save_path=normalized_data_params_save_path)

batch_size = 8000
# Loader settings for training. Host memory is pinned only when the dataset
# tensors are not already on a CUDA device.
params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers': 0,
          'drop_last': True,
          'pin_memory': not train_dataset.x.is_cuda}
print(params)
train_loader = DataLoader(train_dataset, **params)
# Validation reuses the training settings but is not shuffled: sample order
# has no benefit for evaluation and a fixed order keeps it deterministic.
# `drop_last` is kept unchanged in case the trainer relies on full batches.
val_params = {**params, 'shuffle': False}
val_loader = DataLoader(val_dataset, **val_params)
# Display the first target sequence as a quick sanity check.
train_dataset.y[0]

Clipping y
torch.Size([672000, 39, 6])
torch.Size([672000, 10, 2])
torch.Size([288000, 39, 6])
torch.Size([288000, 10, 2])
{'batch_size': 8000, 'shuffle': True, 'num_workers': 0, 'drop_last': True, 'pin_memory': True}


tensor([[ 1.6792, -0.8336],
        [ 1.6763, -0.8298],
        [ 1.4791, -0.8124],
        [ 1.6555, -0.7719],
        [ 1.6520, -0.7387],
        [ 1.4680, -0.7181],
        [ 1.4315, -0.7320],
        [ 1.5838, -0.7297],
        [ 1.5961, -0.7246],
        [ 1.6831, -0.7258]])

## Define Model

In [4]:
# Checkpoint locations: the untrained baseline and the trained model.
untrained_model_path = 'tmp/state_quantization/untrained_model_aeq'
model_path = 'tmp/state_quantization/model_aeq_new'

# Problem dimensions are read from the training dataset.
num_of_features = train_dataset.get_features_size()
seq_len = train_dataset.get_seq_len()
out_size = train_dataset.get_output_feature_size()
print(f'Out Size:{out_size}')
look_ahead = train_dataset.get_look_ahead_size()

# Forecaster hyper-parameters.
hidden_size, n_layers, dropout = 20, 1, 0.1

# Constructor arguments shared by the plain forecaster and the embedded
# forecaster/autoencoder variant built below.
forecaster_kwargs = dict(
    features=num_of_features,
    hidden_size=hidden_size,
    out_size=out_size,
    seq_len=seq_len,
    look_ahead=look_ahead,
    dropout=dropout,
    n_layers=n_layers,
)
forcasting_model = LSTMForcasting(**forecaster_kwargs)

# Discretising autoencoder hyper-parameters.
# NOTE(review): the input is twice the LSTM hidden size - presumably the
# hidden and cell state concatenated; confirm in ForcastingQuant.
bottleneck_size, ae_dropout = 20, 0.0
disc_autoencoder_input_size = 2 * hidden_size
disc_autoencoder = DiscAutoEncoder(
    input_size=disc_autoencoder_input_size,
    bottleneck_size=bottleneck_size,
    dropout=ae_dropout,
)

# Model that is trained: forecaster plus the autoencoder instance above.
model = ForcastingQuant(
    forcasting_model=forcasting_model,
    autoencoder_quant_model=disc_autoencoder,
).to(device=device)

# Additional evaluation model; it is given the same autoencoder instance.
# NOTE(review): this rebinds `eval_model`, hiding the function of the same
# name imported from state_quantization.eval. The trainer cell reads this
# variable, so a rename has to be done in both cells together.
eval_model = EmbeddedAEForcastingQuant(
    autoencoder_quant_model=disc_autoencoder,
    **forecaster_kwargs,
).to(device=device)

# Save an inference-wrapped snapshot of the model before any training.
torch.save(ForcastingQuantInferenceWrapper(model), untrained_model_path)

# True only when the model is on CUDA while the dataset tensors are not.
load_to_gpu = model.is_cuda() and not train_dataset.x.is_cuda
print(load_to_gpu)



Out Size:2
LSTM Layers
ModuleList(
  (0): LSTMCell(6, 20)
)
LSTM Dropout Layers
ModuleList()
Fully Connected Layers
Sequential(
  (0): Linear(in_features=20, out_features=20, bias=True)
  (1): GELU(approximate=none)
  (2): Dropout(p=0.1, inplace=False)
  (3): Linear(in_features=20, out_features=20, bias=True)
  (4): GELU(approximate=none)
  (5): Dropout(p=0.1, inplace=False)
  (6): Linear(in_features=20, out_features=2, bias=True)
)
Encoder Layers
Sequential(
  (0): Linear(in_features=40, out_features=40, bias=True)
  (1): GELU(approximate=none)
  (2): Dropout(p=0.0, inplace=False)
  (3): Linear(in_features=40, out_features=20, bias=True)
  (4): GELU(approximate=none)
  (5): Dropout(p=0.0, inplace=False)
  (6): Linear(in_features=20, out_features=10, bias=True)
  (7): GELU(approximate=none)
  (8): Dropout(p=0.0, inplace=False)
)
Bottleneck Layers
Sequential(
  (0): Linear(in_features=10, out_features=20, bias=True)
  (1): Tanh()
  (2): StraightThroughEstimator()
)
Decoded Layers
Sequen

## Define Trainer

In [5]:
# Settings shared by both learning-rate schedules.
n_epochs = 200
gamma = 0.1  # multiplicative LR decay applied at every milestone

# Forecasting sub-model: Adam with a step decay at the listed epochs.
forecasting_learning_rate = 1e-3
forecasting_lr_milestones = [35]
forecasting_optimizer = torch.optim.Adam(
    model.forcasting_model.parameters(),
    lr=forecasting_learning_rate,
)
forecasting_lr_scheduler = MultiStepLR(
    forecasting_optimizer,
    milestones=forecasting_lr_milestones,
    gamma=gamma,
)

# Autoencoder sub-model: its own Adam optimizer and step schedule.
autoencoder_learning_rate = 1e-3
autoencoder_lr_milestones = [100, 150]
autoencoder_optimizer = torch.optim.Adam(
    model.autoencoder_quant_model.parameters(),
    lr=autoencoder_learning_rate,
)
autoencoder_lr_scheduler = MultiStepLR(
    autoencoder_optimizer,
    milestones=autoencoder_lr_milestones,
    gamma=gamma,
)

# The trainer receives both optimizer/scheduler pairs.
# NOTE(review): `autoencoder_training_start=50` presumably is the epoch at
# which autoencoder training begins - confirm in ForcastingQuantTrainer.
trainer = ForcastingQuantTrainer(
    forcasting_quant_model=model,
    train_loader=train_loader,
    test_loader=val_loader,
    load_to_gpu=load_to_gpu,
    forecasting_optimizer=forecasting_optimizer,
    forecasting_lr_scheduler=forecasting_lr_scheduler,
    autoencoder_lr_scheduler=autoencoder_lr_scheduler,
    autoencoder_optimizer=autoencoder_optimizer,
    autoencoder_training_start=50,
    additional_eval_model=eval_model,
)



## Train Model

In [6]:
# Run training for `n_epochs` epochs with the configured trainer.
trainer.train(n_epochs=n_epochs)

Untrained test
--------
--------------------------------------
Forcasting Test loss: 1.0113959428336885
--------------------------------------
--------------------------------------
Epoch 1
---------
--------------------------------------
Forcasting Train loss: 0.6881995119509243
--------------------------------------
Forcasting Test loss: 0.25023671570751405
--------------------------------------
Forecasting lr: [0.001]
Autoencoder lr: [0.001]
Epoch time: epoch_time = 9.501s
--------------------------------------
--------------------------------------
Epoch 2
---------




--------------------------------------
Forcasting Train loss: 0.158347257279924
--------------------------------------
Forcasting Test loss: 0.08156348806288508
--------------------------------------
Forecasting lr: [0.001]
Autoencoder lr: [0.001]
Epoch time: epoch_time = 9.367s
--------------------------------------
--------------------------------------
Epoch 3
---------
--------------------------------------
Forcasting Train loss: 0.10313189961016178
--------------------------------------
Forcasting Test loss: 0.06504983268678188
--------------------------------------
Forecasting lr: [0.001]
Autoencoder lr: [0.001]
Epoch time: epoch_time = 9.524s
--------------------------------------
--------------------------------------
Epoch 4
---------
--------------------------------------
Forcasting Train loss: 0.08750603915680022
--------------------------------------
Forcasting Test loss: 0.056390467203325696
--------------------------------------
Forecasting lr: [0.001]
Autoencoder lr: [0.

## Save Model

In [7]:
# Wrap the trained model in the inference wrapper before it is saved.
# NOTE(review): `model` is rebound to the wrapper, so re-running this cell
# would wrap the wrapper again - run it once per training run.
model = ForcastingQuantInferenceWrapper(model)

In [8]:
# Persist the inference-wrapped model to `model_path`.
torch.save(model, model_path)
# Drop the notebook's reference first and only then release cached GPU
# blocks: empty_cache() can only return memory no live tensor occupies.
# NOTE(review): other objects (e.g. the trainer and optimizers) may still
# reference the underlying modules and keep their memory alive.
del model
torch.cuda.empty_cache()