In [1]:
from os import makedirs
from os.path import join, dirname
import uuid
import torch
from functools import partial
from papermill import execute_notebook


## Initial Setup

Create directory structure and initialize parameters for the experiment

In [2]:
# Unique identifier for this run; every artefact is stored below a directory named after it.
experiment_id = str(uuid.uuid4())
print(f'Experiment ID: {experiment_id}')

# Select the first CUDA device when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Directory layout of the experiment.
root_path = join('tmp', 'experiments', experiment_id)
notebook_out_path = join(root_path, 'notebook_outputs')
tensorboard_dir = join(root_path, 'tensorboard_logs')
forecasting_models_root_save_path = join(root_path, 'state_quantization')

# Create the directories themselves. Calling `makedirs(dirname(path))` only creates
# the *parent* of `path`, which previously left `tensorboard_dir` missing.
# `forecasting_models_root_save_path` is intentionally not created in this cell.
makedirs(root_path, exist_ok=True)
makedirs(notebook_out_path, exist_ok=True)
makedirs(tensorboard_dir, exist_ok=True)

# Experiment parameters handed to the executed notebooks.
bits = 20
model_names = [f'model_h_c-{bits}bits', f'model_aeq-{bits}bits', f'model_final_h-{bits}bits']
log_output = True

Experiment ID: 302e1c19-b79a-46a8-bc5f-ad3227eabd00


## Create Forecasting Model Training Tasks

These tasks execute the notebooks which are responsible for the training of forecasting models.

In [3]:

# TensorBoard log directory passed to all three forecasting-model training notebooks.
forecasting_tensorboard_dir = join(tensorboard_dir, 'forecasting_models')
makedirs(dirname(forecasting_tensorboard_dir), exist_ok=True)


def _forecasting_task(notebook, parameters):
    """Build a deferred papermill run of `notebook` with the given `parameters`.

    The executed copy is written into `notebook_out_path` under the same file name,
    and `log_output` is forwarded unchanged to `execute_notebook`.
    """
    return partial(
        execute_notebook,
        input_path=notebook,
        output_path=join(notebook_out_path, notebook),
        parameters=parameters,
        log_output=log_output,
    )


# LSTM auto-encoder model.
train_lstm_ae_notebook = 'train-LSTM-AE.ipynb'
train_lstm_ae_task = _forecasting_task(
    train_lstm_ae_notebook,
    {
        'bits': bits,
        'use_cuda': use_cuda,
        'model_path': join(forecasting_models_root_save_path, f'model_aeq-{bits}bits'),
        'untrained_model_path': join(forecasting_models_root_save_path,
                                     f'untrained_model_aeq-{bits}bits'),
        'log_dir': forecasting_tensorboard_dir,
    },
)

# DiscHC model; this notebook additionally receives a `model_dict_path`.
train_discHC_notebook = 'train_DiscHC.ipynb'
train_discHC_task = _forecasting_task(
    train_discHC_notebook,
    {
        'bits': bits,
        'use_cuda': use_cuda,
        'model_path': join(forecasting_models_root_save_path, f'model_h_c-{bits}bits'),
        'untrained_model_path': join(forecasting_models_root_save_path,
                                     f'untrained_model_h_c-{bits}bits'),
        'model_dict_path': join(forecasting_models_root_save_path, 'model_h_c_dict'),
        'log_dir': forecasting_tensorboard_dir,
    },
)

# DiscFinalH model; this notebook additionally receives a `model_dict_path`.
train_disc_final_h_notebook = 'train_DiscFinalH.ipynb'
train_disc_final_h_task = _forecasting_task(
    train_disc_final_h_notebook,
    {
        'bits': bits,
        'use_cuda': use_cuda,
        'model_path': join(forecasting_models_root_save_path, f'model_final_h-{bits}bits'),
        'untrained_model_path': join(forecasting_models_root_save_path,
                                     f'untrained_model_final_h-{bits}bits'),
        'model_dict_path': join(forecasting_models_root_save_path, 'model_final_h_dict'),
        'log_dir': forecasting_tensorboard_dir,
    },
)


## Create Model Based Q Learning Tasks

These tasks run the model-based Q-learning notebooks for each trained and untrained forecasting model.

In [4]:
# TensorBoard log directory passed to every model-based Q-learning run.
mb_q_learning_tensorboard_dir = join(tensorboard_dir, 'mb_q_learning')
makedirs(dirname(mb_q_learning_tensorboard_dir), exist_ok=True)

train_model_based_q_learning_task_notebook = 'train_mb_offline_q_learning.ipynb'
q_learning_epochs = 10

# One deferred papermill run per entry of `model_names`; each executed notebook
# is written to its own output file, prefixed with the model name.
train_model_based_q_learning_tasks = []
for model_name in model_names:
    mb_q_learning_parameters = {
        'device': 'cuda' if use_cuda else 'cpu',
        'root_path': root_path,
        'total_epochs': q_learning_epochs,
        'model_name': model_name,
        'log_dir': mb_q_learning_tensorboard_dir,
    }
    mb_q_learning_output_path = join(
        notebook_out_path,
        f'{model_name}_{train_model_based_q_learning_task_notebook}',
    )
    train_model_based_q_learning_tasks.append(
        partial(
            execute_notebook,
            input_path=train_model_based_q_learning_task_notebook,
            output_path=mb_q_learning_output_path,
            parameters=mb_q_learning_parameters,
            log_output=log_output,
        )
    )

## Create Q Learning Tasks

In [5]:
# TensorBoard log directory passed to every Q-learning run.
q_learning_tensorboard_dir = join(tensorboard_dir, 'q_learning')
makedirs(dirname(q_learning_tensorboard_dir), exist_ok=True)

train_q_learning_task_notebook = 'train_q_learning.ipynb'

# One deferred papermill run per entry of `model_names`. `q_learning_epochs`
# is read from the notebook namespace and must already be defined.
train_q_learning_tasks = []
for model_name in model_names:
    q_learning_parameters = {
        'device': 'cuda' if use_cuda else 'cpu',
        'root_path': root_path,
        'total_epochs': q_learning_epochs,
        'model_name': model_name,
        'log_dir': q_learning_tensorboard_dir,
    }
    q_learning_output_path = join(
        notebook_out_path,
        f'{model_name}_{train_q_learning_task_notebook}',
    )
    train_q_learning_tasks.append(
        partial(
            execute_notebook,
            input_path=train_q_learning_task_notebook,
            output_path=q_learning_output_path,
            parameters=q_learning_parameters,
            log_output=log_output,
        )
    )

## Create Offline Dataset Creation Task

## Execute All Tasks

In [6]:

import time
from multiprocessing.pool import Pool

# Run the whole pipeline on a pool of three worker processes and report the
# wall-clock duration in hours.
# NOTE(review): the recorded output of this cell ends in a CUDA
# `CUBLAS_STATUS_NOT_INITIALIZED` error raised inside one of the Q-learning
# notebooks. Possibly several workers competing for the same GPU; confirm
# before re-running.
start = time.time()
with Pool(3) as pool:
    # Stage 1: submit the three forecasting-model trainings, then wait for each
    # of them in submission order.
    train_lstm_ae_task_result, train_discHC_task_result, train_disc_final_h_task_result = [
        pool.apply_async(forecasting_task)
        for forecasting_task in (train_lstm_ae_task, train_discHC_task, train_disc_final_h_task)
    ]
    for forecasting_result in (train_lstm_ae_task_result,
                               train_discHC_task_result,
                               train_disc_final_h_task_result):
        forecasting_result.get()

    # Stage 2: submit all model-based Q-learning runs, then all Q-learning runs.
    mb_q_results = [pool.apply_async(mb_q_task) for mb_q_task in train_model_based_q_learning_tasks]
    q_results = [pool.apply_async(q_task) for q_task in train_q_learning_tasks]

    # `.get()` blocks until the run finishes and re-raises any exception from the worker.
    for pending_result in mb_q_results + q_results:
        pending_result.get()

end = time.time()
print((end - start)/3600)


2022-12-19 14:25:15.508478: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-19 14:25:15.447975: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-19 14:25:15.481127: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the ap

Executing:   0%|          | 0/7 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'
Input notebook does not contain a cell with tag 'parameters'


Executing:   0%|          | 0/7 [00:00<?, ?cell/s]

Executing:   0%|          | 0/7 [00:00<?, ?cell/s]

2022-12-19 14:27:31.405590: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-19 14:27:31.417582: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-19 14:27:31.447850: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the ap

PapermillExecutionError: 
---------------------------------------------------------------------------
Exception encountered at "In [7]":
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[7], line 13
      7 callbacks = [
      8     SaveBestPolicy(save_path=best_save_path),
      9     SavePolicyOnTrainingEnd(save_path=last_save_path),
     10     SavePolicyXInterval(x_interval_save_path)
     11 ]
     12 trainer = OnlineTrainer(algo=algo,callbacks=callbacks)
---> 13 trainer.fit()

File ~/PycharmProjects/State-Space-Discretization/base_rl/train.py:28, in OnlineTrainer.fit(self)
     26 def fit(self):
     27     self.execute_callback(self.on_training_start_cb)
---> 28     self.algo.setup()
     29     while self.algo.keep_training():
     30         self.execute_callback(self.on_episode_start_cb)

File ~/PycharmProjects/State-Space-Discretization/q_learning/algorithm.py:69, in QLearningAlgo.setup(self)
     67 def setup(self):
     68     self.writer = SummaryWriter(comment=self.comment, log_dir=self.log_dir)
---> 69     self.env = self.env_creator(**self.env_kwargs)
     70     self.mean_train_reward_per_epoch = []
     71     self.current_epoch = 0

File ~/PycharmProjects/State-Space-Discretization/envs/env_creator.py:29, in IBGymModelQ_creator(model_path, device, steps_per_episode)
     27 normalize_output = NormalizeTransform.load('tmp/transformer/NormalizeOutputConfigs.pkl').to(device)
     28 lstm_quantize = LSTMQuantize(model=model, normalize_transformer=normalize_input, reshape=reshape)
---> 29 return IBGymModelQ(device=device, setpoint=70, reward_type='classic', action_type='discrete',
     30                    observation_type='include_past',
     31                    reset_after_timesteps=steps_per_episode, n_past_timesteps=model.get_seq_len(),
     32                    lstm_quantize=lstm_quantize, output_normalize_transform=normalize_output)

File ~/PycharmProjects/State-Space-Discretization/envs/IBGym_mod_envs.py:271, in IBGymModelQ.__init__(self, lstm_quantize, output_normalize_transform, device, **kwargs)
    269 self.h = 0
    270 self.output_normalize_transform = output_normalize_transform
--> 271 super().__init__(**kwargs)

File ~/PycharmProjects/State-Space-Discretization/envs/IBGym_mod_envs.py:90, in IBGymModded.__init__(self, setpoint, reward_type, action_type, observation_type, reset_after_timesteps, init_seed, n_past_timesteps)
     87 else:
     88     raise ValueError('Invalid observation_type. observation_type can either be "classic" or "include_past"')
---> 90 self.reset()

File ~/PycharmProjects/State-Space-Discretization/envs/IBGym_mod_envs.py:308, in IBGymModelQ.reset(self)
    306 self.done = False
    307 self.last_observation = return_observation
--> 308 discrete_obs = self.lstm_quantize(return_observation)[0]
    309 self.model_out = self.lstm_quantize.get_continuous_output()
    310 self.v = self.last_observation[1]

File ~/PycharmProjects/State-Space-Discretization/state_quantization/transforms.py:76, in LSTMQuantize.__call__(self, x)
     74 x = self.normalize_transformer.transform(x)
     75 x = torch.nan_to_num(x, 1)
---> 76 self.y = self.model(x)
     77 return self.bin2dec(self.model.quantized_state).tolist()

File ~/PycharmProjects/State-Space-Discretization/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
   1186 # If we don't have any hooks, we want to skip the rest of the logic in
   1187 # this function, and just call forward.
   1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1189         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190     return forward_call(*input, **kwargs)
   1191 # Do not call functions when jit is used
   1192 full_backward_hooks, non_full_backward_hooks = [], []

File ~/PycharmProjects/State-Space-Discretization/state_quantization/quantization_models.py:79, in ForcastingDiscHC.forward(self, x)
     76 (h, c) = self.init_hidden(x.shape[0])
     78 for i in range(self.seq_len):
---> 79     self.lstm_layers_forward(x=x[:, i, :], h=h, c=c)
     81 self.quantized_state = torch.cat((h[-1], c[-1]), dim=1)
     83 output = self.final_dense_forward(h[-1])

File ~/PycharmProjects/State-Space-Discretization/state_quantization/quantization_models.py:67, in ForcastingDiscHC.lstm_layers_forward(self, x, h, c)
     65 layer_input = x
     66 for layer_idx in range(self.n_layers):
---> 67     (h[layer_idx], c[layer_idx]) = self.lstm_layers[layer_idx](layer_input, (h[layer_idx], c[layer_idx]))
     68     h[layer_idx] = self.h_quantization_layers[layer_idx](h[layer_idx])
     69     c[layer_idx] = self.c_quantization_layers[layer_idx](c[layer_idx])

File ~/PycharmProjects/State-Space-Discretization/venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
   1186 # If we don't have any hooks, we want to skip the rest of the logic in
   1187 # this function, and just call forward.
   1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1189         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190     return forward_call(*input, **kwargs)
   1191 # Do not call functions when jit is used
   1192 full_backward_hooks, non_full_backward_hooks = [], []

File ~/PycharmProjects/State-Space-Discretization/venv/lib/python3.10/site-packages/torch/nn/modules/rnn.py:1194, in LSTMCell.forward(self, input, hx)
   1191 else:
   1192     hx = (hx[0].unsqueeze(0), hx[1].unsqueeze(0)) if not is_batched else hx
-> 1194 ret = _VF.lstm_cell(
   1195     input, hx,
   1196     self.weight_ih, self.weight_hh,
   1197     self.bias_ih, self.bias_hh,
   1198 )
   1200 if not is_batched:
   1201     ret = (ret[0].squeeze(0), ret[1].squeeze(0))

RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
