In [None]:
from os import makedirs
from os.path import join, dirname
import uuid
import torch
from functools import partial
from papermill import execute_notebook
from offline_dataset.multi_modal_dataset import generate_multimodal_dataset
import time
from multiprocessing.pool import Pool
from glob import glob
from shutil import rmtree

## Initial Setup

Create directory structure and initialize parameters for the experiment

In [None]:
# Every run gets a fresh UUID so that its artifacts live in their own folder.
experiment_id = str(uuid.uuid4())
print(f'Experiment ID: {experiment_id}')

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

# Layout: tmp/experiments/<experiment_id>/{notebook_outputs, tensorboard_logs, state_quantization}
experiments_folder_path = join('tmp', 'experiments')
root_path = join(experiments_folder_path, experiment_id)
notebook_out_path = join(root_path, 'notebook_outputs')
tensorboard_dir = join(root_path, 'tensorboard_logs')
forecasting_models_root_save_path = join(root_path, 'state_quantization')

# Only the experiments folder, the notebook output folder and the experiment
# root (the parent of tensorboard_dir) are created here; tensorboard_dir and
# the model save path themselves are not.
for directory in (dirname(root_path), notebook_out_path, dirname(tensorboard_dir)):
    makedirs(directory, exist_ok=True)

# Hyper-parameters shared by the tasks defined in the following cells.
bits = 20
# Trained models first, then their untrained counterparts, each in the order h_c, aeq, final_h.
model_names = [
    f'{prefix}model_{variant}-{bits}bits'
    for prefix in ('', 'untrained_')
    for variant in ('h_c', 'aeq', 'final_h')
]
log_output = True
q_learning_epochs = 7000
policy_iteration_epochs = 100
policy_iteration_evaluation_epochs = 500
r_min_total_epochs = 500

## Create Forecasting Model Training Tasks

These tasks execute the notebooks that train the forecasting models.

In [None]:

forecasting_tensorboard_dir = join(tensorboard_dir, 'forecasting_models')
# dirname(...) is tensorboard_dir, so this creates the parent log folder only.
makedirs(dirname(forecasting_tensorboard_dir), exist_ok=True)


def _build_forecasting_task(notebook, model_stem, with_model_dict=False):
    """Return a zero-argument callable that runs one forecasting training notebook.

    Args:
        notebook: File name of the notebook; used both as the input path and
            as the output file name inside ``notebook_out_path``.
        model_stem: Model name without the ``-<bits>bits`` suffix.
        with_model_dict: When true, a ``model_dict_path`` parameter
            (``<model_stem>_dict`` under the model save folder) is passed too.

    Returns:
        A ``functools.partial`` wrapping ``execute_notebook``; nothing is
        executed until the partial is called.
    """
    model_name = f'{model_stem}-{bits}bits'
    parameters = {
        'bits': bits,
        'use_cuda': use_cuda,
        'model_path': join(forecasting_models_root_save_path, model_name),
        'untrained_model_path': join(forecasting_models_root_save_path,
                                     f'untrained_{model_name}'),
    }
    if with_model_dict:
        parameters['model_dict_path'] = join(forecasting_models_root_save_path,
                                             f'{model_stem}_dict')
    parameters['log_dir'] = join(forecasting_tensorboard_dir, model_name)
    return partial(
        execute_notebook,
        input_path=notebook,
        output_path=join(notebook_out_path, notebook),
        parameters=parameters,
        log_output=log_output,
    )


# LSTM auto-encoder model ("aeq"); this one takes no model_dict_path.
train_lstm_ae_notebook = 'train_LSTM-AE.ipynb'
train_lstm_ae_task = _build_forecasting_task(train_lstm_ae_notebook, 'model_aeq')

# "h_c" model.
train_discHC_notebook = 'train_DiscHC.ipynb'
train_discHC_task = _build_forecasting_task(train_discHC_notebook, 'model_h_c',
                                            with_model_dict=True)

# "final_h" model.
train_disc_final_h_notebook = 'train_DiscFinalH.ipynb'
train_disc_final_h_task = _build_forecasting_task(train_disc_final_h_notebook,
                                                  'model_final_h',
                                                  with_model_dict=True)


## Create Model Based Q Learning Tasks

These tasks run the model-based Q-learning notebook once for each trained and untrained forecasting model.

In [None]:
mb_q_learning_tensorboard_dir = join(tensorboard_dir, 'mb_q_learning')
# dirname(...) is tensorboard_dir, so this creates the parent log folder only.
makedirs(dirname(mb_q_learning_tensorboard_dir), exist_ok=True)
train_model_based_q_learning_task_notebook = 'train_mb_offline_q_learning.ipynb'
train_model_based_q_learning_tasks = []


def _build_mb_q_learning_task(name):
    """Return a zero-argument callable running the model-based Q-learning notebook for ``name``.

    The executed notebook is written to ``notebook_out_path`` with the model
    name as a file-name prefix, and gets its own log directory under
    ``mb_q_learning_tensorboard_dir``.
    """
    notebook = train_model_based_q_learning_task_notebook
    return partial(
        execute_notebook,
        input_path=notebook,
        output_path=join(notebook_out_path, f'{name}_{notebook}'),
        parameters={
            'device': 'cuda' if use_cuda else 'cpu',
            'root_path': root_path,
            'total_epochs': q_learning_epochs,
            'model_name': name,
            'log_dir': join(mb_q_learning_tensorboard_dir, name),
        },
        log_output=log_output,
    )


# One task per forecasting model, in the order of model_names.
for model_name in model_names:
    train_model_based_q_learning_tasks.append(_build_mb_q_learning_task(model_name))

## Create Q Learning Tasks

In [None]:
q_learning_tensorboard_dir = join(tensorboard_dir, 'q_learning')
# dirname(...) is tensorboard_dir, so this creates the parent log folder only.
makedirs(dirname(q_learning_tensorboard_dir), exist_ok=True)
train_q_learning_task_notebook = 'train_q_learning.ipynb'
train_q_learning_tasks = []


def _build_q_learning_task(name):
    """Return a zero-argument callable running the Q-learning notebook for ``name``.

    The executed notebook is written to ``notebook_out_path`` with the model
    name as a file-name prefix, and gets its own log directory under
    ``q_learning_tensorboard_dir``.
    """
    notebook = train_q_learning_task_notebook
    return partial(
        execute_notebook,
        input_path=notebook,
        output_path=join(notebook_out_path, f'{name}_{notebook}'),
        parameters={
            'device': 'cuda' if use_cuda else 'cpu',
            'root_path': root_path,
            'total_epochs': q_learning_epochs,
            'model_name': name,
            'log_dir': join(q_learning_tensorboard_dir, name),
        },
        log_output=log_output,
    )


# One task per forecasting model, in the order of model_names.
for model_name in model_names:
    train_q_learning_tasks.append(_build_q_learning_task(model_name))

## Create Policy Iteration Task

In [None]:
# Number of episodes to use for offline training and evaluation.
training_episodes = [10, 100, 1000, 10000]

train_policy_iteration_notebook = 'train_policy_iteration.ipynb'

# A single task covers every model: the full model_names list is passed as a parameter.
# NOTE(review): unlike the training tasks, log_output is not passed here — confirm this is intentional.
train_policy_iteration_task = partial(
    execute_notebook,
    input_path=train_policy_iteration_notebook,
    output_path=join(notebook_out_path, train_policy_iteration_notebook),
    parameters={
        'model_names': model_names,
        'root_path': root_path,
        'training_episodes': training_episodes,
        'total_epochs': policy_iteration_epochs,
        'eval_epochs': policy_iteration_evaluation_epochs,
    },
)

## Create R-Min Task

In [None]:
# Min count for each element in training_episodes (same length, same order).
min_count = [1, 2, 3, 5]

train_r_min_notebook = 'train_r_min.ipynb'

# A single task covers every model: the full model_names list is passed as a parameter.
# NOTE(review): unlike the training tasks, log_output is not passed here — confirm this is intentional.
train_r_min_task = partial(
    execute_notebook,
    input_path=train_r_min_notebook,
    output_path=join(notebook_out_path, train_r_min_notebook),
    parameters={
        'model_names': model_names,
        'root_path': root_path,
        'training_episodes': training_episodes,
        'min_count': min_count,
        'total_epochs': r_min_total_epochs,
    },
)

## Offline Cleanup Task

In [None]:
def cleanup_task():
    """Delete intermediate artifact directories below ``experiments_folder_path``.

    Every directory named ``mdp``, ``offline_rl_trajectories`` or
    ``dataset_creator_tmp`` found at any depth below the experiments folder is
    removed together with its contents. The search starts at the experiments
    folder, so matching directories of all experiments stored there are
    affected, not only those of the current run.

    Returns:
        list[str]: The matched directory paths (each ending with a path
        separator), in the order mdp, offline_rl_trajectories,
        dataset_creator_tmp.
    """
    directory_names = ('mdp', 'offline_rl_trajectories', 'dataset_creator_tmp')
    files_to_delete = []
    for directory_name in directory_names:
        # The trailing '' makes the pattern end with a separator, so only directories match.
        pattern = join(experiments_folder_path, '**', directory_name, '')
        # Without recursive=True, '**' behaves like a single '*' and would only
        # match directories exactly one level below the experiments folder.
        files_to_delete.extend(glob(pattern, recursive=True))
    for path in files_to_delete:
        try:
            rmtree(path)
        except FileNotFoundError:
            # Already removed together with an enclosing match deleted earlier.
            continue
    print(files_to_delete)
    return files_to_delete


## Execute The Pipeline

In [None]:
# Wall-clock start of the whole pipeline; the elapsed time is printed at the end.
start = time.time()
# All notebook tasks are dispatched to a pool of three worker processes.
with Pool(3) as pool:
    # Stage 1: submit the three forecasting-model training tasks together.
    train_lstm_ae_task_result = pool.apply_async(train_lstm_ae_task)
    train_discHC_task_result = pool.apply_async(train_discHC_task)
    train_disc_final_h_task_result = pool.apply_async(train_disc_final_h_task)

    # Block until all three have finished (get() re-raises a worker exception)
    # before any later task is submitted.
    train_lstm_ae_task_result.get()
    train_discHC_task_result.get()
    train_disc_final_h_task_result.get()

    # Stage 2: queue the model-based Q-learning tasks; their results are only
    # collected at the end of this block.
    mb_q_results = []
    for train_model_based_q_learning_task in train_model_based_q_learning_tasks:
        result = pool.apply_async(train_model_based_q_learning_task)
        mb_q_results.append(result)

    # Queue the Q-learning tasks as well; also collected at the end.
    q_results = []
    for train_q_learning_task in train_q_learning_tasks:
        result = pool.apply_async(train_q_learning_task)
        q_results.append(result)

    # Called directly, i.e. in this process rather than in a pool worker, with
    # the largest entry of training_episodes as the episode count.
    generate_multimodal_dataset(model_names=model_names,episodes=max(training_episodes),root_path=root_path)
    # Stage 3: policy iteration and R-Min are submitted after the dataset call
    # has returned, and both are awaited before cleanup is submitted.
    train_policy_iteration_result = pool.apply_async(train_policy_iteration_task)
    train_r_min_result = pool.apply_async(train_r_min_task)
    train_policy_iteration_result.get()
    train_r_min_result.get()
    # NOTE(review): cleanup is submitted before the Q-learning results are
    # collected below, so it may run while those tasks are still in flight —
    # confirm they do not read the directories that cleanup_task removes.
    cleanup_task_result = pool.apply_async(cleanup_task)
    print(f'Cleaned up files: {cleanup_task_result.get()}')

    # Finally wait for every queued Q-learning task, model-based ones first.
    for mb_q_result in mb_q_results:
        mb_q_result.get()

    for q_result in q_results:
        q_result.get()

end = time.time()
# Elapsed wall-clock time converted from seconds to hours.
print((end - start)/3600)
