# **Installing required dependencies**

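Before installing the dependencies, remember to set the Colab "Runtime" type to GPU (Runtime → Change runtime type): the training configuration below uses CUDA.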

In [None]:
# Installing the necessary dependencies before beginning 

print("Setting up colab environment")
!pip uninstall -y -q pyarrow
!pip install -q -U ray[tune]
!pip install -q ray[debug]

# # A hack to force the runtime to restart, needed to include the above dependencies.
print("Done installing! Restarting via forced crash.")
import os
os._exit(0)

Setting up colab environment


# **Setting the path (IMPORTANT!!!)**

In [2]:
# Make the project folder the working directory, so that relative paths such as
# utils/data_preparation.py are resolved from there.
# NOTE(review): presumably requires Google Drive to be mounted at /content/drive — confirm.
import os

project_dir = '/content/drive/MyDrive/RecAE'
os.chdir(project_dir)

# **Data downloading and partitioning**


In [3]:
# Print the command-line usage of the data preparation script.
!python utils/data_preparation.py -h

usage: data_preparation.py [-h] download perc_tr_n perc_val_n perc_val_an

Data downloading and partitioning

positional arguments:
  download     Download data 1, otherwise 0
  perc_tr_n    Percentage of normal instances to be placed in the training
               set.
  perc_val_n   Percentage of normal instances to be placed in the validation
               set:half of these are used to control model training, the
               remaining ones for model selection.
  perc_val_an  Percentage of anomalous instances w.r.t. normal instances in
               the training setused for model selection(e.g, if the training
               set contains 95 normal instances, if you set this parameter
               equal to 0.05, then,5 anomalous instances will be selected).The
               remamining anomalous instances are placed in the test set.

optional arguments:
  -h, --help   show this help message and exit


In [2]:
# Run the data preparation script. Positional arguments, in the order given by
# its usage text: download=1, perc_tr_n=0.5, perc_val_n=0.5, perc_val_an=0.05.
!python utils/data_preparation.py 1 0.5 0.5 0.05

### Starting downloading ECG5000 data ###
### Download done! ###
Extracting all the files now...
Extraction done!
Saved data in numpy
Data preparation done!


# **Model training and grid search**

To train the model with grid search, run the following cell. Alternatively, open the file "main_tune.py", set your configuration there, and then run:


```
!python main_tune.py
```






In [None]:
import numpy as np
import pandas as pd
import os
from ray import tune
from easydict import EasyDict
from functools import partial

import torch
import torch.nn as nn
import torch.optim as optim

from agents.rnn_autoencoder import RecurrentAEAgent
from graphs.models.recurrent_autoencoder import RecurrentAE
from datasets.common_loader import RecAEDataLoader 

import warnings
warnings.filterwarnings("ignore")

# Project configuration: the fixed (non-tuned) settings passed to tune_model.
# It is built directly as an EasyDict because tune_model sets an attribute on it.
config_rnn_ae = EasyDict({

    # --- Experiment information ---
    "agent": "RecurrentAEAgent",  # fixed

    # --- Architecture hyperparameters ---
    "rnn_type": "GRU",  # alternatives: LSTM, RNN
    # Only needs to be specified when an RNN is used.
    # NOTE(review): this is the string "None", not the None object — confirm
    # that this is what the model expects.
    "rnn_act": "None",
    "n_features": 1,  # number of different input signals

    # --- Optimization hyperparameters ---
    "batch_size_val": 256,
    "max_epoch": 2000,  # should be set high

    # --- AUC hyperparameters ---
    # NOTE(review): the original comment was truncated ("Da non");
    # presumably "do not change" — TODO confirm.
    "sampler_random_state": 88,

    # --- Data folder and file names (IMPORTANT: it must be a global dir) ---
    "data_folder": "/content/drive/MyDrive/RecAE/data/ECG5000/numpy/",
    "X_train": "X_train.npy",
    "y_train": "y_train.npy",
    "X_train_p": "X_train_p.npy",
    "y_train_p": "y_train_p.npy",
    "X_val": "X_val.npy",
    "y_val": "y_val.npy",
    "X_test": "X_test.npy",
    "y_test": "y_test.npy",
    "X_val_p": "X_val_p.npy",
    "y_val_p": "y_val_p.npy",

    # --- GPU settings ---
    "cuda": True,  # True or False
    "device": "cuda",  # set to "cpu" when the GPU is not used
    "gpu_device": 0,  # do not change
    "seed": 58,  # do not change

    # --- Tune ---
    "tune": True,  # required to run the grid search
})


def tune_model(config, checkpoint_dir = None, config_rnn_ae = None):
    """Train one hyperparameter configuration and report its result to Tune.

    The first two parameters must be ``config`` and ``checkpoint_dir``.

    Args:
        config: per-trial hyperparameters. The keys read here are
            ``loss_param`` (a ``(loss_type, lambda_reg)`` pair), ``batch_size``,
            ``latent_dim`` and ``lr``. The keys ``loss_type`` and ``lambda_reg``
            are written back into it.
        checkpoint_dir: forwarded to ``agent.train_tune`` and
            ``agent.finalize_tune``.
        config_rnn_ae: fixed project configuration handed to the agent. Its
            ``training_type`` attribute is set here from the loss type.

    Reports ``agent.best_valid`` to Tune under the metric name ``mean_accuracy``.
    """
    # Split the (loss, lambda) pair and expose both parts as flat config keys.
    config['loss_type'], config['lambda_reg'] = config['loss_param']
    chosen_loss = config['loss_type']

    # TODO: move this choice into the agent class.
    config_rnn_ae.training_type = 'one_class' if chosen_loss == 'MAE' else 'more_class'

    # Agent and its data loader
    agent = RecurrentAEAgent(config_rnn_ae)
    agent.data_loader = RecAEDataLoader(config["batch_size"], agent.config)

    # Model, moved to the agent's device
    agent.model = RecurrentAE(config["latent_dim"], agent.config)
    agent.model.to(agent.device)

    # Loss selected by name among those the agent offers
    agent.loss = agent.possible_loss[chosen_loss]
    agent.loss.to(agent.device)

    # Optimizer, then training
    agent.optimizer = torch.optim.Adam(agent.model.parameters(), lr = config["lr"])
    agent.train_tune(config['lambda_reg'], checkpoint_dir)

    # Finalize, then hand the metric to Tune
    agent.finalize_tune(checkpoint_dir)
    tune.report(mean_accuracy = agent.best_valid)


# Name of the experiment (named after the dataset)
project_name = 'ECG_5000'

# Folder where the experiment results are saved
my_dir = "/content/drive/MyDrive/RecAE/experiments"

# Creating nested conditional grid 
def _iter_loss():
    for loss in ['MAE','MAEAUC']:
        if loss == 'MAE':
            yield loss, 0
        else:
            for lambda_reg in [0.001,0.01, 0.1, 1, 10]:
                yield loss, lambda_reg

# Grid of hyperparameters explored by Tune
search_space = {
    "latent_dim": tune.grid_search([35, 70, 105]),
    "lr": tune.grid_search([0.001, 0.01]),
    "batch_size": tune.grid_search([256]),
    "loss_param": tune.grid_search(list(_iter_loss())),
}

# Bind the fixed project configuration; Tune supplies the remaining arguments.
trainable = partial(tune_model, config_rnn_ae = config_rnn_ae)

# The original left `resume = True` commented out as an extra argument here.
analysis = tune.run(
    trainable,
    config = search_space,
    resources_per_trial = {"cpu": 2, "gpu": 1},
    name = project_name,
    local_dir = my_dir,
)


# Save the results table of the analysis inside the experiments folder
df = analysis.dataframe()
df.to_pickle(f"{my_dir}/df_analysis.pkl")