### Sequential Model Based Optimization using the Tree Parzen Estimator for neural network model

There are four parts to an optimization problem:

1. Objective function: what we want to minimize
2. Domain space: values of the parameters over which to minimize the objective
3. Hyperparameter optimization function: constructs the surrogate function and chooses next values to evaluate
4. Trials: score, parameter pairs recorded each time we evaluate the objective function

In [1]:
# From python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# From torch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# From optuna
import optuna

# From CplAE_MET
from cplAE_MET.models.train_tempcsfeatures_dev import set_paths, init_losses
from cplAE_MET.utils.dataset import MET_exc_inh_v2
from cplAE_MET.models.model_classes import Model_ME_T_v2
from cplAE_MET.models.torch_utils import MET_dataset_v2
from cplAE_MET.models.torch_utils import tonumpy
from cplAE_MET.models.classification_functions import run_QDA



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
batch_size=1000
n_epochs=10

# Read data and train test split -----------
dir_pth = set_paths("config.toml", exp_name="test", fold_n=0)
dat = MET_exc_inh_v2.from_file(dir_pth['MET_data'])
train_ind, val_ind = dat.train_val_split(fold=0, n_folds=10, seed=0)
train_dat = dat[train_ind,:]
val_dat = dat[val_ind,:]
T_labels = dat.cluster_label

# Dataset and Dataloader -----------
train_dataset = MET_dataset_v2(train_dat, device=device)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

val_dataset = MET_dataset_v2(val_dat, device=device)
val_dataloader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)

dataset = MET_dataset_v2(dat, device=device)
dataloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)

In [3]:
def Criterion(x, loss_dict):
    """Objective function to minimize"""

    criterion =  loss_dict['rec_t'] + loss_dict['rec_e'] + x * ( 0.5 * loss_dict['cpl_t->e'] + 0.5 * loss_dict['cpl_e->t']) + \
                 0 * (loss_dict['rec_m'] + (loss_dict['rec_m_me'] + loss_dict['rec_e_me']) + loss_dict['cpl_t->m'] + loss_dict['cpl_m->t'] + \
                 loss_dict['cpl_e->m'] + loss_dict['cpl_m->e'] +  loss_dict['cpl_t->me'] + loss_dict['cpl_me->t'] +  loss_dict['cpl_me->m'] + \
                 loss_dict['cpl_m->me'] + loss_dict['cpl_me->e'] + loss_dict['cpl_e->me']) 
    return criterion

def build_model(params):
    model_config = dict(latent_dim=3, 
                     batch_size=batch_size,
                     augment_decoders=0,
                     T=dict(dropout_p=0.2, 
                            alpha_T=1),
                     E=dict(gnoise_std=train_dataset.gnoise_e_std,
                            gnoise_std_frac=0.05, 
                            dropout_p=0.2, 
                            alpha_E=1),
                     M=dict(gnoise_std=train_dataset.gnoise_m_std,
                            gnoise_std_frac=0.005, 
                            dropout_p=0.2, 
                            alpha_M=0),
                     TE=dict(lambda_TE=params['lambda_TE'], lambda_tune_T_E=0.5, lambda_tune_E_T=0.5),
                     TM=dict(lambda_TM=0, lambda_tune_T_M=0, lambda_tune_M_T=0),
                     ME=dict(alpha_ME=0, lambda_ME=0, lambda_tune_M_E=0, lambda_tune_E_M=0),
                     ME_T=dict(lambda_ME_T=0, lambda_tune_ME_T=0, lambda_tune_T_ME=0),
                     ME_M=dict(lambda_ME_M=0, lambda_tune_ME_M=0, lambda_tune_M_ME=0), 
                     ME_E=dict(lambda_ME_E=0, lambda_tune_ME_E=0, lambda_tune_E_ME=0),
                     )  


    model = Model_ME_T_v2(model_config)
    return model

In [8]:
def train_and_evaluate(param, model):

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.to(device)

    # Training -----------
    for epoch in range(10):
        model.train()
        for step, batch in enumerate(iter(train_dataloader)):
            optimizer.zero_grad()
            # forward pass T, E -----------
            loss_dict, _, _ = model(batch)
            loss = Criterion(param['lambda_TE'], loss_dict)

            loss.backward()
            optimizer.step()

            if step == 0:
                train_loss = init_losses(loss_dict)
        
            # track loss over batches -----------
            for k, v in loss_dict.items():
                train_loss[k] += v

        # Validation -----------
        with torch.no_grad():
            for val_batch in iter(val_dataloader):
                model.eval()
                val_loss, _, _ = model(val_batch)
                
            # Compute classification acc from the model latent dim for all data
            for all_data in iter(dataloader):
                _, z_dict, _ = model(all_data) 
            
            isT_1d = dat.isT_1d
            isTE_1d = np.logical_and(dat.isT_1d, dat.isE_1d)
            zt_classification_acc, n_class, clf = run_QDA(tonumpy(z_dict['zt'])[isT_1d], 
                                                          T_labels[isT_1d],
                                                          train_test_ids={'train':[i for i in train_ind if isT_1d[i]], 
                                                                          'val':[i for i in val_ind if isT_1d[i]]})

            print("acc on the zt:", zt_classification_acc, "number of classes:", n_class)
            
            ze_classification_acc , n_class, clf = run_QDA(z_dict['ze'][isTE_1d], 
                                                           T_labels[isTE_1d],
                                                           test_size= 0.1)
            
            print("acc on the ze:", ze_classification_acc, "number of classes:", n_class)


        model.train()

        # Average losses over batches -----------
        for k, v in train_loss.items():
            train_loss[k] = train_loss[k] / len(train_dataloader)

    return ze_classification_acc

    
def objective(trial):
    params = {'lambda_TE': trial.suggest_float('lambda_TE', 0, 10)}
    model = build_model(params)
    accuracy = train_and_evaluate(params, model)
    return accuracy


In [9]:
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=10)

[32m[I 2023-01-04 11:53:12,646][0m A new study created in memory with name: no-name-db9148f6-5371-433f-91d1-b84d51727e83[0m
[33m[W 2023-01-04 11:53:16,768][0m Trial 0 failed because of the following error: TypeError("can't convert cuda:1 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.")[0m
Traceback (most recent call last):
  File "/home/fahimehb/miniconda3/envs/cplae_met/lib/python3.8/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3808220/485917395.py", line 63, in objective
    accuracy = train_and_evaluate(params, model)
  File "/tmp/ipykernel_3808220/485917395.py", line 44, in train_and_evaluate
    ze_classification_acc , n_class, clf = run_QDA(z_dict['ze'][isTE_1d],
  File "/home/fahimehb/Local/new_codes/cplAE_MET/cplAE_MET/models/classification_functions.py", line 85, in run_QDA
    clf.fit(X_train, y_train)
  File "/home/fahimehb/miniconda3/envs/cplae_met/lib

acc on the zt: 24.427480916030532 number of classes: 52


TypeError: can't convert cuda:1 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [25]:
isT_1d = dat.isT_1d
classification_acc, n_class, clf = run_QDA(dat.XT[isT_1d, 0:83], 
                                           T_labels[isT_1d],
                                           train_test_ids={'train':[i for i in train_ind if isT_1d[i]], 
                                                           'val':[i for i in val_ind if isT_1d[i]]})



In [26]:
print(classification_acc, n_class)

40.458015267175576 52


In [27]:
isTE_1d = np.logical_and(dat.isT_1d, dat.isE_1d)
clf.score(np.nan_to_num(dat.XE[isTE_1d]), T_labels[isTE_1d])

  score = y_true == y_pred


0.0

In [23]:
np.isnan(np.nan_to_num(dat.XE[isTE_1d])).sum()

0