# Hyperparameter Optimization

In [1]:
import numpy as np
import torch
import optuna
import os
import pickle
import copy

import sys
sys.path.append('./data')
from datautil import create_cross_loaders,sys_separation

#Train RUL_estimation
from architectures import RUL_estimation as arch 
from training_decoder import train_estimation as train
from training_decoder import evaluate_estimation as evaluate

#Train RUL_transition
# from architectures import RUL_transition as arch 
# from training_decoder import train_transition as train
# from training_decoder import evaluate_transition as evaluate


# Experiment tag. It becomes the suffix of the experiment folder name, and the
# special value 'try' makes the objective train for a single epoch.
# Alternative tags left by the author: 'estimation_noisy0', 'transition3'.
exp = 'try'

# When True the objective samples the noise hyper-parameters and the
# cross-validation loaders are created with scored=True.
noisy = True

In [2]:
# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


## Experiment

In [3]:
# Data locations.
# `data` is the sub-folder of ./data that holds dataset.pkl
# (alternative left by the author: '-Rectifier_300Diode').
data = 'Rectifier'
# `dataset_name` only labels the experiment folder (alternative: 'RectifierRUL').
dataset_name = 'Rectifier'
# Folder that receives the Optuna database and the saved loaders / weights.
exp_address = f'./Experiments/{data}/{dataset_name}_{exp}'

## Load the dataset

In [4]:
# NOTE(review): pickle.load executes arbitrary code stored in the file, so only
# load dataset files that come from a trusted source.
dataset_path = f'./data/{data}/dataset.pkl'
with open(dataset_path, 'rb') as file:
    dataset = pickle.load(file)

In [5]:
# Quick sanity check of what was loaded: array shapes and the number of systems.
print(f'Dataset X shape: {dataset.x.shape}')
print(f'Dataset Y shape: {dataset.y.shape}')
print(f'Number of systems: {dataset.n_sys}')

Dataset X shape: (51200, 9)
Dataset Y shape: (51200,)
Number of systems: 200


## Hyperparameter Optimization

In [6]:
# Training-loss settings forwarded to the trainer as config['Floss'] / config['weighted'].
loss = 'MSE'
weighted = False

# Number of cross-validation folds built by create_cross_loaders.
N_folds = 5

# Candidate values kept for reference; the active code in this notebook does
# not sample from them (the matching suggest calls are commented out).
look_back_list = list(range(1, 16))
batch_size_list = [64]  # [2**k for k in range(3,11)]
def scoring(x,y,a=10,b=13):
    """Asymmetric exponential score of the element-wise error ``x - y``.

    With ``s = x - y`` the score is ``exp(s / a) - 1`` where ``s > 0`` and
    ``exp(-s / b) - 1`` where ``s <= 0``; a zero error therefore scores 0.

    Args:
        x: First operand of the error (scalar or NumPy array).
        y: Second operand, subtracted from ``x``.
        a: Scale applied to positive errors.
        b: Scale applied to non-positive errors.

    Returns:
        The score, with the broadcast shape of ``x - y``.
    """
    err = x - y
    # Pick the exponent per element, then exponentiate once.
    exponent = np.where(err > 0, err / a, -err / b)
    return np.exp(exponent) - 1

# Fixed (not tuned) settings read by the objective: the model is built with
# look_back, the loaders use windows of look_back+1 samples and this batch size.
look_back = 30
batch_size = 64


def objective(trial):
    """Optuna objective: cross-validated RMSE of the RUL model for one trial.

    Samples the model and training hyper-parameters from ``trial``, trains a
    fresh model on every one of the ``N_folds`` cross-validation folds, pools
    the out-of-fold predictions and scores them.

    Relies on notebook-level names: ``exp``, ``noisy``, ``dataset``,
    ``N_folds``, ``look_back``, ``batch_size``, ``loss``, ``weighted``,
    ``device`` and ``exp_address``, plus the imported ``arch``, ``train``,
    ``evaluate``, ``create_cross_loaders`` and ``sys_separation``.

    Args:
        trial: The ``optuna.trial.Trial`` used to sample hyper-parameters.

    Returns:
        ``RMSE_mean``: the mean over systems of the per-system RMSE.

    Side effects:
        When this trial matches or beats the best completed trial of the
        study (or no trial has completed yet), the fold loaders and the list
        of per-fold model weights are written to ``exp_address``.
    """
    # Model hyper-parameters. The trailing numbers are the author's original notes.
    rul_head_dim=trial.suggest_categorical('rul_head_dim',[32, 64, 128, 256,512,1024]) #256 256 256 64
    n_head=trial.suggest_categorical('n_head',[1,2,4,6,8,9,10,11,12])                               #2   4  2 1
    # The per-head width is sampled and multiplied by n_head, so embed_dim is
    # always an exact multiple of the number of heads.
    param=trial.suggest_categorical('embed_dim/n_head',[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])        #7 2 5 5
    embed_dim=param*n_head
    dim_feedforward=trial.suggest_categorical('dim_feedforward',[16,32, 64, 128, 256, 512,1024])   #32 #32 32 512

    # Training hyper-parameters
    if exp=='try':
        # Smoke-test experiment: a single epoch, nothing sampled.
        N_epoch=1
    else:
        N_epoch=trial.suggest_int('N_epoch',15,30)

    if noisy:
        noise_strength_ini=trial.suggest_float('noise_strength_ini',0,20)
        noise_strength_end=trial.suggest_float('noise_strength_end',0,20)
        noise_bound_ini=trial.suggest_float('noise_bound_ini',0,1.2)
        noise_bound_end=trial.suggest_float('noise_bound_end',0,1.2)
        # Order expected by the trainer: (strength_ini, bound_ini, strength_end, bound_end).
        noise=(noise_strength_ini,noise_bound_ini,noise_strength_end,noise_bound_end)
    else:
        noise=False

    lr=trial.suggest_float('lr',1e-5,1e-3)
    #trial.suggest_categorical('batch_size',batch_size_list)
    #look_back=trial.suggest_categorical('look_back',[8, 9, 10, 11, 12, 13, 14, 15]) #14 15 12 14

    ####################################################
    # Create the cross-validation loaders (shuffled, so they differ between trials)
    train_loaders,test_loaders=create_cross_loaders(dataset,N_folds=N_folds,window=look_back+1,batch_size=batch_size,scored=noisy,shuffle=True) #,train_stats
    # Training configuration handed to the trainer
    config={'n_epoch':N_epoch,#500
            'lr':  lr,#1e-4
            'a':10,#1 10
            'b':13, #20 13
            'alpha':1,
            'war':200,
            'Floss':loss,
            'weighted':weighted}

    y_preds=[]
    y_trues=[] # TODO: improve this
    weights=[]
    arrays=[]
    for fold in range(N_folds):

        # Create a fresh model for this fold
        model=arch(look_back=look_back,
                    n_features=dataset.n_features,
                    embed_dim=embed_dim,
                    rul_head_dim=rul_head_dim,
                    dim_feedforward=dim_feedforward,
                    n_head=n_head).to(device).double()

        # Train the model on the fold
        model_trained=train(model,train_loaders[fold],config,noise_coef=noise)

        # Evaluate the model on the held-out part of the fold
        model_trained.eval()
        y_pred,y_true=evaluate(model_trained,test_loaders[fold])
        # Deep copy so the stored weights are independent of the live model.
        weight=copy.deepcopy(model_trained.state_dict())

        # Save the fold's predictions, weights and test-set system array
        y_preds.append(y_pred)
        y_trues.append(y_true)
        weights.append(weight)
        arrays.append(test_loaders[fold].dataset.sys_array)

    # Stack the predictions of all folds
    y_pred=np.concatenate(y_preds,axis=0)
    y_true=np.concatenate(y_trues,axis=0)
    array=np.concatenate(arrays)

    # System separation (presumably regroups the samples per system, padding
    # with NaN -- TODO confirm against datautil.sys_separation)
    y_pred=sys_separation(y_pred,array)
    y_true=sys_separation(y_true,array)

    # RMSE: reduce every axis except the first, ignoring NaNs, then average
    RMSE_sys=np.sqrt(np.nanmean((y_pred-y_true)**2,axis=tuple(range(1,y_pred.ndim))))
    RMSE_mean=np.mean(RMSE_sys,axis=0)
    #RMSE_std=np.std(RMSE_sys,axis=0)

    #scoring
    # Nasa_scoring_sys=np.nanmean(scoring(y_pred,y_true),axis=1)
    # Nasa_scoring_mean=np.nanmean(Nasa_scoring_sys,axis=0)
    # Nasa_scoring_std=np.nanstd(Nasa_scoring_sys,axis=0)

    # score=0.5*RMSE_mean+0.5*Nasa_scoring_mean

    # Persist the artefacts when this trial matches or beats the best completed
    # trial. The study is reached through the trial itself, so the function does
    # not depend on a global `study` created in a later cell. `best_value`
    # raises ValueError while no trial has completed (first trial, or every
    # earlier trial failed or was pruned); in that case this trial is the best.
    try:
        best_so_far=trial.study.best_value
    except ValueError:
        best_so_far=None

    if best_so_far is None or best_so_far >= RMSE_mean:
        torch.save(train_loaders, f'{exp_address}/train_loaders.pkl')
        torch.save(test_loaders, f'{exp_address}/test_loaders.pkl')
        torch.save(weights, f'{exp_address}/best_model.pt')

    return RMSE_mean

In [7]:
# The experiment folder must exist before SQLite can create its file inside it.
os.makedirs(f'{exp_address}', exist_ok=True)

# The study is stored in <exp_address>/estudio.db. load_if_exists=True resumes
# an existing study when this cell is re-run. No direction is passed, so
# Optuna's default applies.
study_name = exp_address+'/estudio' #f"{dataset_name}_test{test}"
storage_name = "sqlite:///{}.db".format(study_name)
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

# Number of trials already stored in the study.
len(study.trials)

[I 2023-10-17 16:03:17,988] A new study created in RDB with name: ./Experiments/Rectifier/Rectifier_try/estudio


0

In [8]:
# Target total number of trials stored in the study, counting earlier runs.
n_trials=1

# Run only what is still missing. len(study.trials) counts every stored trial,
# so a resumed study can already hold more than n_trials; clamp at zero so a
# negative budget is never passed to Optuna.
remaining_trials=max(0, n_trials-len(study.trials))
study.optimize(objective, n_trials=remaining_trials, show_progress_bar=True)

  0%|          | 0/1 [00:00<?, ?it/s]

[I 2023-10-17 16:03:47,046] Trial 0 finished with value: 37.01370448522369 and parameters: {'rul_head_dim': 1024, 'n_head': 11, 'embed_dim/n_head': 5, 'dim_feedforward': 16, 'noise_strength_ini': 1.199843520875652, 'noise_strength_end': 15.761147928313857, 'noise_bound_ini': 0.875351995441558, 'noise_bound_end': 1.1150757671116878, 'lr': 0.0007472836134480432}. Best is trial 0 with value: 37.01370448522369.


In [9]:
# Hyper-parameters of the best trial found so far.
study.best_params

{'rul_head_dim': 1024,
 'n_head': 11,
 'embed_dim/n_head': 5,
 'dim_feedforward': 16,
 'noise_strength_ini': 1.199843520875652,
 'noise_strength_end': 15.761147928313857,
 'noise_bound_ini': 0.875351995441558,
 'noise_bound_end': 1.1150757671116878,
 'lr': 0.0007472836134480432}

In [10]:
# Objective value (mean per-system RMSE) of the best trial found so far.
study.best_value

37.01370448522369