# Demo to optimize kernel and stride in convolution operations

In [1]:
import sys
sys.path.append('..')
sys.path.append('../..')


import iterative_naive_nas

from sklearn.model_selection import train_test_split

import tensorflow.compat.v1 as tf

import numpy as np

import custom_training

import utils.losses_utils as losses

import utils.data_utils as data_utils

import viz_results as viz

import GPyOpt, GPy

import matplotlib.pyplot as plt

import copy

from scipy import spatial

from utils import gen_dims_utils as gen_dim

# Previously optimized hyperparameter vectors, one per partition count.
# Entry order: [learning rate, EEG L1 penalization, BOLD L1 penalization,
#               decoder L1 penalization, loss coefficient, batch size,
#               current shape].
#
# 30 partitions:
#   [3.46661820e-04, 4.01120020e-01, 9.09580986e-01, 4.13090818e-01,
#    3.93104672e-01, 8.00000000e+00, 8.20000000e+02]
# 21 partitions:
#   [1.0e-14, 1.0e-04, 1.0e+00, 1.0e-04, 0.0e+00, 1.6e+01, 8.2e+02]
# 25 partitions (the vector in use; matches n_partitions below):
optimized_parameters = [
    6.80863834e-04,
    4.68269339e-01,
    4.51964628e-01,
    1.80029101e-01,
    3.94141219e-01,
    2.00000000e+00,
    7.20000000e+02,
]

# The first five entries are real-valued, the last two are integer-valued.
(learning_rate,
 l1_penalization_eeg,
 l1_penalization_bold,
 l1_penalization_decoder,
 loss_coefficient) = (float(value) for value in optimized_parameters[:5])
batch_size, current_shape = (int(value) for value in optimized_parameters[5:7])
# Earlier layout, kept for reference (separate hidden shape per network):
#   eeg_hidden_shape = int(optimized_parameters[6])
#   bold_hidden_shape = int(optimized_parameters[7])
#   decoder_hidden_shape = int(optimized_parameters[8])

# Settings forwarded to data_utils.load_data in the data-loading cell.
bold_shift = 3
f_resample = 1.8
n_partitions = 25

# JSON descriptions of the networks optimized for this partition count.
eeg_file = ''.join(['../../optimized_nets/eeg/eeg_', str(n_partitions), '_partitions.json'])
bold_file = ''.join(['../../optimized_nets/bold/bold_', str(n_partitions), '_partitions.json'])
decoder_file = ''.join(['../../optimized_nets/decoder/decoder_', str(n_partitions), '_partitions.json'])


def batch_to_dim(shape):
    """Return `shape` without its first entry.

    The callers in this file pass Keras layer shapes, whose first entry is
    the batch axis; everything after it is returned unchanged.
    """
    without_first = shape[1:]
    return without_first

def add_kernel_stride_hyperparameters(network, net_name):
    """Build one discrete search entry per layer and spatial dimension.

    Every layer is considered except those whose name contains 'reshape'
    without also containing 'transpose'. For each considered layer, the
    input dimensions are those of the previously considered layer's output
    (or of the network input for the first one), with the last axis dropped.

    Args:
        network: object exposing `layers`, `input_shape` and, per layer,
            `name` and `get_output_shape_at`.
        net_name: prefix used in the generated entry names.

    Returns:
        A list of dicts with keys 'name' (`<net_name>_l<layer>_d<dim>`),
        'type' ('discrete') and 'domain' (tuple of the options returned by
        `gen_dim.get_possible_kernel_size_deconv`).
    """
    search_space = []
    last_used = -1  # index of the previously considered layer; -1 means none yet

    for idx, current in enumerate(network.layers):
        # Only pure reshape layers are skipped.
        if 'transpose' not in current.name and 'reshape' in current.name:
            continue

        if last_used < 0:
            in_dims = network.input_shape[1:-1]
        else:
            in_dims = batch_to_dim(network.layers[last_used].get_output_shape_at(0)[:-1])

        # NOTE(review): transpose and non-transpose layers both go through the
        # "deconv" helper, exactly as before -- confirm this is intended.
        for axis, in_size in enumerate(in_dims):
            out_size = batch_to_dim(current.get_output_shape_at(0))[axis]
            search_space.append({
                'name': net_name + '_l' + str(idx) + '_d' + str(axis),
                'type': 'discrete',
                'domain': tuple(gen_dim.get_possible_kernel_size_deconv(in_size, out_size)),
            })

        last_used = idx

    return search_space


def get_unmutable_parameters(parameters, index_opt=0, name=None, layers_index=None):
    """Collect the kernel/stride option chosen for every matching parameter.

    Args:
        parameters: list of dicts with a 'name' ending in
            `_l<layer>_d<dim>` and a 'domain' sequence whose items are
            indexable as (kernel, stride).
        index_opt: position inside each 'domain' of the option to pick.
        name: substring a parameter name must contain to be considered.
        layers_index: optional dict to extend in place; a new one is created
            when it is None.

    Returns:
        `layers_index`, mapping layer index -> dimension index ->
        {'kernel': ..., 'stride': ...}.
    """
    import re

    if layers_index is None:
        layers_index = {}

    # Parse the numeric suffix rather than fixed character positions, so
    # layer or dimension indices with more than one digit are read correctly.
    suffix = re.compile(r'_l(\d+)_d(\d+)$')

    for parameter in parameters:
        param_name = parameter['name']
        if name not in param_name:
            continue

        found = suffix.search(param_name)
        if found:
            layer_index = int(found.group(1))
            dim_index = int(found.group(2))
        else:
            # Fallback to the original positional convention for names that
            # do not follow the `_l<layer>_d<dim>` pattern.
            layer_index = int(param_name[-4])
            dim_index = int(param_name[-1])

        option = parameter['domain'][index_opt]
        layers_index.setdefault(layer_index, {})[dim_index] = {'kernel': option[0],
                                                               'stride': option[1]}

    return layers_index

def get_models_unmutable_parameters(parameters, index_opt=np.array([[0,0,0]]), new_parameters=None):
    """Collect the chosen kernel/stride options for the three networks.

    Args:
        parameters: list of parameter dicts, forwarded to
            `get_unmutable_parameters`.
        index_opt: a single row of option indices, given either as a
            (1, n) array or as a 1-D vector. Column 0 is used for 'eeg',
            column 1 for 'bold' and column 2 for 'decoder'.
        new_parameters: optional dict with keys 'eeg', 'bold' and 'decoder'
            to extend in place; created when it is None.

    Returns:
        `new_parameters`, with each key holding the mapping returned by
        `get_unmutable_parameters` for that network.

    Raises:
        TypeError: if `index_opt` holds more than one row.
    """
    if new_parameters is None:
        new_parameters = {'eeg': None, 'bold': None, 'decoder': None}

    # Index explicit scalars: int() on a 1-D array slice is deprecated in
    # recent NumPy, and this form also accepts 1-D vectors.
    selection = np.atleast_2d(index_opt)
    if selection.shape[0] != 1:
        raise TypeError('index_opt must hold exactly one row of option indices')

    # NOTE(review): a single column value is applied to every parameter of a
    # network, as before -- confirm that one index per network is intended.
    for column, net in enumerate(('eeg', 'bold', 'decoder')):
        new_parameters[net] = get_unmutable_parameters(parameters,
                                                       index_opt=int(selection[0, column]),
                                                       name=net,
                                                       layers_index=new_parameters[net])

    return new_parameters

def get_model_by_kernel_stride(network, kernel_stride, name='eeg'):
    """Rebuild `network` with new kernel sizes and strides on its conv layers.

    Args:
        network: model whose `layers` and `input_shape` are read.
        kernel_stride: mapping network name -> layer index -> dimension
            index -> {'kernel': ..., 'stride': ...}.
        name: key of `kernel_stride` holding the settings for this network.

    Returns:
        A `tf.keras.Sequential` built with `network.input_shape`.
    """
    rebuilt = tf.keras.Sequential()

    for position, source_layer in enumerate(network.layers):
        if 'conv' not in source_layer.name:
            # Non-conv layers are added as-is: the same layer object is reused.
            rebuilt.add(source_layer)
            continue

        per_axis = kernel_stride[name][position]
        kernels = []
        steps = []
        # Dimension keys are expected to be 0..n-1.
        for axis in range(len(per_axis.keys())):
            kernels.append(per_axis[axis]['kernel'])
            steps.append(per_axis[axis]['stride'])

        # Same layer class and activation; the first positional argument
        # stays 1 (presumably the filter count -- confirm).
        layer_cls = source_layer.__class__
        rebuilt.add(layer_cls(1,
                              kernel_size=tuple(kernels),
                              strides=tuple(steps),
                              activation=source_layer.activation))

    rebuilt.build(input_shape=network.input_shape)

    return rebuilt


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/32/export/20130410320002_Segmentation_bin.vhdr...
Setting channel info structure...
Reading 0 ... 162022  =      0.000 ...   648.088 secs...
(50, 2607, 7)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/35/export/20130424350002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 197234  =      0.000 ...   788.936 secs...
(100, 2607, 7)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/36/export/20130425360002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 181949  =      0.000 ...   727.796 secs...
(150, 2607, 7)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/37/export/20130426370002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 195159  =      0.000 ...   780.636 secs...
(200, 2607, 7)
Extracting parameters from /home/davidcalhas/e

In [2]:
# Load the training and validation sets. The two index lists (0-9 and 10-11)
# presumably select the recordings/individuals of each set -- confirm against
# data_utils.load_data.
eeg_train, bold_train, eeg_val, bold_val = data_utils.load_data(list(range(10)), 
                                                                list(range(10, 12)), 
                                                                bold_shift=bold_shift, 
                                                                n_partitions=n_partitions, f_resample=f_resample,
                                                                roi=1, roi_ica_components=20)

# Standardize the data: the scalers returned for the training set are passed
# back in when standardizing the validation set.
eeg_train, bold_train, eeg_scaler, bold_scaler = data_utils.standardize(eeg_train, bold_train)
eeg_val, bold_val, _, _ = data_utils.standardize(eeg_val, bold_val, eeg_scaler=eeg_scaler, bold_scaler=bold_scaler)

n_voxels = bold_train.shape[1]

print("Finished Loading Data")

# Build the paired EEG/BOLD inputs and their targets used by the training calls.
X_train_eeg, X_train_bold, tr_y = data_utils.create_eeg_bold_pairs(eeg_train, bold_train)
X_val_eeg, X_val_bold, tv_y = data_utils.create_eeg_bold_pairs(eeg_val, bold_val)


# Cast everything to float32; each array is rebound one at a time.
X_train_eeg = X_train_eeg.astype(np.float32)
X_train_bold = X_train_bold.astype(np.float32)
X_val_eeg = X_val_eeg.astype(np.float32)
X_val_bold = X_val_bold.astype(np.float32)


tr_y = np.array(tr_y, dtype=np.float32)
tv_y = np.array(tv_y, dtype=np.float32)

eeg_train = eeg_train.astype('float32')
bold_train = bold_train.astype('float32')
eeg_val = eeg_val.astype('float32')
bold_val = bold_val.astype('float32')

print("Pairs Created")

# Load the test set (indices 12-15) through the second index list; the first
# list is empty and its two return values are discarded.
_, _, eeg_test, bold_test = data_utils.load_data(list(range(0)), list(range(12, 16)), 
                                                 bold_shift=bold_shift, 
                                                 n_partitions=n_partitions, 
                                                 f_resample=f_resample, 
                                                 roi=1, roi_ica_components=20)

# The test set is standardized with the training-set scalers as well.
eeg_test, bold_test, _, _ = data_utils.standardize(eeg_test, bold_test, eeg_scaler=eeg_scaler, bold_scaler=bold_scaler)

eeg_test = eeg_test.astype('float32')
bold_test = bold_test.astype('float32')

Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/32/export/20130410320002_Segmentation_bin.vhdr...
Setting channel info structure...
Reading 0 ... 162022  =      0.000 ...   648.088 secs...
(25, 2607, 14)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/35/export/20130424350002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 197234  =      0.000 ...   788.936 secs...
(50, 2607, 14)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/36/export/20130425360002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 181949  =      0.000 ...   727.796 secs...
(75, 2607, 14)
Extracting parameters from /home/davidcalhas/eeg_to_fmri/datasets/01/EEG/37/export/20130426370002_Pulse_Artifact_Correction_bin.vhdr...
Setting channel info structure...
Reading 0 ... 195159  =      0.000 ...   780.636 secs...
(100, 2607, 14)
Extracting parameters from /home/davidcalhas

In [12]:
"""LCOMB"""

# Load the EEG, BOLD and decoder networks from their JSON description files.
eeg_network, bold_network, decoder_network = viz.get_models_and_shapes(
    eeg_file=eeg_file,
    bold_file=bold_file,
    decoder_file=decoder_file,
)

# Values noted for each first-layer activation that was tried:
#   linear  : -0.26785496   (converges)
#   relu    : -0.29416773   (converges)
#   sigmoid : -1.4700432    (doesn't converge)
#   tanh    : -0.119947046  (converges)

# Use relu on the first layer of each network.
eeg_network.layers[0].activation = tf.keras.activations.relu
bold_network.layers[0].activation = tf.keras.activations.relu
decoder_network.layers[0].activation = tf.keras.activations.relu

# Per-sample input shapes: every axis after the first one.
eeg_input_shape = tuple(eeg_train.shape[axis] for axis in (1, 2, 3, 4))
bold_input_shape = tuple(bold_train.shape[axis] for axis in (1, 2, 3))

multi_modal_model = custom_training.multi_modal_network(
    eeg_input_shape,
    bold_input_shape,
    eeg_network,
    bold_network,
    dcca=False,
)

# Train with the previously optimized learning rate, batch size and loss
# coefficient; the encoder and the decoder each get their own Adam optimizer.
validation_loss = custom_training.linear_combination_training(
    X_train_eeg,
    X_train_bold,
    tr_y,
    eeg_network,
    decoder_network,
    multi_modal_model,
    epochs=40,
    encoder_optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    decoder_optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss_function=losses.get_reconstruction_log_cosine_loss,
    batch_size=batch_size,
    linear_combination=loss_coefficient,
    X_val_eeg=X_val_eeg,
    X_val_bold=X_val_bold,
    tv_y=tv_y,
)

model_name = "LComb"

Encoder Loss:  0.17020757  || Decoder Loss:  -0.010124166 Validation Decoder Loss:  -1.0552443
Encoder Loss:  0.14676447  || Decoder Loss:  -0.044733025 Validation Decoder Loss:  -0.87587005
Encoder Loss:  0.16261142  || Decoder Loss:  -0.093693346 Validation Decoder Loss:  -0.6989015
Encoder Loss:  0.18167289  || Decoder Loss:  -0.1315524 Validation Decoder Loss:  -0.48065498
Encoder Loss:  0.19646579  || Decoder Loss:  -0.16001979 Validation Decoder Loss:  -0.37717324
Encoder Loss:  0.20483561  || Decoder Loss:  -0.17668647 Validation Decoder Loss:  -0.31909654
Encoder Loss:  0.21047658  || Decoder Loss:  -0.18785256 Validation Decoder Loss:  -0.28698
Encoder Loss:  0.21375448  || Decoder Loss:  -0.19439176 Validation Decoder Loss:  -0.27822572
Encoder Loss:  0.21608122  || Decoder Loss:  -0.19980654 Validation Decoder Loss:  -0.26055348
Encoder Loss:  0.21853417  || Decoder Loss:  -0.20480274 Validation Decoder Loss:  -0.23926584
Encoder Loss:  0.22064428  || Decoder Loss:  -0.20997

### Get possible kernel and stride combinations according to input and output shapes

In [None]:
# Gather the kernel/stride search entries of the three networks.
hyperparameters = (
    add_kernel_stride_hyperparameters(eeg_network, 'eeg_net')
    + add_kernel_stride_hyperparameters(bold_network, 'bold_net')
    + add_kernel_stride_hyperparameters(decoder_network, 'decoder_net')
)

# Entries with a single option need no search: move copies of them to
# `set_parameters` and keep only the rest in `hyperparameters`. A single pass
# preserves the original order and also terminates when the list is empty or
# when every entry is fixed (the previous pop-and-restart loop did not).
set_parameters = [param.copy() for param in hyperparameters if len(param['domain']) == 1]
hyperparameters = [param for param in hyperparameters if len(param['domain']) != 1]

for param in set_parameters:
    print(param)

# The optimizer searches over option positions, so each remaining domain is
# replaced by the indices 0..n-1 of its options.
hyperparameters_index = [
    dict(param, domain=tuple(range(len(param['domain']))))
    for param in hyperparameters
]

In [None]:
# NOTE(review): kernel_stride_BO is defined in a later cell of this notebook,
# so that cell has to be executed before this one. The same call is repeated
# right after the definition.
# Fixed (single-option) parameters are resolved once and passed as `unmutable`.
kernel_stride_BO(hyperparameters_index, 
                 hyperparameters, 
                 optimized_parameters,
                 get_models_unmutable_parameters(set_parameters), 
                 eeg_network, 
                 bold_network, 
                 decoder_network)

In [None]:
def kernel_stride_BO(indexes, hyperparameters, optimized_parameters, unmutable, eeg_network, bold_network, decoder_network, epochs=40, max_iter=100):
    """Search kernel/stride options with Bayesian optimization (GPyOpt).

    Each candidate is turned into three rebuilt networks, trained with
    `custom_training.linear_combination_training`, and scored by the
    validation loss that call returns. The training/validation arrays
    (`X_train_eeg`, `X_train_bold`, `tr_y`, `X_val_eeg`, `X_val_bold`,
    `tv_y`) are read from the module namespace.

    Args:
        indexes: GPyOpt domain; one discrete entry per searchable parameter,
            whose values are option positions.
        hyperparameters: searchable parameter dicts ('name', 'domain' of
            (kernel, stride) options), aligned with `indexes`.
        optimized_parameters: sequence whose entries 0, 4 and 5 give the
            learning rate, loss coefficient and batch size.
        unmutable: already-fixed settings per network; deep-copied before
            each candidate is merged into it.
        eeg_network, bold_network, decoder_network: template networks that
            are rebuilt for every candidate.
        epochs: training epochs per candidate evaluation.
        max_iter: number of optimization iterations.

    Returns:
        (eeg_net, bold_net, decoder_net, best_loss) rebuilt from the best
        candidate, or four `None` values if any rebuilt network is falsy.
    """
    # These do not change between evaluations, so read them once.
    current_learning_rate = float(optimized_parameters[0])
    current_loss_coefficient = float(optimized_parameters[4])
    current_batch_size = int(optimized_parameters[5])

    def build_networks(index_opt):
        """Rebuild the three networks for one row of option indices."""
        # NOTE(review): get_models_unmutable_parameters reads only columns
        # 0, 1 and 2 of `index_opt` (one option index per network) -- confirm
        # this matches the layout of `indexes`.
        test_parameters = get_models_unmutable_parameters(hyperparameters,
                                                          index_opt=index_opt,
                                                          new_parameters=copy.deepcopy(unmutable))

        # The networks come from this function's arguments; the previous
        # `global` declaration made the objective ignore them.
        return (get_model_by_kernel_stride(eeg_network, test_parameters, name='eeg'),
                get_model_by_kernel_stride(bold_network, test_parameters, name='bold'),
                get_model_by_kernel_stride(decoder_network, test_parameters, name='decoder'))

    def bayesian_optimization_function(x):
        """Objective: train the candidate `x` and return its validation loss."""
        model_name = 'bold_synthesis_net_lr'
        dcca = False

        # Define the architectures for this candidate.
        new_eeg_net, new_bold_net, new_decoder_net = build_networks(x)

        eeg_input_shape = (X_train_eeg.shape[1], X_train_eeg.shape[2], X_train_eeg.shape[3], 1)
        bold_input_shape = (X_train_bold.shape[1], X_train_bold.shape[2], 1)

        # Join the EEG and BOLD branches.
        multi_modal_model = custom_training.multi_modal_network(eeg_input_shape,
                                                                bold_input_shape,
                                                                new_eeg_net,
                                                                new_bold_net,
                                                                dcca=dcca,
                                                                corr_distance=True)

        # Run the training session. Exceptions raised here are not handled
        # and propagate to the optimizer.
        print("Starting training")

        validation_loss = custom_training.linear_combination_training(X_train_eeg,
                                                                      X_train_bold,
                                                                      tr_y,
                                                                      new_eeg_net,
                                                                      new_decoder_net,
                                                                      multi_modal_model,
                                                                      epochs=epochs,
                                                                      encoder_optimizer=tf.keras.optimizers.Adam(learning_rate=current_learning_rate),
                                                                      decoder_optimizer=tf.keras.optimizers.Adam(learning_rate=current_learning_rate),
                                                                      loss_function=losses.get_reconstruction_cosine_loss,
                                                                      batch_size=current_batch_size, linear_combination=current_loss_coefficient,
                                                                      X_val_eeg=X_val_eeg,
                                                                      X_val_bold=X_val_bold,
                                                                      tv_y=tv_y)

        print("Model: " + model_name +
        ' Train Intances: ' + str(len(X_train_bold)) + ' | Validation Instances: ' + str(len(X_val_bold)) +  ' | Validation Loss: ' + str(validation_loss))

        return validation_loss

    optimizer = GPyOpt.methods.BayesianOptimization(f=bayesian_optimization_function,
                                                    domain=indexes,
                                                    model_type="GP_MCMC",
                                                    acquisition_type="EI_MCMC")

    print("Started Optimization Process")
    optimizer.run_optimization(max_iter=max_iter)

    # Rebuild from the best *point* (x_opt); fx_opt is the best loss value and
    # cannot be used as an index. x_opt is 1-D, so promote it to one row.
    new_eeg_net, new_bold_net, new_decoder_net = build_networks(np.atleast_2d(optimizer.x_opt))

    if(not (new_eeg_net and new_bold_net and new_decoder_net)):
        return None, None, None, None

    print("Optimized Parameters: {0}".format(optimizer.x_opt))
    print("Optimized Validation Decoder Loss: {0}".format(optimizer.fx_opt))
    print("\n\n\n\n\n\n\n\n\n\n")

    return new_eeg_net, new_bold_net, new_decoder_net, optimizer.fx_opt



# Run the search: index domains for the optimizer, the matching option lists,
# and the single-option parameters resolved up front as the fixed settings.
kernel_stride_BO(
    indexes=hyperparameters_index,
    hyperparameters=hyperparameters,
    optimized_parameters=optimized_parameters,
    unmutable=get_models_unmutable_parameters(set_parameters),
    eeg_network=eeg_network,
    bold_network=bold_network,
    decoder_network=decoder_network,
)