In [1]:
import sys

import keras_tuner as kt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import callbacks
from tensorflow.keras import optimizers

# Base directory used to build the data paths in this notebook and,
# presumably, the location of the `src` package (TODO confirm).
# It must be on sys.path *before* the `src.*` imports below; appending it
# afterwards (as the cell did originally) has no effect on those imports, so
# a fresh kernel would only work if `src` happened to be importable already.
PWD = '../../..'
if PWD not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.append(PWD)

from src.cross_validator import KerasTunerAllChannelsCrossValidator
from src.dataset import load_dataset_train_val_all_channels, \
    load_dataset_train_test_all_channels
from src.models import unet_builder as bare_model_builder
from src.network_utils import count_params, gaussian_kernel

# Only surface TensorFlow log records at ERROR level or above.
tf.get_logger().setLevel('ERROR')

In [2]:
# --- Identification / persistence -------------------------------------------
PROJECT_NAME = 'unet'
OVERWRITE = False  # forwarded to both the tuner and the cross-validator

# --- Paths (all relative to PWD) --------------------------------------------
DATASET_PATH = f'{PWD}/data/dataset/dataset.pkl'  # not referenced by the cells shown here
TRIALS_DIR = f'{PWD}/data/model_selection/channel_all/tuner'
CROSSVAL_DIR = f'{PWD}/data/model_selection/channel_all/cross_val'

# --- Optimisation -----------------------------------------------------------
LR = 0.01            # learning rate handed to Adam
ES_MIN_DELTA = 0.01  # min_delta for early stopping
LOSS_WEIGHT = 1000   # passed as `loss_weights` when compiling the model

# --- Hyperparameter search --------------------------------------------------
N_EPOCHS = 500
# NOTE(review): 8096 is not a power of two (8192) - confirm this is intentional.
BATCH_SIZE = 8096
MAX_TRIALS = 40
EXECUTIONS_PER_TRIAL = 2

# --- Cross-validation of the best candidates --------------------------------
TOP_N = 5
CROSSVAL_N_CV = 5
CROSSVAL_N_EXEC = 2

In [3]:
# The loader returns four values; only the first and third are used in this
# notebook. Both are indexed by tuple keys such as (2, 11).
X_all, _unused_X, y_all_original, _unused_y = load_dataset_train_test_all_channels(PWD)

# Sanity check: show the shapes stored under one example key.
example_key = (2, 11)
X_all[example_key].shape, y_all_original[example_key].shape

((22134, 24), (22134,))

In [4]:
# Load the train/validation split; each returned object is a mapping whose
# values are arrays (one entry per key).
X_train_by_key, X_val_by_key, y_train_by_key, y_val_by_key = load_dataset_train_val_all_channels(PWD)

# Stack the per-key arrays into one array per split.
X_train, y_train_original, X_val, y_val_original = (
    np.concatenate(list(per_key.values()))
    for per_key in (X_train_by_key, y_train_by_key, X_val_by_key, y_val_by_key)
)

# Fixed seed so the shuffling below is reproducible.
rng = np.random.default_rng(seed=42)

train_idx = np.arange(len(X_train))
val_idx = np.arange(len(X_val))
# The train permutation is drawn first, then the validation one; keep this
# order, because both draws come from the same generator.
rng.shuffle(train_idx)
rng.shuffle(val_idx)

# Apply the same permutation to inputs and targets so the pairs stay aligned.
X_train = X_train[train_idx]
y_train_original = y_train_original[train_idx]
X_val = X_val[val_idx]
y_val_original = y_val_original[val_idx]

X_train.shape, y_train_original.shape, X_val.shape, y_val_original.shape

((84824, 24), (84824,), (21208, 24), (21208,))

In [5]:
# Run every target through `gaussian_kernel`. According to the recorded
# output each scalar target becomes a vector, e.g. (84824,) -> (84824, 24).
y_all = {
    key: np.array([gaussian_kernel(target) for target in targets])
    for key, targets in y_all_original.items()
}
y_train = np.array([gaussian_kernel(target) for target in y_train_original])
y_val = np.array([gaussian_kernel(target) for target in y_val_original])

# NOTE(review): the first shape shown is that of the *original* targets, not
# of the transformed `y_all` - confirm this is what was meant to be displayed.
y_all_original[(2, 11)].shape, y_train.shape, y_val.shape

((22134,), (84824, 24), (21208, 24))

# Model

In [6]:
def model_builder(hp: kt.HyperParameters) -> keras.Model:
    """Build and compile one candidate model from a set of hyperparameters.

    The following hyperparameters are registered on ``hp``:

    * ``unet_depth``: int in [0, 3], default 2
    * ``n_conv_layers``: int in [1, 3]
    * ``conv_filters_mult``: one of 1, 2, 4, 8, 16, default 4
    * ``conv_spatial_dropout``: one of 0.0, 0.1, 0.2
    * ``batch_normalization``, ``input_batch_normalization``,
      ``normalize_signal``: booleans, default ``False``

    Args:
        hp: Keras Tuner hyperparameter container to sample the values from.

    Returns:
        The model created by ``bare_model_builder``, compiled with MSE loss
        (scaled by ``LOSS_WEIGHT``) and an Adam optimizer with learning
        rate ``LR``.
    """
    hp_unet_depth = hp.Int("unet_depth", min_value=0, max_value=3, step=1, default=2)
    hp_n_conv_layers = hp.Int("n_conv_layers", min_value=1, max_value=3, step=1)
    hp_filters_mult = hp.Choice("conv_filters_mult", values=[1, 2, 4, 8, 16], default=4)
    hp_spatial_dropout = hp.Choice("conv_spatial_dropout", values=[0.0, 0.1, 0.2])
    hp_batch_normalization = hp.Boolean("batch_normalization", default=False)
    hp_input_batch_normalization = hp.Boolean("input_batch_normalization", default=False)
    hp_normalize_signal = hp.Boolean("normalize_signal", default=False)

    model = bare_model_builder(hp_unet_depth, hp_n_conv_layers, hp_filters_mult, hp_spatial_dropout,
                               hp_batch_normalization, hp_input_batch_normalization, hp_normalize_signal)
    # `loss_weights` is documented as a list or dict of scalar coefficients.
    # Pass a one-element list instead of a bare int so the call matches the
    # documented form (the model is fitted against a single target array in
    # this notebook, so one weight is expected - TODO confirm the model has
    # exactly one output).
    model.compile(loss='mse', optimizer=optimizers.Adam(LR), loss_weights=[LOSS_WEIGHT])
    return model

In [7]:
# An empty HyperParameters container makes every hyperparameter fall back to
# its default, so this prints the architecture of the default configuration.
default_hyperparameters = kt.HyperParameters()
model_builder(default_hyperparameters).summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 24)]         0           []                               
                                                                                                  
 reshape (Reshape)              (None, 24, 1)        0           ['input_1[0][0]']                
                                                                                                  
 conv1d (Conv1D)                (None, 24, 32)       96          ['reshape[0][0]']                
                                                                                                  
 max_pooling1d (MaxPooling1D)   (None, 12, 32)       0           ['conv1d[0][0]']                 
                                                                                              

In [8]:
# Stop a run once the monitored value has not improved by at least
# ES_MIN_DELTA for 30 epochs. 'val_loss' is the Keras default monitor,
# spelled out here for readability.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=ES_MIN_DELTA, patience=30)

# Multiply the learning rate by 0.9 after 5 epochs without improvement.
# NOTE(review): this watches the *training* loss while early stopping watches
# the validation loss - confirm the difference is intentional.
lr_on_plateau = callbacks.ReduceLROnPlateau(monitor='loss', factor=0.9, patience=5)

model_callbacks = [early_stopping, lr_on_plateau]

# Bayesian tuner

In [9]:
# Bayesian-optimisation search over the space defined in `model_builder`,
# minimising the validation loss. Trials are stored under
# TRIALS_DIR/PROJECT_NAME; OVERWRITE is forwarded to the tuner unchanged.
bayesian_tuner = kt.BayesianOptimization(
    model_builder,
    objective='val_loss',
    executions_per_trial=EXECUTIONS_PER_TRIAL,
    max_trials=MAX_TRIALS,
    directory=TRIALS_DIR,
    project_name=PROJECT_NAME,
    overwrite=OVERWRITE,
)

# Every keyword below is forwarded by the tuner to the model's fit call.
bayesian_tuner.search(
    X_train,
    y_train,
    validation_data=[X_val, y_val],
    epochs=N_EPOCHS,
    callbacks=model_callbacks,
    batch_size=BATCH_SIZE,
    verbose=3,
)

# Results

In [10]:
# Report the TOP_N best hyperparameter sets found by the tuner together with
# the size of the model each one produces.
top_hyperparameters = bayesian_tuner.get_best_hyperparameters(TOP_N)
for rank, candidate in enumerate(top_hyperparameters):
    print(f'========== Model {rank} ==========')
    print(candidate.get_config()['values'])
    # The model is rebuilt only to count its parameters; it is not trained.
    print('Number of parameters:', count_params(model_builder(candidate)))

{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': False, 'input_batch_normalization': False, 'normalize_signal': False}
Number of parameters: 46561
{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 8, 'conv_spatial_dropout': 0.2, 'batch_normalization': True, 'input_batch_normalization': True, 'normalize_signal': False}
Number of parameters: 2962785
{'unet_depth': 2, 'n_conv_layers': 2, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': True, 'input_batch_normalization': True, 'normalize_signal': False}
Number of parameters: 8433
{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 8, 'conv_spatial_dropout': 0.1, 'batch_normalization': True, 'input_batch_normalization': False, 'normalize_signal': False}
Number of parameters: 2962689
{'unet_depth': 2, 'n_conv_layers': 2, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': True, 'input_batch_normalization': True,

# Cross-validation for top 5 models

In [11]:
# The cross-validator receives the data as plain lists, one entry per key of
# the all-channel dictionaries (inputs and Gaussian-kernel targets).
X_per_key = list(X_all.values())
y_per_key = list(y_all.values())

cross_validator = KerasTunerAllChannelsCrossValidator(
    bayesian_tuner,
    X_per_key,
    y_per_key,
    model_builder,
    directory=CROSSVAL_DIR,
    project_name=PROJECT_NAME,
    n_epochs=N_EPOCHS,
    batch_size=BATCH_SIZE,
    n_top=TOP_N,
    es_min_delta=ES_MIN_DELTA,
    n_cv=CROSSVAL_N_CV,
    n_executions=CROSSVAL_N_EXEC,
    overwrite=OVERWRITE,
)

# Calling the validator runs it; the result is used below as a mapping from
# model id to a collection of scores.
model_scores = cross_validator()

{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': False, 'input_batch_normalization': False, 'normalize_signal': False}
Number of parameters: 46561
Got score: 12.7802 (12.7359, 12.8246)
Got score: 12.7223 (12.6500, 12.7945)
Got score: 12.6009 (12.5697, 12.6320)
Got score: 21.1731 (12.6431, 29.7031)
Got score: 12.8140 (12.7748, 12.8531)


{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 8, 'conv_spatial_dropout': 0.2, 'batch_normalization': True, 'input_batch_normalization': True, 'normalize_signal': False}
Number of parameters: 2962785
Got score: 12.8865 (12.9393, 12.8337)
Got score: 12.6857 (12.6173, 12.7540)
Got score: 12.7433 (12.7509, 12.7357)
Got score: 12.9496 (12.8278, 13.0714)
Got score: 12.9748 (12.8309, 13.1187)


{'unet_depth': 2, 'n_conv_layers': 2, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': True, 'input_batch_normalization': True, 'normalize_signal': False}
Number of parameters: 8433
Got score: 12.9644 (12.9316, 12.9972)
Got score: 12.8189 (12.8731, 12.7646)
Got score: 12.7909 (12.7433, 12.8386)
Got score: 12.7235 (12.7046, 12.7424)
Got score: 12.9701 (13.0317, 12.9086)


{'unet_depth': 3, 'n_conv_layers': 3, 'conv_filters_mult': 8, 'conv_spatial_dropout': 0.1, 'batch_normalization': True, 'input_batch_normalization': False, 'normalize_signal': False}
Number of parameters: 2962689
Got score: 52.5601 (92.0649, 13.0554)
Got score: 52.6401 (13.6434, 91.6369)
Got score: 77.8079 (94.5294, 61.0865)
Got score: 88.5235 (92.3135, 84.7334)
Got score: 82.3447 (92.2253, 72.4640)


{'unet_depth': 2, 'n_conv_layers': 2, 'conv_filters_mult': 1, 'conv_spatial_dropout': 0.0, 'batch_normalization': True, 'input_batch_normalization': True, 'normalize_signal': False}
Number of parameters: 8433
Got score: 12.8817 (12.9013, 12.8620)
Got score: 12.7747 (12.8026, 12.7468)
Got score: 12.7688 (12.7384, 12.7993)
Got score: 12.8001 (12.7645, 12.8356)
Got score: 12.9752 (12.9658, 12.9845)


In [12]:
# Summarise the cross-validation scores of each candidate model.
# Mean and std are kept as floats (not pre-formatted strings) so the table
# can be sorted and compared numerically; rounding is applied only at the end.
mean_scores = [np.mean(scores) for scores in model_scores.values()]
std_scores = [np.std(scores) for scores in model_scores.values()]
# Models are rebuilt only to count their parameters; they are not trained.
# NOTE(review): this assumes `model_scores` is ordered like
# `get_best_hyperparameters(TOP_N)` - verify against the cross-validator.
n_params = [count_params(model_builder(hyperparameters)) for hyperparameters in
            bayesian_tuner.get_best_hyperparameters(TOP_N)]

df = pd.DataFrame({'mean': mean_scores, 'std': std_scores, 'n_params': n_params}, index=list(model_scores))
df.index.name = 'Model'
df = df.round({'mean': 2, 'std': 2})  # two decimals, as in the original display
df

Unnamed: 0_level_0,mean,std,n_params
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,14.42,3.38,46561
1,12.85,0.11,2962785
2,12.85,0.1,8433
3,70.78,15.22,2962689
4,12.84,0.08,8433
