In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D, AveragePooling1D, concatenate
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, Reshape, multiply
from tensorflow.keras.layers import ConvLSTM2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling3D

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras import regularizers
from tensorflow.keras import backend as K

import optuna

from datetime import datetime
import random 

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
random.seed(0)

import pickle
with open("../preproccessed_data.pickle", 'rb') as f:
    data = pickle.load(f)
    
train = data['train']
train_target = data['train_target']
validation = data['validation']
validation_target = data['validation_target']
test = data['test']
test_target = data['test_target']

# search with a subset of the data
from sklearn.model_selection import train_test_split
train, _, train_target, _ = train_test_split(train, train_target, test_size=0.8, stratify=train_target, random_state=0)
validation, _, validation_target, _ = train_test_split(validation, validation_target, test_size=0.8, stratify=validation_target, random_state=0)


### optuna

In [5]:
classes = list(set(train_target))

def squeeze_excite_block(filters,input):                      
    se = GlobalAveragePooling1D()(input)
    se = Reshape((1, filters))(se) 
    se = Dense(filters//16, activation='relu')(se)
    se = Dense(filters, activation='sigmoid')(se)
    se = multiply([input, se])
    return se

def build_model(trial):
    
    num_layers = trial.suggest_int("num_layers", 2, 6)
    n_filters = trial.suggest_int(f"n_filters", 16, 128, step=8)
    kernel_size = trial.suggest_int(f"kernel_size", 4, 10, step=1)
    pool_size = trial.suggest_int(f"pool_size", 2, 4, step=1)
    kernel_regulizer_l2 = trial.suggest_float('kernel_regulizer_l2', 1e-7, 1e-4, log=True)
    dropout = trial.suggest_float(f'dropout', 0, 0.8, step=0.05)
    dense_dropout = trial.suggest_float(f'dropout_dense', 0, 0.8, step=0.05)
    


    input_x = Input(shape=(1500,1,))
    for i in range(num_layers):
        if i == 0:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(input_x)
        else:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(x)
        
        x = BatchNormalization()(x)
        x = squeeze_excite_block(n_filters*(2**i),x)
        x = Activation('relu')(x)
        if (i+1) % 2 == 0: # every two layers one pooling
            x = AveragePooling1D(pool_size=pool_size)(x)
        x = Dropout(dropout)(x)
    
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(20, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    Y_HAT = Dense(4, activation="softmax")(x)
    model = Model(inputs=input_x, outputs=Y_HAT)
    
    return model

def batch_generator(X, y, batch_size):
    num_samples = X.shape[0]
    num_batches = num_samples // batch_size
    if num_samples % batch_size:
        num_batches += 1

    while True:  # Loop forever, the generator never ends
        for i in range(num_batches):
            start = i * batch_size
            end = min((i+1) * batch_size, num_samples)
            yield X[start:end], y[start:end]




def objective(trial):
    model = build_model(trial)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    print("model parameters: {}K".format(model.count_params()//1000))
    batch_size = trial.suggest_categorical("batch_size", [128, 256, 512, 1024])
    generator = batch_generator(train, train_target, batch_size=batch_size)
    
    for step in range(100):
        num_batches = len(train) // batch_size
        for batch_id in range(num_batches):
            X_batch, y_batch = next(generator)
            # Now you can use X_batch and y_batch to train your model
            model.train_on_batch(X_batch, y_batch)
            
        
            # Progress bar
            print('\r',f"epoch {step}/100", 'Training progress: ', '[{0}{1}]'.format('#' * ((batch_id+1) * 50 // num_batches), '.' * (50 - ((batch_id+1) * 50 // num_batches))), f' {((batch_id+1) * 100 // num_batches)}%', end='')
        
    
        # Calculate the intermediate value by using evaluate instead of score
        train_loss, train_intermediate_value = model.evaluate(train, train_target, verbose=0)
        val_loss, val_intermediate_value = model.evaluate(validation, validation_target, verbose=0)
        
        
        trial.report(val_intermediate_value, step)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            print("pruned")
            raise optuna.TrialPruned()
            
        
        print(f" | train_acc={train_intermediate_value:.2f}, val_acc={val_intermediate_value:.2f}")

    # Calculate the final value using evaluate instead of score
    loss, final_value = model.evaluate(validation, validation_target, verbose=0)
    return final_value  # return accuracy

In [8]:
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective, n_trials=5)

best_trial = study.best_trial

print("Best trial:")
print(" Value: ", best_trial.value)
print(" Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2023-07-14 13:00:55,607] A new study created in memory with name: no-name-62e8c331-dc33-44a0-9cb4-b7d284646e5d


model parameters: 57K
 epoch 0/100 Training progress:  [##################################################]  100% | train_acc=0.35, val_acc=0.35
 epoch 1/100 Training progress:  [##################################################]  100% | train_acc=0.48, val_acc=0.44
 epoch 2/100 Training progress:  [##################################################]  100% | train_acc=0.50, val_acc=0.46
 epoch 3/100 Training progress:  [##################################################]  100% | train_acc=0.50, val_acc=0.45
 epoch 4/100 Training progress:  [##################################################]  100% | train_acc=0.53, val_acc=0.49
 epoch 5/100 Training progress:  [##################################################]  100% | train_acc=0.54, val_acc=0.51
 epoch 6/100 Training progress:  [##################################################]  100% | train_acc=0.53, val_acc=0.49
 epoch 7/100 Training progress:  [##################################################]  100% | train_acc=0.55, val_acc

[I 2023-07-14 13:21:07,444] Trial 0 finished with value: 0.5456249713897705 and parameters: {'num_layers': 2, 'n_filters': 48, 'kernel_size': 7, 'pool_size': 2, 'kernel_regulizer_l2': 3.8593047842255e-05, 'dropout': 0.7000000000000001, 'dropout_dense': 0.55, 'learning_rate': 0.0039190176239533685, 'batch_size': 256}. Best is trial 0 with value: 0.5456249713897705.


model parameters: 308K
 epoch 0/100 Training progress:  [##################################################]  100% | train_acc=0.36, val_acc=0.31
 epoch 1/100 Training progress:  [##################################################]  100% | train_acc=0.29, val_acc=0.27
 epoch 2/100 Training progress:  [##################################################]  100% | train_acc=0.33, val_acc=0.30
 epoch 3/100 Training progress:  [##################################################]  100% | train_acc=0.36, val_acc=0.33
 epoch 4/100 Training progress:  [##################################################]  100% | train_acc=0.38, val_acc=0.33
 epoch 5/100 Training progress:  [##################################################]  100% | train_acc=0.40, val_acc=0.36
 epoch 6/100 Training progress:  [##################################################]  100% | train_acc=0.41, val_acc=0.37
 epoch 7/100 Training progress:  [##################################################]  100% | train_acc=0.44, val_ac

[W 2023-07-14 13:37:58,647] Trial 1 failed with parameters: {'num_layers': 2, 'n_filters': 120, 'kernel_size': 9, 'pool_size': 3, 'kernel_regulizer_l2': 5.987997722234362e-06, 'dropout': 0.2, 'dropout_dense': 0.6000000000000001, 'learning_rate': 0.0003759489996569065, 'batch_size': 1024} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/rezmiry/venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_87878/1367354676.py", line 96, in objective
    model.train_on_batch(X_batch, y_batch)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras/engine/training.py", line 2512, in train_on_batch
    logs = tf_utils.sync_to_numpy_or_python_type(logs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras/utils/tf_utils.py", line 680, in sync_to_numpy_or_python_type
    return tf.nest.map_structure(_to_single_numpy_or_python_type, tensors)
  Fil

KeyboardInterrupt: 

### train best classifier conv1d

In [15]:
def build_model(hp):
    input_x = Input(shape=(1500,1,))
    num_layers = 7
    n_filters = 56
    kernel_size = 10
    pool_size = 8
    kernel_regulizer_l2 = 4.4087e-06
    dropout = 0.7
    dense_dropout = 0.35
    learning_rate= 0.0057992
    


    for i in range(num_layers):
        if i == 0:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(input_x)
        else:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(x)
        
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        if (i+1) % 2 == 0: # every two layers one pooling
            x = MaxPooling1D(pool_size=pool_size)(x)
        x = Dropout(dropout)(x)
    

    x = GlobalAveragePooling1D()(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(20, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    Y_HAT = Dense(4, activation="softmax")(x)
    model = Model(inputs=input_x, outputs=Y_HAT)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    # trainable_count = int(
    #     np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
    # print('Trainable params: {:,}'.format(trainable_count))
    print("model parameters: {} M".format(model.count_params()//1000000))
    return model

In [16]:
best_model = build_model(None)

model parameters: 86 M


In [None]:
# chk = ModelCheckpoint(model_name, monitor='val_accuracy', save_best_only=True, mode='max', verbose=2)
es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=100)
best_model.fit(train, train_target, epochs=200, batch_size=640, callbacks=[es], validation_data=(validation,validation_target))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200

### Transformers

In [6]:
import tensorflow as tf
from tensorflow.keras import layers

class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(query, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(key, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(value, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)

        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)
        return output


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


def build_model(hp):
    embed_dim = hp.Int("embed_dim", 16, 128, step=16) #32  # Embedding size for each token
    num_heads = hp.Int("num_heads", 2, 8, step=2) #2  # Number of attention heads
    ff_dim = hp.Int("ff_dim", 16, 128) #32  # Hidden layer size in feed forward network inside transformer
    learning_rate = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')
    
    inputs = layers.Input(shape=(1500, 1))  # assume we have 128 time steps and 10 features
    x = TransformerBlock(embed_dim, num_heads, ff_dim)(inputs)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(30, activation="relu")(x)
    x = layers.Dropout(0.1)(x)
    outputs = layers.Dense(4, activation="softmax")(x)  # assume we have 3 classes

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    # print("model parameters: " , model.count_params()/1000)
    print(model.summary())    
    return model

In [7]:
hp_name = "hp_transformer_test"

import keras_tuner as kt

# tuner = kt.Hyperband(
#     build_model,
#     objective='val_accuracy',
#     max_epochs=500,
#     hyperband_iterations=2,
#     directory=f"./my_hp_results/{hp_name}",
#     overwrite=True
# )

# tuner.search(train,
#     train_target,
#     validation_data=(validation,validation_target),
#     epochs=500,
#     batch_size=128,
#     verbose=2,
#     callbacks=[EarlyStopping(patience=1)])

tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=2,
    directory=f"./my_hp_results/{hp_name}_bayesian",
    overwrite=True
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 1500, 1)]         0         
                                                                 
 transformer_block_1 (Transf  (None, 1500, 16)         976       
 ormerBlock)                                                     
                                                                 
 global_average_pooling1d_1   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_14 (Dense)            (None, 30)                510       
                                                                 
 dropout_5 (Dropout)         (None, 30)                0         
                                                                 
 dense_15 (Dense)            (None, 4)                 124 

In [None]:
tuner.search(train,
    train_target,
    validation_data=(validation,validation_target),
    epochs=10,
    batch_size=64,
    verbose=2,
    callbacks=[EarlyStopping(patience=2)])

Trial 7 Complete [00h 34m 50s]
val_accuracy: 0.45000000298023224

Best val_accuracy So Far: 0.45000000298023224
Total elapsed time: 02h 00m 06s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
32                |32                |embed_dim
4                 |8                 |num_heads
112               |39                |ff_dim
0.00025881        |0.0010482         |learning_rate

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1500, 1)]         0         
                                                                 
 transformer_block (Transfor  (None, 1500, 32)         8688      
 merBlock)                                                       
                                                                 
 global_average_pooling1d (G  (None, 32)               0         
 lobalAveragePooling1D)                

### par_lstmcnn

In [19]:
def build_model(hp):
    n_lstm_layers = hp.Int("n_lstm_layers", 1, 3)
    mm_cells = hp.Int(f"mem_cells", min_value=10, max_value=100, step=10)

    n_conv_layers = hp.Int("n_conv_layers", 2, 8)    
    n_filters = hp.Int(f"n_filters", min_value=16, max_value=128, step=8)
    kernel_size = hp.Int(f"kernel_size", min_value=6, max_value=15, step=1)
    pool_size = hp.Int(f"pool_size", min_value=2, max_value=8, step=1)
    use_global_averaging = hp.Choice('use_global_averaging', [True, False])

    # recurrent_dropout = hp.Float(f'recurrent_dropout', 0, 0.8, step=0.05)
    recurrent_dropout = 0
    lstm_dropout = hp.Float(f'lstm_dropout', 0, 0.8, step=0.05, default=0.5)
    kernel_regulizer_l2 = hp.Float('kernel_regulizer_l2', 1e-7, 1e-1, sampling='log')
    cnn_dropout = hp.Float(f'cnn_dropout', 0, 0.8, step=0.05, default=0.5)

    learning_rate = hp.Float('learning_rate', 1e-7, 1e-2, sampling='log')


    ip = Input(shape=(1500, 1))
    for i in range(n_lstm_layers):
        if i == 0:
            x = LSTM(mm_cells,
                    recurrent_dropout = recurrent_dropout,
                    return_sequences= i!=(n_lstm_layers-1),
                    kernel_initializer = 'lecun_normal')(ip)
        else:
            x = LSTM(mm_cells,
                    recurrent_dropout =  recurrent_dropout,
                    return_sequences= i!=(n_lstm_layers-1),
                    kernel_initializer = 'lecun_normal')(x)
        
        if i!=(n_lstm_layers-1): # don't add dropout at the last layer
            x = Dropout(lstm_dropout)(x)
    
    # y = Permute((2, 1))(ip)
    for j in range(n_conv_layers):
        if j == 0:
            y = Conv1D(filters=n_filters*(2**j),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(ip)
        else:
            y = Conv1D(filters=n_filters*(2**j),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        if (j+1) % 2 == 0: # every two layers one pooling
            y = AveragePooling1D(pool_size=pool_size)(y)
        y = Dropout(cnn_dropout)(y)
    
    if use_global_averaging:
        y = GlobalAveragePooling1D()(y)
    else:
        y = Flatten()(y)

    x = concatenate([x, y])
    out = Dense(4, activation='softmax')(x)
    model = Model(ip, out)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    print("model parameters: " , model.count_params()/1000)
    # print(model.summary())
    return model

In [20]:
hp_name = "hp_parlstmcnn_test"

import keras_tuner as kt

# tuner = kt.Hyperband(
#     build_model,
#     objective='val_accuracy',
#     max_epochs=500,
#     hyperband_iterations=2,
#     directory=f"./my_hp_results/{hp_name}",
#     overwrite=True
# )

# tuner.search(train,
#     train_target,
#     validation_data=(validation,validation_target),
#     epochs=500,
#     batch_size=128,
#     verbose=2,
#     callbacks=[EarlyStopping(patience=1)])

tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=2,
    directory=f"./my_hp_results/{hp_name}_bayesian",
    overwrite=True
)

model parameters:  4.06


In [21]:
tuner.search(train,
    train_target,
    validation_data=(validation,validation_target),
    epochs=30,
    batch_size=128,
    verbose=2,
    callbacks=[EarlyStopping(patience=10)])

Trial 2 Complete [00h 32m 55s]
val_accuracy: 0.3734374940395355

Best val_accuracy So Far: 0.3734374940395355
Total elapsed time: 00h 37m 06s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
2                 |1                 |n_lstm_layers
20                |50                |mem_cells
8                 |2                 |n_conv_layers
32                |120               |n_filters
15                |14                |kernel_size
3                 |8                 |pool_size
1                 |1                 |use_global_averaging
0.35              |0.05              |lstm_dropout
0.0012172         |2.4396e-06        |kernel_regulizer_l2
0.8               |0.7               |cnn_dropout
2.0494e-05        |7.0486e-06        |learning_rate

model parameters:  167824.708
Epoch 1/100
38/38 - 241s - loss: 11.2604 - accuracy: 0.2669 - val_loss: 11.1929 - val_accuracy: 0.2500 - 241s/epoch - 6s/step
Epoch 2/100
38/38 - 107s - loss: 10.9960 - accuracy: 

KeyboardInterrupt: 

### Conv1dse

In [4]:
def squeeze_excite_block(filters,input):                      
    se = GlobalAveragePooling1D()(input)
    se = Reshape((1, filters))(se) 
    se = Dense(filters//16, activation='relu')(se)
    se = Dense(filters, activation='sigmoid')(se)
    se = multiply([input, se])
    return se

In [19]:
def build_model(hp):
    input_x = Input(shape=(1500,1,))
    num_layers = hp.Int("num_layers", 2, 8)
    n_filters = hp.Int(f"n_filters", min_value=16, max_value=128, step=8)
    kernel_size = hp.Int(f"kernel_size", min_value=4, max_value=15, step=1)
    pool_size = hp.Int(f"pool_size", min_value=2, max_value=8, step=1)
    kernel_regulizer_l2 = hp.Float('kernel_regulizer_l2', 1e-7, 1e-1, sampling='log')
    dropout = hp.Float(f'dropout', 0, 0.8, step=0.05, default=0.5)
    dense_dropout = hp.Float(f'dropout_dense', 0, 0.8, step=0.05, default=0.5)
    learning_rate=hp.Float('learning_rate', 1e-7, 1e-2, sampling='log')

    for i in range(num_layers):
        if i == 0:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(input_x)
        else:
            x = Conv1D(filters=n_filters*(2**i),
                    kernel_size=kernel_size,
                    kernel_initializer = 'lecun_normal',
                    kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
                    padding='same')(x)
        
        x = BatchNormalization()(x)
        x = squeeze_excite_block(n_filters*(2**i),x)
        x = Activation('relu')(x)
        if (i+1) % 2 == 0: # every two layers one pooling
            x = AveragePooling1D(pool_size=pool_size)(x)
        x = Dropout(dropout)(x)
    
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dense_dropout)(x)
    x = Dense(20, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(kernel_regulizer_l2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    Y_HAT = Dense(4, activation="softmax")(x)
    model = Model(inputs=input_x, outputs=Y_HAT)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    print("model parameters: {}K".format(model.count_params()//1000))
    # print(model.summary())
    return model

In [20]:
hp_name = "hp_conv1dse_test"

import keras_tuner as kt

# tuner = kt.Hyperband(
#     build_model,
#     objective='val_accuracy',
#     max_epochs=500,
#     hyperband_iterations=2,
#     directory=f"./my_hp_results/{hp_name}",
#     overwrite=True
# )

# tuner.search(train,
#     train_target,
#     validation_data=(validation,validation_target),
#     epochs=500,
#     batch_size=128,
#     verbose=2,
#     callbacks=[EarlyStopping(patience=1)])

tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=2,
    directory=f"./my_hp_results/{hp_name}_bayesian",
    overwrite=True
)

model parameters: 18K


In [21]:
tuner.search(train,
    train_target,
    validation_data=(validation,validation_target),
    epochs=200,
    batch_size=128,
    verbose=2,
    callbacks=[EarlyStopping(patience=10)])


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
7                 |7                 |num_layers
72                |72                |n_filters
11                |11                |kernel_size
6                 |6                 |pool_size
0.021848          |0.021848          |kernel_regulizer_l2
0.45              |0.45              |dropout
0.55              |0.55              |dropout_dense
1.0376e-05        |1.0376e-05        |learning_rate

model parameters: 159744K
Epoch 1/200
297/297 - 112s - loss: 186.2729 - accuracy: 0.2784 - val_loss: 165.7838 - val_accuracy: 0.2512 - 112s/epoch - 377ms/step
Epoch 2/200


KeyboardInterrupt: 

### Debug convlstm

In [3]:
n_steps = 1
n_length = 1500
n_features = 1
n_outputs = 4
train = train.reshape((train.shape[0], n_steps, 1, n_length, n_features))
validation = validation.reshape((validation.shape[0], n_steps, 1, n_length, n_features))
test = test.reshape((test.shape[0], n_steps, 1, n_length, n_features))


In [9]:
def build_model(hp):
    model = Sequential()
    num_layers = hp.Int("num_layers", 1, 2)
    # n_filters = hp.Int(f"n_filters", min_value=8, max_value=32, step=8)
    n_filters = 32
    kernel_size = 12
    # kernel_size = hp.Int(f"kernel_size", min_value=4, max_value=16, step=4)
    kernel_regulizer_l2 = hp.Float('kernel_regulizer_l2', 1e-7, 1e-1, sampling='log')
    recurrent_regulizer_l2 = hp.Float('recurrent_regulizer_l2', 1e-7, 1e-1, sampling='log')
    dense_regulizer_l2 = hp.Float('dense_regulizer_l2', 1e-7, 1e-1, sampling='log')
    dropout = hp.Float(f'dropout', 0, 0.8, step=0.05, default=0.5)
    # recurrent_dropout = hp.Float(f'recurrent_dropout', 0, 0.8, step=0.1, default=0)
    recurrent_dropout = 0
    dense_dropout = hp.Float(f'dropout_dense', 0, 0.8, step=0.05, default=0.5)
    # learning_rate=hp.Float('learning_rate', 1e-7, 1e-2, sampling='log')
    learning_rate=0.001

    for i in range(num_layers):
        if i == 0:
            model.add(ConvLSTM2D(
            filters=n_filters*(2**i),
            kernel_size=kernel_size,
            dropout=dropout,
            recurrent_dropout=recurrent_dropout,
            kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
            recurrent_regularizer=regularizers.l2(recurrent_regulizer_l2),
            activation='relu',
            padding='same',
            return_sequences=True,
            input_shape=(n_steps, 1, n_length, n_features)))
        elif i < (num_layers-1):
            model.add(ConvLSTM2D(
            filters=n_filters*(2**i),
            kernel_size=kernel_size,
            dropout=dropout,
            recurrent_dropout=recurrent_dropout,
            kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
            recurrent_regularizer=regularizers.l2(recurrent_regulizer_l2),
            activation='relu',
            padding='same',
            return_sequences=True))
        else:
            model.add(ConvLSTM2D(
            filters=n_filters*(2**i),
            kernel_size=kernel_size,
            dropout=dropout,
            recurrent_dropout=recurrent_dropout,
            kernel_regularizer=regularizers.l2(kernel_regulizer_l2),
            recurrent_regularizer=regularizers.l2(recurrent_regulizer_l2),
            activation='relu',
            padding='same',
            return_sequences=False))
            
    model.add(GlobalAveragePooling3D())
    model.add(Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(dense_regulizer_l2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dense_dropout))
    model.add(Dense(100, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(dense_regulizer_l2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dense_dropout))
    model.add(Dense(20, kernel_initializer = 'lecun_normal', kernel_regularizer=regularizers.l2(dense_regulizer_l2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dense(4, activation="softmax"))
    
    
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    # print("model parameters: {}K".format(model.count_params()//1000))
    print(model.summary())
    return model

In [10]:
hp_name = "hp_conv1dse_test"

import keras_tuner as kt

# tuner = kt.Hyperband(
#     build_model,
#     objective='val_accuracy',
#     max_epochs=500,
#     hyperband_iterations=2,
#     directory=f"./my_hp_results/{hp_name}",
#     overwrite=True
# )

# tuner.search(train,
#     train_target,
#     validation_data=(validation,validation_target),
#     epochs=500,
#     batch_size=128,
#     verbose=2,
#     callbacks=[EarlyStopping(patience=1)])

tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=20,
    executions_per_trial=2,
    directory=f"./my_hp_results/{hp_name}_bayesian",
    overwrite=True
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_2 (ConvLSTM2D)  (None, 1, 1, 1500, 32)    608384    
                                                                 
 global_average_pooling3d (G  (None, 32)               0         
 lobalAveragePooling3D)                                          
                                                                 
 dense_4 (Dense)             (None, 100)               3300      
                                                                 
 batch_normalization_3 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation_3 (Activation)   (None, 100)               0         
                                                                 
 dropout_2 (Dropout)         (None, 100)              

In [12]:
tuner.search(train,
    train_target,
    validation_data=(validation,validation_target),
    epochs=30,
    batch_size=512,
    verbose=1,
    callbacks=[EarlyStopping(patience=5)])

Trial 3 Complete [00h 00m 01s]

Best val_accuracy So Far: 0.3738125115633011
Total elapsed time: 00h 32m 50s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
2                 |1                 |num_layers
0.001983          |7.9538e-05        |kernel_regulizer_l2
0.00019192        |0.010572          |recurrent_regulizer_l2
1.9885e-06        |0.00014611        |dense_regulizer_l2
0.2               |0.55              |dropout
0.5               |0.8               |dropout_dense



Traceback (most recent call last):
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 213, in _build_and_fit_model
    model = self._try_build(hp)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 155, in _try_build
    model = self._build_hypermodel(hp)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 146, in _bu

RuntimeError: Number of consecutive failures excceeded the limit of 3.
Traceback (most recent call last):
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 213, in _build_and_fit_model
    model = self._try_build(hp)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 155, in _try_build
    model = self._build_hypermodel(hp)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras_tuner/engine/tuner.py", line 146, in _build_hypermodel
    model = self.hypermodel.build(hp)
  File "/tmp/ipykernel_8551/198724820.py", line 54, in build_model
    model.add(GlobalAveragePooling3D())
  File "/home/rezmiry/venv/lib/python3.10/site-packages/tensorflow/python/trackable/base.py", line 205, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/rezmiry/venv/lib/python3.10/site-packages/keras/engine/input_spec.py", line 235, in assert_input_compatibility
    raise ValueError(
ValueError: Input 0 of layer "global_average_pooling3d" is incompatible with the layer: expected ndim=5, found ndim=4. Full shape received: (None, 1, 1500, 64)
