### Research Process 


#### Init Libraries

In [None]:
import pandas as pd
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
import os

#### Load Dataset and split into Features and Label

In [None]:
# Read the raw CSV and discard the two columns that are irrelevant for the
# classification task.
df = pd.read_csv("breast-cancer-wisconsin-data/data.csv").drop(columns=['Unnamed: 32', 'id'])
# After the drop, the first column holds the labels (y) and every remaining
# column is a feature (X).
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

#### Encode Labels

In [None]:
# Encode the class labels as integers. The encoder is always fitted on the
# untouched label column of `df` rather than on `y` itself, so re-running this
# cell does not re-encode already-encoded integers and overwrite `le.classes_`
# (which later cells use for display labels).
le = LabelEncoder()
y = le.fit_transform(df.iloc[:, 0])

#### The mapping of the labels

In [None]:
# Show which integer code the encoder assigned to each original class label.
encoded_classes = le.transform(le.classes_)
le_name_mapping = {label: code for label, code in zip(le.classes_, encoded_classes)}
print(le_name_mapping)

#### Splitting the data into Train, Test and Valid datasets

In [None]:
# Two-stage hold-out split: first carve out 20% as the test set, then take 20%
# of the remainder as the validation set. Both splits use a fixed random_state.
from sklearn.model_selection import train_test_split

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, random_state=42
)

#### Scale the data

In [None]:
# Fit the scaler on the training split only; the validation and test splits are
# transformed with the statistics learned from the training data.
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

# Keep the unscaled train+validation pool as a plain ndarray because
# model_kfold indexes it positionally with the fold indices.
# np.asarray leaves an existing ndarray untouched, so re-running this cell no
# longer fails the way `.values` does on an array.
X_train_val = np.asarray(X_train_val)

In [None]:
# Preview only the first scaled rows instead of echoing the whole matrix.
X_train[:5]

#### Handling the imbalance between the classes

In [None]:
# Handle the class imbalance with per-class loss weights.
from sklearn.utils.class_weight import compute_class_weight

# The weights are computed from the training labels only, so they influence
# the learning process while the validation/test labels stay out of it.
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)
# Map positional index -> weight (the encoded labels are 0..n_classes-1).
class_weight_dict = {index: weight for index, weight in enumerate(class_weights)}
class_weight_dict

As expected, the minority class 'M' (encoded as 1) receives the higher weight, 1.368.

### Neural Networks

##### Init Libraries

In [None]:
#from keras.models import Sequential
#from keras.layers import Dense
#from keras.regularizers import l1, l2
#from keras.layers import Dropout, Flatten, BatchNormalization
from keras import models
from keras import layers
from keras import regularizers
from keras import optimizers
from keras import losses
from keras import metrics
from keras import callbacks
from keras import utils

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf

##### Function Helpers

In [None]:
def model_result(model):
    """Print and plot a test-set evaluation report for a trained model.

    Uses the notebook-level ``X_test``, ``y_test`` and ``le``. The report
    contains the loss and accuracy returned by ``model.evaluate``, a confusion
    matrix plot, the scikit-learn classification report and the model summary.

    Args:
        model: A compiled Keras model whose ``evaluate`` result lists the loss
            first and the accuracy second, and whose ``predict`` output is
            rounded to obtain the predicted class.

    Returns:
        None. Everything is printed or plotted.
    """
    print('================================================================================')
    print(f'Evaluation Report for Model: {model.name}')

    # Loss and accuracy on the held-out test set.
    result = model.evaluate(X_test, y_test, verbose=0)
    print(f'Loss Value: {result[0]:.3f}, Accuracy: {result[1]*100:.3f}%')

    # Confusion matrix: round the predicted probabilities to hard 0/1 labels.
    y_prob = model.predict(X_test, verbose=0)
    y_pred = np.round(y_prob).astype(int)
    cm_test = confusion_matrix(y_test, y_pred)
    cm_disp = ConfusionMatrixDisplay(cm_test, display_labels=le.classes_)
    cm_disp.plot()
    plt.show()

    # Per-class precision / recall / F1.
    report = classification_report(y_test, y_pred, target_names=le.classes_)
    print(report)

    print('Model Summary:')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line.
    model.summary()
    print('================================================================================')

In [None]:
def model_history(history):
    """Plot the training curves recorded in a Keras ``History`` object.

    Draws two side-by-side panels (loss and accuracy) for the training and
    validation series, and marks the epoch with the lowest validation loss on
    both panels.

    Args:
        history: Object whose ``history`` attribute is a dict containing the
            series ``'loss'``, ``'val_loss'``, ``'accuracy'`` and
            ``'val_accuracy'``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(15,6))

    logs = history.history
    val_loss_min_pos = np.argmin(logs['val_loss'])
    # Pull the annotated values into locals: nesting the same quote character
    # inside an f-string is a SyntaxError before Python 3.12.
    best_val_loss = logs['val_loss'][val_loss_min_pos]
    val_acc_at_best = logs['val_accuracy'][val_loss_min_pos]

    # Left panel: training and validation loss.
    plt.subplot(1, 2, 1)
    plt.plot(logs['loss'], label='Train Loss', color='#8502d1')
    plt.plot(logs['val_loss'], label='Validation Loss')
    plt.title('Train and Validation Loss')
    plt.plot(val_loss_min_pos, best_val_loss, 'r*', label='Min Validation Loss')
    plt.text(val_loss_min_pos, best_val_loss, f'({val_loss_min_pos}, {best_val_loss:.3f})', va='bottom')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Right panel: training and validation accuracy.
    plt.subplot(1, 2, 2)
    plt.plot(logs['accuracy'], label='Train Accuracy', color='#8502d1')
    plt.plot(logs['val_accuracy'], label='Validation Accuracy')
    plt.plot(val_loss_min_pos, val_acc_at_best, 'r*', label='Validation Accuracy @ Min Validation Loss')
    plt.text(val_loss_min_pos, val_acc_at_best, f'({val_loss_min_pos}, {val_acc_at_best:.3f})', va='bottom')
    plt.title('Train and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.show()

In [None]:
def proc_history(history):
    """Summarise a training run at the epoch with the lowest validation loss.

    Args:
        history: Object whose ``history`` attribute is a dict containing the
            per-epoch series ``'accuracy'``, ``'loss'``, ``'val_accuracy'``,
            ``'val_loss'`` and ``'val_recall'``.

    Returns:
        dict mapping 'Train Accuracy', 'Train Loss', 'Validation Accuracy',
        'Validation Loss' and 'Validation Recall' to the value each series
        had at the epoch where ``'val_loss'`` is minimal.
    """
    logs = history.history
    best_epoch = np.argmin(logs['val_loss'])

    # (report label, key in the history dict), in output order.
    report_fields = (
        ('Train Accuracy', 'accuracy'),
        ('Train Loss', 'loss'),
        ('Validation Accuracy', 'val_accuracy'),
        ('Validation Loss', 'val_loss'),
        ('Validation Recall', 'val_recall'),
    )
    return {label: logs[key][best_epoch] for label, key in report_fields}

In [None]:
def model_fit(model, optimizer=optimizers.RMSprop, learning_rate=0.001, epochNum=1000, batchSize=32, en_reduce_lr=False, en_early_stopping=True, pca=False, verbose="auto", Dataset=None):
    """Compile and train ``model``, then return the best checkpointed version.

    The model is compiled with binary cross-entropy and the metrics
    ``accuracy`` and ``recall``. The epoch with the lowest validation loss is
    saved to ``model_checkpoints/<model.name>_checkpoint.model.keras`` and that
    file is re-loaded as the returned model.

    Args:
        model: Keras model to train.
        optimizer: Optimizer class; it is instantiated with ``learning_rate``.
        learning_rate: Learning rate passed to the optimizer.
        epochNum: Maximum number of epochs.
        batchSize: Mini-batch size.
        en_reduce_lr: If True, halve the learning rate after 10 epochs without
            validation-loss improvement (lower bound 1e-5).
        en_early_stopping: If True, stop after 100 epochs without
            validation-loss improvement and restore the best weights.
        pca: Unused; kept so existing calls that pass it keep working.
        verbose: Verbosity forwarded to ``model.fit``.
        Dataset: Optional dict with keys ``'X_train'``, ``'y_train'``,
            ``'X_val'`` and ``'y_val'``. When None, the notebook-level
            ``X_train``/``y_train``/``X_val``/``y_val`` split is used.

    Returns:
        tuple ``(history, model)``: the object returned by ``model.fit`` and
        the model loaded from the checkpoint file.
    """
    checkpoint_filepath = f'model_checkpoints/{model.name}_checkpoint.model.keras'

    # The checkpoint callback is always active because the returned model is
    # re-loaded from that file.
    callbacks_list = [
        callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            monitor='val_loss',
            mode='min',
            save_best_only=True,
            verbose=0)
    ]
    if en_reduce_lr:
        callbacks_list.append(callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.00001, verbose=0))
    if en_early_stopping:
        callbacks_list.append(callbacks.EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True, verbose=0))

    if Dataset is None:
        train_x, train_y = X_train, y_train
        validation_data = (X_val, y_val)
        fit_class_weight = class_weight_dict
    else:
        train_x, train_y = Dataset['X_train'], Dataset['y_train']
        validation_data = (Dataset['X_val'], Dataset['y_val'])
        # Balance the classes using the labels that are actually trained on,
        # not the weights of the notebook-level training split.
        present_classes = np.unique(train_y)
        balanced_weights = compute_class_weight('balanced', classes=present_classes, y=train_y)
        fit_class_weight = {int(label): float(weight) for label, weight in zip(present_classes, balanced_weights)}

    model.compile(optimizer=optimizer(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy', metrics.Recall(name='recall')])
    history = model.fit(train_x, train_y, batch_size=batchSize, epochs=epochNum, validation_data=validation_data, class_weight=fit_class_weight, callbacks=callbacks_list, verbose=verbose)

    model = models.load_model(checkpoint_filepath)

    return history, model

In [None]:
def model_kfold(xtrain, ytrain, model_base, optimizer=optimizers.RMSprop,learning_rate=0.001, epochNum=1000, batchSize=32, en_reduce_lr=False, en_early_stopping=True, verbose="auto"):
    """Evaluate a model with 5-fold stratified cross-validation.

    For every fold a fresh ``RobustScaler`` is fitted on the fold's training
    rows and applied to its validation rows. A clone of ``model_base`` is reset
    to the weights of ``model_base`` and trained with ``model_fit``;
    ``model_base`` itself is never fitted here.

    Args:
        xtrain: Feature array; must support indexing with integer index
            arrays (``xtrain[train]``).
        ytrain: Label array, indexable the same way.
        model_base: Keras model providing the architecture and the starting
            weights for every fold.
        optimizer, learning_rate, epochNum, batchSize, en_reduce_lr,
        en_early_stopping, verbose: Forwarded unchanged to ``model_fit``.

    Returns:
        pandas.DataFrame with one row per fold and the columns 'k',
        'Train Accuracy', 'Train Loss', 'Validation Accuracy',
        'Validation Loss' and 'Validation Recall'.
    """
    result_columns = ['k', 'Train Accuracy', 'Train Loss' , 'Validation Accuracy', 'Validation Loss',  'Validation Recall']

    model = models.clone_model(model_base)
    fold_k = StratifiedKFold(n_splits = 5).split(xtrain, ytrain)

    # Collect one record per fold and build the frame once at the end instead
    # of enlarging an empty DataFrame row by row.
    fold_records = []
    for k , (train, valid) in enumerate(fold_k):

        # Scale inside the fold so validation rows never influence the scaler.
        scaler = RobustScaler()
        X_train = scaler.fit_transform(xtrain[train])
        X_val = scaler.transform(xtrain[valid])

        dataset = {'X_train': X_train, 'y_train': ytrain[train], 'X_val': X_val, 'y_val': ytrain[valid]}

        # Every fold starts from the same weights as model_base.
        model.set_weights(model_base.get_weights())

        history, model = model_fit(model, optimizer=optimizer, learning_rate=learning_rate, epochNum=epochNum, batchSize=batchSize, en_reduce_lr=en_reduce_lr, en_early_stopping=en_early_stopping, Dataset=dataset, verbose=verbose)

        fold_records.append({'k': k, **proc_history(history)})

    return pd.DataFrame(fold_records, columns=result_columns)

##### Build the model

In [None]:
# Empty container for models.
models_list = list()

### SLP

In [None]:
# Deterministic fitting: fix the hash seed and the Keras random seed, and
# request deterministic TensorFlow ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

#### Baseline

In [None]:
# Baseline single-layer perceptron: 30 input features -> one sigmoid unit.
CURRENT_MODEL = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='SLP_1',
)

In [None]:
# Train on a copy so CURRENT_MODEL keeps its initial weights; later cells reset
# `model` from CURRENT_MODEL before each experiment.
model = models.clone_model(CURRENT_MODEL)
model.set_weights(CURRENT_MODEL.get_weights())

##### Baseline - Results

In [None]:
# Fit the baseline on the notebook-level train/validation split with the
# default optimizer and learning rate of model_fit.
history, model = model_fit(
    model,
    batchSize=32,
    epochNum=1000,
    verbose=0,
)

# Metrics at the epoch with the lowest validation loss.
proc_data = proc_history(history)

In [None]:
# Plot the loss and accuracy curves of the baseline run.
model_history(history)

In [None]:
# Cross-validate the baseline, starting every fold from the initial weights.
model.set_weights(CURRENT_MODEL.get_weights())

res_df = model_kfold(X_train_val, y_train_val, model, epochNum=1000, batchSize=32, verbose=0)
# The accuracy line must read the 'Validation Accuracy' column; it previously
# repeated the 'Validation Loss' statistics under an accuracy label.
print(f"Validation Accuracy Mean: {res_df['Validation Accuracy'].mean()}, Validation Accuracy STD: {res_df['Validation Accuracy'].std()}")
print(f"Validation Loss Mean: {res_df['Validation Loss'].mean()}, Validation Loss STD: {res_df['Validation Loss'].std()}")
res_df

#### Optimizers

In [None]:
# Compare optimizers on the baseline model. Every optimizer starts from the
# same initial weights (those of CURRENT_MODEL).
optimizer_records = []

for optimizer in [optimizers.Adam, optimizers.RMSprop, optimizers.SGD, optimizers.Adagrad]:
    print('------------------------------------------------------------------------------------------------------------------')
    print(f'Optimizer = {optimizer}')

    model.set_weights(CURRENT_MODEL.get_weights())

    # SGD and Adagrad are run with learning rate 0.01, the others with 0.001.
    learning_rate = 0.01 if optimizer in (optimizers.SGD, optimizers.Adagrad) else 0.001

    history, model = model_fit(model, optimizer=optimizer, learning_rate=learning_rate, epochNum=2000, en_early_stopping=True, verbose=0)
    proc_data = proc_history(history)

    # Store the optimizer's class name rather than the class object so the
    # table is readable.
    optimizer_records.append({'Optimizer': optimizer.__name__, **proc_data})
    model_history(history)

# This table holds optimizer results, so it gets its own name instead of
# `df_batch_size`, and it is displayed so the comparison is visible.
df_optimizers = pd.DataFrame(optimizer_records, columns=['Optimizer', 'Train Accuracy', 'Train Loss', 'Validation Accuracy', 'Validation Loss', 'Validation Recall'])
df_optimizers


Conclusion: The graphs show that all optimizers achieve similar results, but SGD and Adagrad need many more epochs to converge.

In [None]:
# Cross-validate the baseline with each candidate optimizer. The model is reset
# to the initial weights before every run.
model.set_weights(CURRENT_MODEL.get_weights())
ADAM_res_df = model_kfold(
    X_train_val, y_train_val, model,
    optimizer=optimizers.Adam, learning_rate=0.001,
    epochNum=1000, batchSize=32, verbose=0,
)

model.set_weights(CURRENT_MODEL.get_weights())
RMS_res_df = model_kfold(
    X_train_val, y_train_val, model,
    optimizer=optimizers.RMSprop, learning_rate=0.001,
    epochNum=1000, batchSize=32, verbose=0,
)

model.set_weights(CURRENT_MODEL.get_weights())
SGD_res_df = model_kfold(
    X_train_val, y_train_val, model,
    optimizer=optimizers.SGD, learning_rate=0.01,
    epochNum=1000, batchSize=32, verbose=0,
)

In [None]:
# Mean and spread of the validation loss across the Adam folds.
adam_val_loss = ADAM_res_df['Validation Loss']
print(f"Validation Loss Mean: {adam_val_loss.mean()}, Validation Loss STD: {adam_val_loss.std()}")
ADAM_res_df

In [None]:
# Mean and spread of the validation loss across the RMSprop folds.
rms_val_loss = RMS_res_df['Validation Loss']
print(f"Validation Loss Mean: {rms_val_loss.mean()}, Validation Loss STD: {rms_val_loss.std()}")
RMS_res_df

In [None]:
# Mean and spread of the validation loss across the SGD folds.
sgd_val_loss = SGD_res_df['Validation Loss']
print(f"Validation Loss Mean: {sgd_val_loss.mean()}, Validation Loss STD: {sgd_val_loss.std()}")
SGD_res_df

Conclusion: We will take the best Optimizer as SGD, as it yields the lowest STD value in the Validation Loss.

In [None]:
# Optimizer class passed to model_fit / model_kfold by the following experiments.
BEST_OPTIMIZER = optimizers.SGD

#### Learning-Rate

In [None]:
# Sweep the learning rate with the selected optimizer; one result row per rate.
df_res = pd.DataFrame(
    columns=['Learning Rate', 'Train Accuracy', 'Train Loss', 'Validation Accuracy', 'Validation Loss', 'Validation Recall']
)

for learning_rate in (0.1, 0.01, 0.001, 0.0001):
    print('------------------------------------------------------------------------------------------------------------------')
    print(f'Learning Rate = {learning_rate}')

    # Restart from the baseline's initial weights for a fair comparison.
    model.set_weights(CURRENT_MODEL.get_weights())

    history, model = model_fit(
        model,
        optimizer=BEST_OPTIMIZER,
        learning_rate=learning_rate,
        epochNum=2000,
        en_early_stopping=True,
        verbose=0,
    )
    proc_data = proc_history(history)

    new_row = {'Learning Rate': learning_rate, **proc_data}
    df_res.loc[len(df_res)] = new_row
    model_history(history)


Conclusion: From the graphs above, we can infer that learning rates of 0.1 and 0.01 yield the same results; however, a learning rate of 0.1 reaches that result 10 times faster, which means the model can handle a large learning rate at the beginning of training. <br>
In the next steps, we will include a learning-rate scheduler, which will decrease the learning rate once the validation loss reaches a plateau.

In [None]:
# Initial learning rate passed to model_fit / model_kfold by the following experiments.
BEST_LEARNING_RATE = 0.1

#### Network Architecture

In [None]:
# Empty comparison table; each architecture experiment appends one row to it.
nn_arch_columns = [
    'Model Name',
    'Train Accuracy',
    'Train Loss',
    'Validation Accuracy',
    'Validation Loss',
    'Validation Recall',
]
nn_arch_df = pd.DataFrame(columns=nn_arch_columns)

##### NN 30x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# No hidden layer: 30 inputs -> one sigmoid output unit.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### First Hidden Layer

##### NN 30x1x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# One hidden layer with a single ReLU unit.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(1, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x1x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x5x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# One hidden layer with 5 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(5, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x5x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x10x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# One hidden layer with 10 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(10, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x10x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x20x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# One hidden layer with 20 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(20, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x20x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# One hidden layer with 30 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

Conclusion: We see that NN_30x30x1 yields the best results for the 1st hidden layer. <br>
We will check what is the best activation function for this layer.

#### Activation Functions

##### NN 30x30x1 - leaky_relu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# 30-unit hidden layer, this time with the leaky_relu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='leaky_relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30-leaky_relu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x1 - silu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# 30-unit hidden layer, this time with the silu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='silu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30-silu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x1 - elu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# 30-unit hidden layer, this time with the elu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='elu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30-elu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x1 - tanh

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# 30-unit hidden layer with the tanh activation.
# The model name must say "tanh": it was a copy of the elu cell's name, which
# mislabelled this row in nn_arch_df and made this run write to the same
# checkpoint file as the elu run (the checkpoint path is built from the name).
model = models.Sequential(name='NN_30x30-tanh-x1')

model.add(layers.Input((30,)))
model.add(layers.Dense(30, activation='tanh'))
model.add(layers.Dense(1, activation='sigmoid'))

history, model = model_fit(model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE, en_early_stopping=True, en_reduce_lr=True, verbose=0)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

Conclusion: We see that relu yields the best results as the activation function of the 1st hidden layer. <br>

##### Second Hidden Layer

##### NN 30x30x1x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Two hidden layers: 30 ReLU units followed by a single ReLU unit.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(1, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x1x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x5x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Two hidden layers: 30 ReLU units followed by 5 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(5, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x5x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x10x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Two hidden layers: 30 ReLU units followed by 10 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(10, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x10x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x20x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Two hidden layers: 30 ReLU units followed by 20 ReLU units.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(20, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x20x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x30x1

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Two hidden layers of 30 ReLU units each.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(30, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x30x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

Conclusion: We see that NN_30x30x5x1 yields the best results for the 2nd hidden layer. <br>
We will check what is the best activation function for this layer.

#### Activation Functions

##### NN 30x30x5x1 - leaky_relu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Second hidden layer (5 units) switched to the leaky_relu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(5, activation='leaky_relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x5-leaky_relu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x5x1 - silu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Second hidden layer (5 units) switched to the silu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(5, activation='silu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x5-silu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x5x1 - elu

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Second hidden layer (5 units) switched to the elu activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(5, activation='elu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x5-elu-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

##### NN 30x30x5x1 - tanh

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED'] = '0'
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Second hidden layer (5 units) switched to the tanh activation.
model = models.Sequential(
    [
        layers.Input((30,)),
        layers.Dense(30, activation='relu'),
        layers.Dense(5, activation='tanh'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='NN_30x30x5-tanh-x1',
)

history, model = model_fit(
    model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE,
    en_early_stopping=True, en_reduce_lr=True, verbose=0,
)
proc_data = proc_history(history)

# Append this run's metrics to the architecture comparison table.
new_row = {'Model Name': model.name, **proc_data}
nn_arch_df.loc[len(nn_arch_df)] = new_row

model_history(history)
print(proc_data)

Conclusion: We see that relu yields the best results as the activation function of the 2nd hidden layer. <br>

In [None]:
# Architecture comparison table, in the order the experiments appended their rows.
nn_arch_df

In [None]:
# Same table ranked by validation loss, lowest (best) first.
nn_arch_df.sort_values('Validation Loss')

Conclusion: We can see that the Network Architecture of 30x30x5x1 yields the best results, while a leaky-relu or relu activation function in the second hidden layer yields the same results, thus we will remain with the default activation function. <br>
We will check this network with KFolds.

In [None]:
# Deterministic fitting: fixed hash seed, fixed Keras seed, deterministic TF ops.
os.environ['PYTHONHASHSEED']=str(0)
utils.set_random_seed(0)
tf.config.experimental.enable_op_determinism()

# Rebuild the selected 30x30x5x1 architecture and cross-validate it.
model = models.Sequential(name='NN_30x30x5x1')

model.add(layers.Input((30,)))
model.add(layers.Dense(30, activation='relu'))
model.add(layers.Dense(5, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

res_df = model_kfold(X_train_val, y_train_val, model, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE, epochNum=1000, batchSize=32, en_reduce_lr=True, en_early_stopping=True, verbose=0)
# The accuracy line must read the 'Validation Accuracy' column; it previously
# repeated the 'Validation Loss' statistics under an accuracy label.
print(f"Validation Accuracy Mean: {res_df['Validation Accuracy'].mean()}, Validation Accuracy STD: {res_df['Validation Accuracy'].std()}")
print(f"Validation Loss Mean: {res_df['Validation Loss'].mean()}, Validation Loss STD: {res_df['Validation Loss'].std()}")
res_df

In [None]:
# BEST_MODEL was referenced here without ever being assigned, which raises a
# NameError on a fresh kernel. Define it as a copy of the selected
# architecture: same layers and the same current weights as `model`.
# NOTE(review): assumes `model` is still the NN_30x30x5x1 network built in the
# k-fold cell above - confirm this is the intended "best model".
BEST_MODEL = models.clone_model(model)
BEST_MODEL.set_weights(model.get_weights())
BEST_MODEL

#### Batch-Size

In [None]:
df_batch_size = pd.DataFrame(columns=['Batch Size', 'Train Accuracy', 'Train Loss', 'Validation Accuracy', 'Validation Loss', 'Validation Recall'])

# Every batch size must start from the same initial weights of the network
# under test. CURRENT_MODEL is the single-layer baseline, so its weight list
# does not fit the multi-layer `model` used here. Instead, take the starting
# weights from a freshly initialised clone of `model` (seeded so that
# re-running the cell gives the same start).
utils.set_random_seed(0)
batch_base_model = models.clone_model(model)
initial_weights = batch_base_model.get_weights()

for batch_size in [1, 2, 4, 8, 16, 32, 64, 128]:
    print('------------------------------------------------------------------------------------------------------------------')
    print(f'Batch Size = {batch_size}')

    model.set_weights(initial_weights)

    history, model = model_fit(model, epochNum=1000, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE, batchSize=batch_size, en_early_stopping=True, en_reduce_lr=True, verbose=0)
    proc_data = proc_history(history)

    new_row = {'Batch Size': batch_size, **proc_data}
    df_batch_size.loc[len(df_batch_size)] = new_row
    model_history(history)


In [None]:
# Cross-validate every batch size on the network under test.
# CURRENT_MODEL is the single-layer baseline, so copying its weights into the
# multi-layer `model` cannot work. model_kfold clones the model it is given and
# never fits that model itself, so one freshly initialised (seeded) copy of
# `model` can serve as the starting point for all batch sizes.
utils.set_random_seed(0)
kfold_base_model = models.clone_model(model)

for batch_size in [1, 2, 4, 8, 16, 32, 64, 128]:
    batch_size_res_df = model_kfold(X_train_val, y_train_val, kfold_base_model, optimizer=BEST_OPTIMIZER, learning_rate=BEST_LEARNING_RATE, epochNum=1000, batchSize=batch_size, en_reduce_lr=True, en_early_stopping=True, verbose=0)
    print('----------------------------------------------------------------------------------------------------')
    print(f'Batch Size = {batch_size}')
    print(f"Validation Loss Mean: {batch_size_res_df['Validation Loss'].mean()}, Validation Loss STD: {batch_size_res_df['Validation Loss'].std()}")
    print(batch_size_res_df)
