### Research Process 


#### Init Libraries

In [None]:
import pandas as pd
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler


#### Load Dataset and split into Features and Label

In [None]:
# Load the raw CSV and discard the columns that are irrelevant for the
# classification task in a single chained expression.
df = pd.read_csv("breast-cancer-wisconsin-data/data.csv").drop(columns=['Unnamed: 32', 'id'])

# After the drop, column 0 is used as the label (y) and every remaining
# column as a feature (X).
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

#### Encode Labels

In [None]:
# Encode the string labels as integers.
# The encoder is fitted on the untouched label column of `df` rather than on
# `y` itself: `y` is overwritten here, so fitting on it would make a second run
# of this cell refit on already-encoded integers and replace the original
# class names stored in `le.classes_` (used later for report labels).
le = LabelEncoder()
y = le.fit_transform(df.iloc[:, 0])

#### The mapping of the labels

In [None]:
# Map every original class label to the integer code the encoder assigns to it.
encoded_classes = le.transform(le.classes_)
le_name_mapping = {label: code for label, code in zip(le.classes_, encoded_classes)}
print(le_name_mapping)

#### Splitting the data into Train, Validation and Test datasets

In [None]:
# Split the data in two stages with a fixed seed (random_state=42):
#   1. hold out 20% of all samples as the test set;
#   2. hold out 20% of the remaining 80% as the validation set.
# Net proportions: 64% train, 16% validation, 20% test.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

#### Scale the data

In [None]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to the test and validation splits so that no statistics from
# held-out data enter the scaling.
# NOTE: the split variables are overwritten in place, so running this cell a
# second time without re-running the split cell scales already-scaled data.
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

#### Handling the imbalance between the classes

In [None]:
# Handle the class imbalance.
from sklearn.utils.class_weight import compute_class_weight

# Compute 'balanced' class weights.
# The weights are computed from the training labels only: they are meant to
# influence the learning process, while the held-out splits stay untouched so
# that model performance is evaluated fairly.
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
# Key each weight by its position in the sorted unique-label array.
class_weight_dict = dict(enumerate(class_weights))
# Bare expression: displays the mapping as the cell output.
class_weight_dict

As expected, the minority class 'M' (encoded as 1) gets the higher weight, 1.368.

### Neural Networks

##### Init Libraries

In [None]:
#from keras.models import Sequential
#from keras.layers import Dense
#from keras.regularizers import l1, l2
#from keras.layers import Dropout, Flatten, BatchNormalization
from keras import models
from keras import layers
from keras import regularizers
from keras import optimizers
from keras import losses
from keras import metrics
from keras import callbacks
from keras import utils

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report

import tensorflow as tf

# Ask TensorFlow to run its ops deterministically so that the seeded runs
# below are repeatable.
tf.config.experimental.enable_op_determinism()

##### Function Helpers

In [None]:
def model_result(model):
    """Print an evaluation report for a trained binary classifier on the test split.

    Reads the notebook-level globals ``X_test``, ``y_test`` and ``le``.
    The report consists of the loss/accuracy returned by ``model.evaluate``,
    a confusion-matrix plot, a scikit-learn classification report and the
    Keras model summary.

    Args:
        model: A compiled Keras model whose ``evaluate`` returns
            ``[loss, accuracy]`` and whose ``predict`` returns one
            probability per sample.

    Returns:
        None. Everything is printed or plotted.
    """
    print('================================================================================')
    print(f'Evaluation Report for Model: {model.name}')

    # Loss and accuracy on the test split.
    result = model.evaluate(X_test, y_test, verbose=0)
    print(f'Loss Value: {result[0]:.3f}, Accuracy: {result[1]*100:.3f}%')

    # Round the predicted probabilities to hard 0/1 labels and flatten the
    # (n_samples, 1) output so scikit-learn receives a plain 1-D label vector.
    y_prob = model.predict(X_test, verbose=0)
    y_pred = np.round(y_prob).astype(int).ravel()

    # Confusion matrix, labelled with the original class names.
    cm_test = confusion_matrix(y_test, y_pred)
    cm_disp = ConfusionMatrixDisplay(cm_test, display_labels=le.classes_)
    cm_disp.plot()
    plt.show()

    # Per-class precision / recall / F1.
    report = classification_report(y_test, y_pred, target_names=le.classes_)
    print(report)

    print('Model Summary:')
    # ``summary()`` prints the table itself and returns None; wrapping it in
    # print() would emit a stray "None" line after the table.
    model.summary()
    print('================================================================================')

In [None]:
def model_history(history):
    """Plot training/validation loss and accuracy curves side by side.

    The epoch with the lowest validation loss is marked with a red star on
    both panels and annotated with ``(epoch, value)``.

    Args:
        history: Object with a ``history`` dict holding the per-epoch lists
            ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``
            (as returned by ``model.fit``).

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    log = history.history

    # Look the annotated values up once. This also keeps quote characters out
    # of the f-string replacement fields below: reusing the enclosing quote
    # inside an f-string is a SyntaxError before Python 3.12.
    val_loss_min_pos = np.argmin(log['val_loss'])
    best_val_loss = log['val_loss'][val_loss_min_pos]
    best_val_acc = log['val_accuracy'][val_loss_min_pos]

    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(15, 6))

    # Left panel: training and validation loss.
    ax_loss.plot(log['loss'], label='Train Loss', color='#8502d1')
    ax_loss.plot(log['val_loss'], label='Validation Loss')
    ax_loss.set_title('Train and Validation Loss')
    ax_loss.plot(val_loss_min_pos, best_val_loss, 'r*', label='Min Validation Loss')
    ax_loss.text(val_loss_min_pos, best_val_loss, f'({val_loss_min_pos}, {best_val_loss:.3f})', va='bottom')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    # Right panel: training and validation accuracy.
    ax_acc.plot(log['accuracy'], label='Train Accuracy', color='#8502d1')
    ax_acc.plot(log['val_accuracy'], label='Validation Accuracy')
    ax_acc.plot(val_loss_min_pos, best_val_acc, 'r*', label='Validation Accuracy @ Min Validation Loss')
    ax_acc.text(val_loss_min_pos, best_val_acc, f'({val_loss_min_pos}, {best_val_acc:.3f})', va='bottom')
    ax_acc.set_title('Train and Validation Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    plt.show()

In [None]:
def proc_history(history):
    """Summarise a training run at its best-validation-loss epoch.

    Args:
        history: Object with a ``history`` dict holding the per-epoch lists
            ``'accuracy'``, ``'loss'``, ``'val_accuracy'`` and ``'val_loss'``.

    Returns:
        dict with the keys ``'Train Accuracy'``, ``'Train Loss'``,
        ``'Validation Accuracy'`` and ``'Validation Loss'`` (in that order),
        each taken at the epoch where ``'val_loss'`` is minimal.
    """
    log = history.history
    best_epoch = np.argmin(log['val_loss'])

    # Report label -> key of the corresponding per-epoch series.
    series_by_label = {
        'Train Accuracy': 'accuracy',
        'Train Loss': 'loss',
        'Validation Accuracy': 'val_accuracy',
        'Validation Loss': 'val_loss',
    }
    return {label: log[series][best_epoch] for label, series in series_by_label.items()}

In [None]:
def model_fit(model, epochNum, pca=False, verbose='auto'):
    """Compile and train ``model``, then return the best checkpoint.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``X_val``,
    ``y_val`` and ``class_weight_dict``. Training uses Adam (lr=0.001),
    binary cross-entropy, batch size 50, and three callbacks that all monitor
    ``val_loss``: a best-only model checkpoint, learning-rate reduction on
    plateau, and early stopping.

    Args:
        model: Uncompiled Keras model; its ``name`` determines the checkpoint
            file ``model_checkpoints/<name>_checkpoint.model.keras``.
        epochNum: Maximum number of training epochs.
        pca: Currently unused; kept so existing callers keep working.
        verbose: Verbosity passed to ``model.fit`` (``'auto'``, 0, 1 or 2).

    Returns:
        (history, model): the ``History`` object from ``model.fit`` and the
        model reloaded from the checkpoint file.
    """
    utils.set_random_seed(0)

    # ``model.fit`` accepts 'auto', but the callbacks document ``verbose`` as
    # an integer. Forwarding the string risks a TypeError as soon as a
    # callback tries to log, so translate 'auto' to 1 for them.
    callback_verbose = 1 if verbose == 'auto' else int(verbose)

    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True, verbose=callback_verbose)
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.00001, verbose=callback_verbose)
    checkpoint_filepath = f'model_checkpoints/{model.name}_checkpoint.model.keras'
    model_checkpoint_callback = callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=callback_verbose)

    model.compile(optimizer=optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    history = model.fit(X_train, y_train, batch_size=50, epochs=epochNum, validation_data=(X_val, y_val), class_weight=class_weight_dict, callbacks=[model_checkpoint_callback, reduce_lr, early_stopping], verbose=verbose)

    # Hand back the model saved at the lowest validation loss rather than the
    # in-memory model.
    model = models.load_model(checkpoint_filepath)

    return history, model

##### Build the model

#### SLP

##### SLP Baseline

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'SLP'

# Single-layer perceptron: one sigmoid unit directly on the 30 input features.
model = models.Sequential(
    [layers.Dense(1, activation='sigmoid', input_shape=(30,))],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=600)

In [None]:
# Plot the loss/accuracy curves of the SLP baseline run.
model_history(history)

In [None]:
# Report test-split performance of the SLP baseline's best checkpoint.
model_result(model)

##### SLP with Dropout 0.2

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'SLP_Dropout_0.2'

# Dropout (rate 0.2) applied to the 30 inputs, followed by a single sigmoid unit.
model = models.Sequential(
    [
        layers.Dropout(rate=0.2, input_shape=(30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the SLP + Dropout 0.2 run.
model_history(history)

In [None]:
# Report test-split performance of the SLP + Dropout 0.2 best checkpoint.
model_result(model)

##### SLP with Dropout 0.1

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'SLP_Dropout_0.1'

# Dropout (rate 0.1) applied to the 30 inputs, followed by a single sigmoid unit.
model = models.Sequential(
    [
        layers.Dropout(rate=0.1, input_shape=(30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the SLP + Dropout 0.1 run.
model_history(history)

In [None]:
# Report test-split performance of the SLP + Dropout 0.1 best checkpoint.
model_result(model)

#### MLP

##### MLP Baseline

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'MLP'

# Smallest possible MLP: one hidden ReLU unit feeding one sigmoid output unit.
model = models.Sequential(
    [
        layers.Dense(1, activation='relu', input_shape=(30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=2000)

In [None]:
# Plot the loss/accuracy curves of the MLP baseline run.
model_history(history)

In [None]:
# Report test-split performance of the MLP baseline's best checkpoint.
model_result(model)

##### MLP - 2 Neurons

In [None]:
utils.set_random_seed(0)
# Distinct name: the model name determines the checkpoint file path and the
# report title, so reusing the baseline's 'MLP' would overwrite the baseline's
# checkpoint and make the two reports indistinguishable.
MODEL_NAME = 'MLP_L1_2'

# One hidden layer with 2 ReLU units, then a sigmoid output unit.
model = models.Sequential(name=f'{MODEL_NAME}')
model.add(layers.Dense(2, activation='relu', input_shape=(30,)))
model.add(layers.Dense(1, activation='sigmoid'))

history, model = model_fit(model, epochNum=2000)

In [None]:
# Plot the loss/accuracy curves of the 2-neuron MLP run.
model_history(history)

In [None]:
# Report test-split performance of the 2-neuron MLP's best checkpoint.
model_result(model)

##### MLP [1:30]

In [None]:
utils.set_random_seed(0)
# Distinct name: the model name determines the checkpoint file path and the
# report title, so reusing the baseline's 'MLP' would overwrite the baseline's
# checkpoint and make the two reports indistinguishable.
MODEL_NAME = 'MLP_L1_30'

# One hidden layer with 30 ReLU units, then a sigmoid output unit.
model = models.Sequential(name=f'{MODEL_NAME}')
model.add(layers.Dense(30, activation='relu', input_shape=(30,)))
model.add(layers.Dense(1, activation='sigmoid'))

history, model = model_fit(model, epochNum=600)

In [None]:
# Plot the loss/accuracy curves of the single-hidden-layer (30 units) MLP run.
model_history(history)

In [None]:
# Report test-split performance of the single-hidden-layer (30 units) MLP's best checkpoint.
model_result(model)

##### MLP [1:30, 2:30]

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'MLP_L1_30_L2_30'

model = models.Sequential(name=f'{MODEL_NAME}')

# First hidden layer: declares the 30-feature input shape.
model.add(layers.Dense(30, activation='relu', input_shape=(30,)))

# Second hidden layer: its input is the previous layer's output, so it takes
# no `input_shape` argument (the one that was here was redundant).
model.add(layers.Dense(30, activation='relu'))

model.add(layers.Dense(1, activation='sigmoid'))

history, model = model_fit(model, epochNum=600)

In [None]:
# Plot the loss/accuracy curves of the two-hidden-layer (30, 30) MLP run.
model_history(history)

In [None]:
# Report test-split performance of the two-hidden-layer (30, 30) MLP's best checkpoint.
model_result(model)

##### MLP [1:30] Dropout 0.5

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'MLP_Dropout_0.5'

# One hidden layer of 30 ReLU units with dropout 0.5, then a sigmoid output unit.
model = models.Sequential(
    [
        layers.Dense(30, activation='relu', input_shape=(30,)),
        layers.Dropout(rate=0.5),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the MLP (30 units) + Dropout 0.5 run.
model_history(history)

In [None]:
# Report test-split performance of the MLP (30 units) + Dropout 0.5 best checkpoint.
model_result(model)

##### MLP [1:30 Dropout 0.5, 2:30 Dropout 0.5]

In [None]:
utils.set_random_seed(0)
# Distinct name: the model name determines the checkpoint file path and the
# report title, so reusing the single-hidden-layer experiment's
# 'MLP_Dropout_0.5' would overwrite that experiment's checkpoint.
MODEL_NAME = 'MLP_L1_30_L2_30_Dropout_0.5'

model = models.Sequential(name=f'{MODEL_NAME}')

# First hidden block: 30 ReLU units + dropout 0.5.
model.add(layers.Dense(30, activation='relu', input_shape=(30,)))
model.add(layers.Dropout(rate=0.5))

# Second hidden block: 30 ReLU units + dropout 0.5.
model.add(layers.Dense(30, activation='relu'))
model.add(layers.Dropout(rate=0.5))

model.add(layers.Dense(1, activation='sigmoid'))

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the two-hidden-layer (30, 30) + Dropout 0.5 run.
model_history(history)

In [None]:
# Report test-split performance of the two-hidden-layer (30, 30) + Dropout 0.5 best checkpoint.
model_result(model)

##### MLP [1:15 Dropout 0.5, 2:15 Dropout 0.5]

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'MLP_L1_15_L2_15_Dropout_0.5'

# Two hidden blocks of 15 ReLU units, each followed by dropout 0.5, then a
# sigmoid output unit.
model = models.Sequential(
    [
        layers.Dense(15, activation='relu', input_shape=(30,)),
        layers.Dropout(rate=0.5),
        layers.Dense(15, activation='relu'),
        layers.Dropout(rate=0.5),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the two-hidden-layer (15, 15) + Dropout 0.5 run.
model_history(history)

In [None]:
# Report test-split performance of the two-hidden-layer (15, 15) + Dropout 0.5 best checkpoint.
model_result(model)

##### MLP [1:15 tanh Dropout 0.5, 2:15 tanh Dropout 0.5]

In [None]:
utils.set_random_seed(0)
MODEL_NAME = 'MLP_L1_15_tanh_L2_15_tanh_Dropout_0.5'

# Two hidden blocks of 15 tanh units, each followed by dropout 0.5, then a
# sigmoid output unit.
model = models.Sequential(
    [
        layers.Dense(15, activation='tanh', input_shape=(30,)),
        layers.Dropout(rate=0.5),
        layers.Dense(15, activation='tanh'),
        layers.Dropout(rate=0.5),
        layers.Dense(1, activation='sigmoid'),
    ],
    name=MODEL_NAME,
)

history, model = model_fit(model, epochNum=1000)

In [None]:
# Plot the loss/accuracy curves of the two-hidden-layer (15, 15) tanh + Dropout 0.5 run.
model_history(history)

In [None]:
# Report test-split performance of the two-hidden-layer (15, 15) tanh + Dropout 0.5 best checkpoint.
model_result(model)

In [None]:
# Collection of uncompiled candidate architectures. The cells below append to
# it and the final loop fits every entry. Re-run this cell before re-running
# the cells below, otherwise each candidate is appended a second time.
models_list = []

### SLP

In [None]:
# SLP candidates: a plain single sigmoid unit, then the same unit behind
# input dropout of 0.1 and 0.2.
model = models.Sequential(
    [layers.Dense(1, activation='sigmoid', input_shape=(30,))],
    name='SLP_1',
)
models_list.append(model)

model = models.Sequential(
    [
        layers.Dropout(rate=0.1, input_shape=(30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='SLP_2',
)
models_list.append(model)

model = models.Sequential(
    [
        layers.Dropout(rate=0.2, input_shape=(30,)),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='SLP_3',
)
models_list.append(model)

### MLP

In [None]:
# MLP 1 Layer: one hidden ReLU layer of varying width, then a sigmoid output.
# The candidates differ only in name and hidden width, so they are generated
# from a name -> width table instead of five copy-pasted definitions.
for model_name, hidden_units in {'MLP_1': 2, 'MLP_2': 5, 'MLP_3': 10, 'MLP_4': 15, 'MLP_5': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 2 Layer: two hidden ReLU layers of equal width, then a sigmoid output.
# Generated from a name -> width table. This also removes the accidental
# `model.add(...), models_list.append(model)` tuple expression that MLP_6 used
# to register itself.
for model_name, hidden_units in {'MLP_6': 2, 'MLP_7': 5, 'MLP_8': 10, 'MLP_9': 15, 'MLP_10': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 3 Layer: a narrowing (30-15-10) and a widening (10-15-30) stack of
# ReLU layers, each followed by a sigmoid output unit.
for model_name, (units_1, units_2, units_3) in {'MLP_11': (30, 15, 10), 'MLP_12': (10, 15, 30)}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(units_1, activation='relu', input_shape=(30,)))
    model.add(layers.Dense(units_2, activation='relu'))
    model.add(layers.Dense(units_3, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

### MLP - Dropout 0.3

In [None]:
# MLP 1 Layer + Dropout 0.3: one hidden ReLU layer of varying width followed
# by dropout, then a sigmoid output.
# The candidates differ only in name and hidden width, so they are generated
# from a name -> width table instead of five copy-pasted definitions.
for model_name, hidden_units in {'MLP_13': 2, 'MLP_14': 5, 'MLP_15': 10, 'MLP_16': 15, 'MLP_17': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 2 Layer + Dropout 0.3: two hidden ReLU layers of equal width, each
# followed by dropout, then a sigmoid output.
# Generated from a name -> width table. This also removes the accidental
# `model.add(...), models_list.append(model)` tuple expression that MLP_18
# used to register itself.
for model_name, hidden_units in {'MLP_18': 2, 'MLP_19': 5, 'MLP_20': 10, 'MLP_21': 15, 'MLP_22': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 3 Layer + Dropout 0.3: a narrowing (30-15-10) and a widening (10-15-30)
# stack of ReLU layers, each followed by dropout, then a sigmoid output unit.
for model_name, (units_1, units_2, units_3) in {'MLP_23': (30, 15, 10), 'MLP_24': (10, 15, 30)}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(units_1, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(units_2, activation='relu'))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(units_3, activation='relu'))
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

### MLP - Dropout 0.5

In [None]:
# MLP 1 Layer + Dropout 0.5: one hidden ReLU layer of varying width followed
# by dropout, then a sigmoid output.
# The candidates differ only in name and hidden width, so they are generated
# from a name -> width table instead of five copy-pasted definitions.
for model_name, hidden_units in {'MLP_25': 2, 'MLP_26': 5, 'MLP_27': 10, 'MLP_28': 15, 'MLP_29': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 2 Layer + Dropout 0.5: two hidden ReLU layers of equal width, each
# followed by dropout, then a sigmoid output.
# Generated from a name -> width table. This also removes the accidental
# `model.add(...), models_list.append(model)` tuple expression that MLP_30
# used to register itself.
for model_name, hidden_units in {'MLP_30': 2, 'MLP_31': 5, 'MLP_32': 10, 'MLP_33': 15, 'MLP_34': 30}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
# MLP 3 Layer + Dropout 0.5: a narrowing (30-15-10) and a widening (10-15-30)
# stack of ReLU layers, each followed by dropout, then a sigmoid output unit.
for model_name, (units_1, units_2, units_3) in {'MLP_35': (30, 15, 10), 'MLP_36': (10, 15, 30)}.items():
    model = models.Sequential(name=model_name)
    model.add(layers.Dense(units_1, activation='relu', input_shape=(30,)))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(units_2, activation='relu'))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(units_3, activation='relu'))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    models_list.append(model)

In [None]:
from time import time

# One summary row per candidate, taken at its best-validation-loss epoch.
summary_columns = ['Model Name', 'Train Accuracy', 'Train Loss', 'Validation Accuracy', 'Validation Loss']
models_df = pd.DataFrame(columns=summary_columns)

print('Processing Models:')
for model in models_list:
    print(f'Fitting {model.name}...', end= " ")

    # Time the fit (wall clock). model_fit returns the history together with
    # the model reloaded from its best checkpoint.
    t_start = time()
    history, model = model_fit(model, 2000, verbose=0)
    elapsed = time() - t_start

    # Append the summary as a new row at the next integer index.
    models_df.loc[len(models_df)] = {'Model Name': model.name, **proc_history(history)}

    print(f'Done! ({elapsed} [s])')