In [None]:
import pandas as pd

# Data Loading

In [None]:
from api.data_handler import DataHandler

# Handler built with the column names used for the MONK files
monk_columns = ['target', 'col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'id']
data_handler = DataHandler(monk_columns)

# How many datasets every loop in this notebook iterates over
datasets_number = 3

# One DataFrame per dataset; list index 0 corresponds to file "monks-1"
df_train: list[pd.DataFrame] = []
df_test: list[pd.DataFrame] = []

for i in range(datasets_number):
    # File names are 1-based while the lists are 0-based, hence i+1
    train_frame = data_handler.load_data(f'data/monks/monks-{i+1}.train')
    df_train.append(train_frame)

    test_frame = data_handler.load_data(f'data/monks/monks-{i+1}.test')
    df_test.append(test_frame)

    # Quick look at the first rows of what was just loaded
    print(train_frame.head())
    print(test_frame.head())

# Data Split

In [None]:
# Feature ("x") and target ("y") containers, one entry per dataset.
# They are rebuilt from scratch every time this cell runs.
x_train : list[pd.DataFrame] = []
y_train : list[pd.DataFrame] = []
x_test  : list[pd.DataFrame] = []
y_test  : list[pd.DataFrame] = []

# Split every TR set and TS set on the 'target' column
for i in range(datasets_number):

    # TR set: remove the 'id' column, then split on 'target'.
    # The cleaned frame is stored back into df_train, so on a second run of this
    # cell 'id' is already gone; errors='ignore' keeps the cell re-runnable
    # instead of raising KeyError.
    df_train[i] = df_train[i].drop(columns=['id'], errors='ignore').copy(deep=True)
    x, y = data_handler.split_data(data=df_train[i], cols_name_split=['target'])
    x_train.append(x)
    y_train.append(y)

    # TS set: same treatment as the TR set
    df_test[i] = df_test[i].drop(columns=['id'], errors='ignore').copy(deep=True)
    x, y = data_handler.split_data(data=df_test[i], cols_name_split=['target'])
    x_test.append(x)
    y_test.append(y)


# 1-Hot Encoding

In [None]:
# Replace the "x" frames of every dataset with their one-hot encoded version.
# NOTE(review): TR and TS are encoded by separate calls; whether both end up with
# the same columns depends on DataHandler.one_hot_encoding - confirm.
for i in range(datasets_number):
    encoded_train = data_handler.one_hot_encoding(x_train[i])
    x_train[i] = encoded_train

    encoded_test = data_handler.one_hot_encoding(x_test[i])
    x_test[i] = encoded_test

    # Report the shape of the encoded data
    print(f"Monk {i+1} [TRAIN]: ", x_train[i].shape, sep="")
    print(f"Monk {i+1} [TEST]: ", x_test[i].shape, sep="")

# Grid Search parameters

In [None]:
# Hyperparameter search space for the complete grid search.
# Outer key: 0-based dataset index (0 -> Monk 1, 1 -> Monk 2, 2 -> Monk 3).
# Inner dict: hyperparameter name -> list of candidate values.
# Key order and value order are kept as they are because the combinations
# are generated from these dicts by the DataHandler.
param_space = {
    # Monk 1
    0: {
        'hidden_units': [3, 4],
        'patience': [15, 30],
        'learning_rate': [0.4, 0.3],
        'batch_size': [4, 6],
        'nesterov': ["T", "F"],
        'epochs': [180, 250],
        'momentum': [0.6, 0.7],
    },
    # Monk 2
    1: {
        'hidden_units': [3, 4, 5],
        'patience': [15, 30],
        'factor_lr_dec': [0.5, 1],
        'step_decay': [500, 1000, 1500],
        'learning_rate': [0.9, 0.8, 0.7],
        'batch_size': [10, 30, 60],
        'epochs': [180, 200],
        'momentum': [0.6, 0.7, 0.8],
        'nesterov': ["T", "F"],
    },
    # Monk 3 (the only space that also explores 'weight_decay')
    2: {
        'hidden_units': [3, 4],
        'patience': [10, 15],
        'nesterov': ["F", "T"],
        'learning_rate': [0.1, 0.2, 0.3],
        'batch_size': [62, 64],
        'epochs': [180, 270],
        'weight_decay': [0.4, 0.5],
        'momentum': [0.2, 0.3],
        'factor_lr_dec': [0.5, 1],
        'step_decay': [500, 1000],
    },
}

# Model Selection

## Best Hyperparameters Search

In [None]:
from sklearn.model_selection import StratifiedKFold
from api.binary_nn import BinaryNN

# Best BinaryNN found so far for each dataset (list index == dataset_i)
nn: list[BinaryNN] = []

# Per-dataset settings: number of CV folds and number of hidden layers.
# NOTE: an earlier randomized search step (a per-dataset `trials_list` combined with
# `data_handler.random_dictionary`) preceded this exhaustive search; it is not run here.
k_values = [5, 5, 5]
n_hidden_layers_list = [1, 1, 1]

# Search of the best hyperparameters for each Training set
for dataset_i in range(datasets_number):
    X = x_train[dataset_i].values
    y = y_train[dataset_i].values
    k = k_values[dataset_i]

    # Stratified K-fold CV; the fixed seed means every combination is scored on the same folds
    kfold = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

    # Compute, store and retrieve all the combinations for the complete grid search
    data_handler.set_params_combinations(params=param_space[dataset_i])
    params_combinations = data_handler.get_params_combinations()

    # Every combination is trained and evaluated with K-fold CV
    for trial, params in enumerate(params_combinations):

        # Label the model with the 1-based index of the current combination.
        # (Previously this used a stale loop variable `i` left over from earlier
        # cells, so every model was given the same trial number.)
        nn_i = BinaryNN(params=params, monk_i=dataset_i+1, trial=trial+1)

        # Building the model
        nn_i.create_model(n_hidden_layers=n_hidden_layers_list[dataset_i])

        # NOTE(review): the same nn_i instance is fitted on every fold; whether fit()
        # re-initialises the weights between folds is not visible here - confirm.
        for train_index, val_index in kfold.split(X, y):
            x_kfold_train, x_kfold_val = X[train_index], X[val_index]
            y_kfold_train, y_kfold_val = y[train_index], y[val_index]

            # Training the model on this fold
            nn_i.fit(
                x_train=x_kfold_train,
                y_train=y_kfold_train,
                x_val=x_kfold_val,
                y_val=y_kfold_val
            )

            # Evaluating the model on this fold
            nn_i.evaluate(
                x_train=x_kfold_train,
                y_train=y_kfold_train,
                x_val=x_kfold_val,
                y_val=y_kfold_val
            )

        # First combination of this dataset: it becomes the initial "best"
        if len(nn) == dataset_i:
            nn.append(nn_i)

        # Print the results of this trial next to the best found so far
        print("------------------ Current Hyperparameters ------------------")
        nn_i.print_training_info()
        print("-------------------- Best Hyperparameters -------------------")
        nn[dataset_i].print_training_info()
        print("\n\n")

        # Update the best model if: no high overfitting (mean TR accuracy at most 0.1 above
        # mean VL accuracy) AND (higher mean VL accuracy OR (equal mean VL accuracy AND
        # higher mean TR accuracy))
        if (
            nn_i.mean_tr_accuracy - 0.1 <= nn_i.mean_vl_accuracy
            and (
                nn[dataset_i].mean_vl_accuracy < nn_i.mean_vl_accuracy
                or (
                    nn[dataset_i].mean_vl_accuracy == nn_i.mean_vl_accuracy
                    and nn[dataset_i].mean_tr_accuracy < nn_i.mean_tr_accuracy
                )
            )
        ):
            nn[dataset_i] = nn_i

        # Tie at perfect TR/VL accuracy: prefer the candidate whose mean TR and VL losses are both lower
        if (
            nn_i.mean_tr_accuracy == 1 and nn_i.mean_vl_accuracy == 1
            and nn_i.mean_tr_accuracy == nn[dataset_i].mean_tr_accuracy
            and nn_i.mean_vl_accuracy == nn[dataset_i].mean_vl_accuracy
            and abs(nn_i.mean_tr_accuracy - nn_i.mean_vl_accuracy) < 0.02
            and nn_i.mean_vl_loss < nn[dataset_i].mean_vl_loss
            and nn_i.mean_tr_loss < nn[dataset_i].mean_tr_loss
        ):
            nn[dataset_i] = nn_i

        # Exit case: perfect accuracies with small (< 0.1) and close (< 0.01 apart) losses;
        # accept this candidate and stop searching for this dataset
        if (
            nn_i.mean_tr_accuracy == 1 and nn_i.mean_vl_accuracy == 1
            and nn_i.mean_vl_loss < 0.1 and nn_i.mean_tr_loss < 0.1
            and abs(nn_i.mean_vl_loss - nn_i.mean_tr_loss) < 0.01
        ):
            nn[dataset_i] = nn_i
            break

    # Print output
    print(f"### Best Hyperparameters of Monk {dataset_i+1} ###")
    nn[dataset_i].print_training_info()
    print("\n\n")

## Print of best Hyperparameters 

In [None]:
# Show, for every dataset, the model selected by the search above
for dataset_i in range(datasets_number):

    # Header first, then the stored training info and the plot of the selected model
    print(f"\n### Best Hyperparameters for Monk {dataset_i+1} ###")
    selected_model = nn[dataset_i]
    selected_model.print_training_info()
    selected_model.print_plot()



# Retraining best models


In [None]:
# Refit each selected model on its full training set (no validation data is passed)
for dataset_i in range(datasets_number):
    selected_model = nn[dataset_i]
    full_x = x_train[dataset_i].values
    full_y = y_train[dataset_i].values

    # retraining=True is forwarded to BinaryNN.fit as-is
    selected_model.fit(x_train=full_x, y_train=full_y, retraining=True)

    # Report the training info after the refit
    print(f"\n### Retraining of Monk {dataset_i+1} ###")
    selected_model.print_training_info()


## Different weight initializations: Mean, Standard Deviation and Variance

In [None]:
import numpy as np

# Number of models (i.e. fresh BinaryNN instances) trained per dataset
num_initializations = 5

# For each dataset: train several fresh models with the best hyperparameters and
# summarise loss/accuracy across them (mean, variance, standard deviation)
for dataset_i in range(datasets_number):
    print(f"\n### Monk {dataset_i + 1} ###")

    # Data of the current dataset: x / y_fit are the TR set, X / y are the TS set
    x = x_train[dataset_i].values
    y_fit = y_train[dataset_i].values
    X = x_test[dataset_i].values
    y = y_test[dataset_i].values

    tr_mse_values = []   # mean_tr_loss of each initialization
    vl_mse_values = []   # mean_vl_loss of each initialization
    vl_acc_values = []   # mean_vl_accuracy of each initialization
    tr_acc_values = []   # mean_tr_accuracy of each initialization

    # A named loop index is used instead of `_`, which IPython reserves for the last cell output
    for init_i in range(num_initializations):
        # New model instance with the best hyperparameters of the current dataset
        nn_instance = BinaryNN(params=nn[dataset_i].params, monk_i=dataset_i+1, trial=init_i+1)
        nn_instance.create_model(n_hidden_layers=n_hidden_layers_list[dataset_i])

        # The TS set is passed in the "val" slots here.
        # NOTE(review): the hyperparameters include 'patience'; if fit() uses x_val/y_val
        # for early stopping, test data would influence training - confirm.
        nn_instance.fit(
            x_train=x,
            y_train=y_fit,
            x_val=X,
            y_val=y,
        )
        nn_instance.evaluate(
            x_train=x,
            y_train=y_fit,
            x_val=X,
            y_val=y,
        )

        # Collect the metrics exposed by the instance after evaluate()
        tr_mse_values.append(nn_instance.mean_tr_loss)
        vl_mse_values.append(nn_instance.mean_vl_loss)
        tr_acc_values.append(nn_instance.mean_tr_accuracy)
        vl_acc_values.append(nn_instance.mean_vl_accuracy)

        print("")
        nn_instance.print_training_info()
        print("")

    # Means
    meantr_mse = np.mean(tr_mse_values)
    meanvl_mse = np.mean(vl_mse_values)
    meantr_acc = np.mean(tr_acc_values)
    meanvl_acc = np.mean(vl_acc_values)

    # Variances
    variance_mse_vl = np.var(vl_mse_values)
    variance_mse_tr = np.var(tr_mse_values)
    variancetr_acc = np.var(tr_acc_values)
    variancevl_acc = np.var(vl_acc_values)

    # Standard deviations
    std_tr_acc = np.std(tr_acc_values)
    std_vl_acc = np.std(vl_acc_values)
    std_deviation_vl = np.std(vl_mse_values)
    std_deviation_tr = np.std(tr_mse_values)

    print(f'\nMean TR MSE: {meantr_mse}')
    print(f'\nMean VL MSE: {meanvl_mse}')
    print(f'\nMean TR Accuracy: {meantr_acc}')
    print(f'\nMean VL Accuracy: {meanvl_acc}')
    print(f'\nVariance TR MSE: {variance_mse_tr}')
    print(f'\nVariance VL MSE: {variance_mse_vl}')
    print(f'\nVariance TR Accuracy: {variancetr_acc}')
    print(f'\nVariance VL Accuracy: {variancevl_acc}')
    # The TR standard deviations were computed but never reported; print them with the VL ones
    print(f'\nStandard Deviation TR MSE: {std_deviation_tr}')
    print(f'\nStandard Deviation VL MSE: {std_deviation_vl}')
    print(f'\nStandard Deviation TR Accuracy: {std_tr_acc}')
    print(f'Standard Deviation VL Accuracy: {std_vl_acc}')

    # Plot learning curves (only for the last instance trained in the loop above)
    nn_instance.print_plot()


# Model Assessment & Evaluation 

In [None]:
# Final assessment: run every selected model on its own Test set
for dataset_i in range(datasets_number):
    X = x_test[dataset_i].values
    y = y_test[dataset_i].values
    final_model = nn[dataset_i]

    # Evaluate on the TS set, then compute the score on the same data
    final_model.test(x_test=X, y_test=y)
    final_model.score(x_test=X, y_test=y)

    # Report: printed model, confusion matrix and ROC curve
    print(final_model)
    final_model.print_confusion_matrix(y_test=y)
    final_model.print_roc_curve(y_test=y)