# NN implemetation with Keras

## Imports

In [21]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD
import itertools

## Utils functions

In [22]:
# Funzione per normalizzare i dati
def normalize(data):
    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(data)
    return normalized_data


In [23]:
def split_data(data, labels, k=5):
    """
    Divide i dati in k fold.
    
    Args:
        data (np.ndarray | pd.DataFrame): Dati di input.
        labels (np.ndarray | pd.Series): Etichette.
        k (int): Numero di fold.
    
    Returns:
        list: Lista di tuple (fold_data, fold_labels).
    """
    if not isinstance(data, np.ndarray):
        data = data.to_numpy()  # Converte DataFrame in NumPy array
    if not isinstance(labels, np.ndarray):
        labels = labels.to_numpy()  # Converte Series in NumPy array
    
    data = normalize(data)

    fold_size = len(data) // k
    indices = np.arange(len(data))
    np.random.shuffle(indices)
    folds = []

    for i in range(k):
        fold_indices = indices[i * fold_size: (i + 1) * fold_size]
        fold_data = data[fold_indices]
        fold_labels = labels[fold_indices]
        folds.append((fold_data, fold_labels))
    
    return folds

In [24]:
def generate_hyperparameter_combinations(param_ranges):
    """
    Genera tutte le combinazioni di iperparametri basate su range e step specificati.

    :param param_ranges: Dizionario con i nomi degli iperparametri come chiavi.
                         Ogni valore è una tupla (start, stop, step).
    :return: Lista di dizionari con tutte le combinazioni possibili.
    """
    param_values = {
        key: np.arange(start, stop + step, step)
        for key, (start, stop, step) in param_ranges.items()
    }
    
    param_combinations = list(itertools.product(*param_values.values()))
    return [
        dict(zip(param_values.keys(), combination))
        for combination in param_combinations
    ]

## Model creation

In [25]:


def create_nn(input_dim, learning_rate=0.01, hidden_size = 6, hidden_layers = 1,regularization=0.01, momentum=0.9, alpha=0.01):
    """
    Create a binary classification neural network with Leaky ReLU and configurable parameters.
    
    Parameters:
    - input_dim (int): Number of input features.
    - layers_config (list of int): List where each element represents the number of nodes in a layer.
    - learning_rate (float): Learning rate for the optimizer.
    - regularization (float): L2 regularization factor.
    - momentum (float): Momentum for the optimizer.
    - alpha (float): Negative slope coefficient for Leaky ReLU.
    
    Returns:
    - model (tf.keras.Model): Compiled Keras model.
    """
    model = Sequential()
    
    # Input layer
    model.add(Dense(hidden_size, 
                    input_dim=input_dim, 
                    kernel_regularizer=l2(regularization)))
    model.add(LeakyReLU(alpha=alpha))
    
    # Hidden layers with Leaky ReLU 
    for i in range(hidden_layers):
        model.add(Dense(hidden_size, kernel_regularizer=l2(regularization)))
        model.add(LeakyReLU(alpha=alpha))
    
    
    # Output layer (binary classification)
    model.add(Dense(1, activation='sigmoid'))
    
    # Optimizer
    optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
    
    # Compile the model
    model.compile(optimizer=optimizer, 
                  loss='binary_crossentropy', 
                  metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
    
    return model

## Double-k-fold cross validation

In [26]:
def double_k_fold_cross_validation(data, labels, in_size = 6, outer_k = 5, inner_k = 5, param_grid=None):
    """
    Implementa una Double K-Fold Cross-Validation

    Args:
        data (np.ndarray): Caratteristiche del dataset.
        labels (np.ndarray): Etichette del dataset.
        outer_k (int): Numero di fold per la validazione esterna.
        inner_k (int): Numero di fold per l'ottimizzazione iperparametri.
        param_grid (list): Lista di dizionari con gli iperparametri da provare.
    
    Returns:
        list: Lista dei punteggi ottenuti per ogni fold esterno.
    """
    outer_scores = []
    outer_params = []
    outer_folds = split_data(data, labels, k=outer_k)
    
    for i in range(outer_k):
        print("Outer fold", i + 1)
        outer_test_data, outer_test_labels = outer_folds[i]
        outer_train_data = np.concatenate([fold[0] for j, fold in enumerate(outer_folds) if j != i])
        outer_train_labels = np.concatenate([fold[1] for j, fold in enumerate(outer_folds) if j != i])
        
        best_params = {}
        best_score = -np.inf
        
        # Validazione interna per ottimizzazione iperparametri
        inner_folds = split_data(outer_train_data, outer_train_labels, k=inner_k)

        for params in param_grid:
            inner_scores = []
            
            for j in range(inner_k):
                inner_val_data, inner_val_labels = inner_folds[j]
                inner_train_data = np.concatenate([fold[0] for l, fold in enumerate(inner_folds) if l != j])
                inner_train_labels = np.concatenate([fold[1] for l, fold in enumerate(inner_folds) if l != j])
                # Creazione della rete neurale
                model = create_nn(input_dim = in_size, learning_rate=params['learning_rate'], hidden_size = params['hidden_size'], 
                                  hidden_layers = params['hidden_layers'], regularization=params['regularization'], momentum=params['momentum'], alpha=params['alpha'])

                # Allena il modello e ottieni la cronologia (history)
                history = model.fit(inner_train_data, inner_train_labels, epochs=params['epochs'],
                    batch_size=params['batch_size'],
                    validation_data=(inner_val_data, inner_val_labels))
                    
                inner_scores.append(max(history.history['val_accuracy'])
)
            
            avg_score = np.mean(inner_scores)
            if avg_score > best_score:
                best_score = avg_score
                best_params = params
        
        # Addestramento finale sul set di train esterno
        final_model = create_nn(input_dim = in_size, learning_rate=best_params['learning_rate'], hidden_size = best_params['hidden_size'], 
                                  hidden_layers = best_params['hidden_layers'], regularization=best_params['regularization'], momentum=best_params['momentum'], alpha=best_params['alpha'])
        history = final_model.fit(outer_train_data, outer_train_labels, epochs=best_params['epochs'],
                    batch_size=best_params['batch_size'], validation_data=(outer_test_data, outer_test_labels))
        outer_scores.append(max(history.history['val_accuracy']))
        outer_params.append(best_params)
    
    return outer_scores, outer_params

## K-fold cross validation

In [27]:
def k_fold_cross_validation(data, labels, in_size, out_size, params, k=5):
    """
    Implementa una K-Fold Cross-Validation
    """
    scores = []
    folds = split_data(data, labels, input_size = 6, output_size = 1, k=k)


    for i in range(k):
        # Creazione della rete neurale
        model = create_nn(input_dim = in_size, learning_rate=params['learning_rate'], hidden_size = params['hidden_size'], 
                                  hidden_layers = params['hidden_layers'], regularization=params['regularization'], momentum=params['momentum'], alpha=params['alpha'])

        test_data, test_labels = folds[i]
        train_data = np.concatenate([fold[0] for j, fold in enumerate(folds) if j != i])
        train_labels = np.concatenate([fold[1] for j, fold in enumerate(folds) if j != i])
        
        history = model.fit(train_data, train_labels, epochs=params['epochs'],
                    batch_size=params['batch_size'], validation_data=(test_data, test_labels))
        score = max(history.history['val_accuracy'])
        scores.append(score)
            
    avg_score = np.mean(scores)

    train_data = np.concatenate([fold[0] for fold in folds])
    train_labels = np.concatenate([fold[1] for fold in folds])

    model = create_nn(input_dim = in_size, learning_rate=params['learning_rate'], hidden_size = params['hidden_size'], 
                                  hidden_layers = params['hidden_layers'], regularization=params['regularization'], momentum=params['momentum'], alpha=params['alpha'])

        
    history = model.fit(train_data, train_labels, epochs=params['epochs'],
                    batch_size=params['batch_size'])


    return avg_score, model

## Data loading

In [28]:
# Carica i file di addestramento e test per ciascun dataset dal percorso specificato
monk1_train = pd.read_csv('../Datasets/Monks/monks-1.train', sep='\s+', header=None)
monk1_test = pd.read_csv('../Datasets/Monks/monks-1.test', sep='\s+', header=None)

monk2_train = pd.read_csv('../Datasets/Monks/monks-2.train', sep='\s+', header=None)
monk2_test = pd.read_csv('../Datasets/Monks/monks-2.test', sep='\s+', header=None)

monk3_train = pd.read_csv('../Datasets/Monks/monks-3.train', sep='\s+', header=None)
monk3_test = pd.read_csv('../Datasets/Monks/monks-3.test', sep='\s+', header=None)


# Separazione tra features e labels per monk1
X1_train = monk1_train.iloc[:, 1:7].values  # Caratteristiche (features)
y1_train = monk1_train.iloc[:, 0].values   # Etichette (labels)

X1_test = monk1_test.iloc[:, 1:7].values
y1_test = monk1_test.iloc[:, 0].values

# Separazione tra features e labels per monk2
X2_train = monk2_train.iloc[:, 1:7].values
y2_train = monk2_train.iloc[:, 0].values

X2_test = monk2_test.iloc[:, 1:7].values
y2_test = monk2_test.iloc[:, 0].values

# Separazione tra features e labels per monk3
X3_train = monk3_train.iloc[:, 1:7].values
y3_train = monk3_train.iloc[:, 0].values

X3_test = monk3_test.iloc[:, 1:7].values
y3_test = monk3_test.iloc[:, 0].values

In [29]:
# Definisce i parametri della rete neurale
input_size = 6
output_size = 1

# Definizione dei range degli iperparametri
param_ranges = {
    "learning_rate": (0.001, 0.001, 0.005),  # Da 0.001 a 0.01 con step di 0.005
    "epochs": (50, 50, 1),                   # Da 0 a 10 con step di 1
    "batch_size": (8, 8, 8),             # Da 8 a 32 con step di 8
    "hidden_size": (3, 3, 1),           # Da 32 a 128 con step di 32
    "hidden_layers": (1, 1, 1),             # Da 1 a 3 con step di 1
    "momentum": (0.9, 0.92, 0.01),          # Da 0.9 a 0.99 con step di 0.01
    "regularization": (0.01, 0.01, 0.05),             # Da 0.0 a 0.1 con step di 0.05
    "alpha": (0.01, 0.01, 0.01)              # Da 0.01 a 0.1 con step di 0.05
}

#start_time = time.time()
print("Generazione delle combinazioni di iperparametri...")
param_grid = []
param_grid = generate_hyperparameter_combinations(param_ranges)
#end_time = time.time()

#execution_time = end_time - start_time
#print(f"Tempo di esecuzione di generate_hyperparameter_combinations: {execution_time} secondi")

scores,params = double_k_fold_cross_validation(X1_train, y1_train, input_size, outer_k=5, inner_k=3, param_grid=param_grid)
for i in range(len(scores)):
    print(f"Fold {i + 1}: {scores[i]}")
    print(f"  params: {params[i]}")
print("Punteggio medio:", np.mean(scores))

Generazione delle combinazioni di iperparametri...
Outer fold 1
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.4608 - auc: 0.5077 - loss: 0.7588 - val_accuracy: 0.4062 - val_auc: 0.5273 - val_loss: 0.7624
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4547 - auc: 0.5034 - loss: 0.7653 - val_accuracy: 0.5625 - val_auc: 0.5820 - val_loss: 0.7604
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4823 - auc: 0.5341 - loss: 0.7596 - val_accuracy: 0.5625 - val_auc: 0.6152 - val_loss: 0.7584
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4514 - auc: 0.6918 - loss: 0.7559 - val_accuracy: 0.5312 - val_auc: 0.6113 - val_loss: 0.7565
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4327 - auc: 0.5588 - loss: 0.7611 - val_accuracy: 0.5312 - val_auc: 0.6348 - val_loss: 0.7551
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.5859 - auc: 0.6268 - loss: 0.7477 - val_accuracy: 0.5625 - val_auc: 0.6806 - val_loss: 0.7361
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5982 - auc: 0.5932 - loss: 0.7202 - val_accuracy: 0.5625 - val_auc: 0.6806 - val_loss: 0.7358
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5509 - auc: 0.6287 - loss: 0.7200 - val_accuracy: 0.5625 - val_auc: 0.6806 - val_loss: 0.7352
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5532 - auc: 0.5875 - loss: 0.7171 - val_accuracy: 0.5625 - val_auc: 0.6687 - val_loss: 0.7346
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5601 - auc: 0.6537 - loss: 0.7277 - val_accuracy: 0.5938 - val_auc: 0.6825 - val_loss: 0.7339
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.4802 - auc: 0.6459 - loss: 0.7055 - val_accuracy: 0.5000 - val_auc: 0.7520 - val_loss: 0.7346
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4643 - auc: 0.6858 - loss: 0.7145 - val_accuracy: 0.7500 - val_auc: 0.7421 - val_loss: 0.7340
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6312 - auc: 0.7397 - loss: 0.6853 - val_accuracy: 0.7500 - val_auc: 0.7282 - val_loss: 0.7335
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5898 - auc: 0.7204 - loss: 0.7100 - val_accuracy: 0.7500 - val_auc: 0.7024 - val_loss: 0.7327
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7105 - auc: 0.7407 - loss: 0.6874 - val_accuracy: 0.7188 - val_auc: 0.7063 - val_loss: 0.7320
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.5302 - auc: 0.5612 - loss: 0.7589 - val_accuracy: 0.3125 - val_auc: 0.2676 - val_loss: 0.7936
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6227 - auc: 0.6775 - loss: 0.7410 - val_accuracy: 0.3438 - val_auc: 0.2539 - val_loss: 0.7934
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4990 - auc: 0.5442 - loss: 0.7529 - val_accuracy: 0.3438 - val_auc: 0.2520 - val_loss: 0.7932
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6038 - auc: 0.5608 - loss: 0.7576 - val_accuracy: 0.3438 - val_auc: 0.2266 - val_loss: 0.7929
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5481 - auc: 0.5370 - loss: 0.7580 - val_accuracy: 0.3125 - val_auc: 0.2266 - val_loss: 0.7926
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 0.4843 - auc: 0.3953 - loss: 1.0767 - val_accuracy: 0.4062 - val_auc: 0.3745 - val_loss: 0.9654
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4252 - auc: 0.5104 - loss: 1.0253 - val_accuracy: 0.4062 - val_auc: 0.3745 - val_loss: 0.9352
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4799 - auc: 0.5153 - loss: 0.9428 - val_accuracy: 0.4375 - val_auc: 0.3907 - val_loss: 0.9024
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4747 - auc: 0.5102 - loss: 0.9565 - val_accuracy: 0.4688 - val_auc: 0.4069 - val_loss: 0.8738
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4861 - auc: 0.4344 - loss: 0.9587 - val_accuracy: 0.4688 - val_auc: 0.4049 - val_loss: 0.8535
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37