# NN implementation with Keras

## Imports

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

## Utility functions

In [10]:
# Helper that standardizes the data
def normalize(data):
    """
    Standardize ``data`` with a freshly fitted ``StandardScaler``.

    The scaler is fitted on ``data`` itself and is not returned, so the
    fitted statistics cannot be reused on other data afterwards.

    Args:
        data: Array-like input accepted by ``StandardScaler.fit_transform``.

    Returns:
        The transformed data as returned by ``fit_transform``.
    """
    return StandardScaler().fit_transform(data)


In [11]:
def split_data(data, labels, k):
    """
    Normalize the data and split it into k randomly shuffled folds.

    Every sample is assigned to exactly one fold. When the number of
    samples is not divisible by ``k``, the first ``n_samples % k`` folds
    receive one extra sample instead of the remainder being discarded.

    Note that ``normalize`` is applied to the whole input before the split,
    so all folds share the same scaling statistics.

    Args:
        data (np.ndarray | pd.DataFrame): Input data, one row per sample.
        labels (np.ndarray | pd.Series): Labels, one entry per sample.
        k (int): Number of folds; must satisfy ``1 <= k <= len(data)``.

    Returns:
        list: List of ``k`` tuples ``(fold_data, fold_labels)``.

    Raises:
        ValueError: If ``data`` and ``labels`` have different lengths, or if
            ``k`` is not in the range ``[1, len(data)]``.
    """
    # Convert pandas objects (or any other array-like) to NumPy arrays so
    # that integer-array indexing below behaves uniformly.
    if not isinstance(data, np.ndarray):
        data = data.to_numpy() if hasattr(data, "to_numpy") else np.asarray(data)
    if not isinstance(labels, np.ndarray):
        labels = labels.to_numpy() if hasattr(labels, "to_numpy") else np.asarray(labels)

    n_samples = len(data)
    if len(labels) != n_samples:
        raise ValueError(
            f"data and labels must have the same length, got {n_samples} and {len(labels)}"
        )
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    data = normalize(data)

    # Shuffle with the global NumPy RNG, so np.random.seed controls the split.
    indices = np.arange(n_samples)
    np.random.shuffle(indices)

    # array_split spreads the remainder over the first folds, so no sample
    # is left out when n_samples is not a multiple of k.
    return [
        (data[fold_indices], labels[fold_indices])
        for fold_indices in np.array_split(indices, k)
    ]

## Double-k-fold cross validation

In [None]:
def double_k_fold_cross_validation(data, labels, in_size=6, out_size=1, outer_k=5, inner_k=5, param_grid=None):
    """
    Run a double (nested) K-fold cross-validation.

    For each outer fold, every configuration in ``param_grid`` is evaluated
    with an inner K-fold cross-validation on the outer training data. The
    configuration with the highest mean inner score is used to train a
    final model on the outer training data, which is then scored on the
    held-out outer fold.

    Args:
        data (np.ndarray): Dataset features.
        labels (np.ndarray): Dataset labels.
        in_size (int): Passed to ``NeuralNetwork`` as ``input_size``.
        out_size (int): Passed to ``NeuralNetwork`` as ``output_size``.
        outer_k (int): Number of folds for the outer validation loop.
        inner_k (int): Number of folds for the hyperparameter search.
        param_grid (list): Non-empty collection of dictionaries, each holding
            the keyword arguments of one hyperparameter configuration.

    Returns:
        tuple: ``(outer_scores, outer_params)`` where ``outer_scores`` is the
        list of scores obtained on each outer fold and ``outer_params`` is
        the list of hyperparameter dictionaries selected for each outer fold.

    Raises:
        ValueError: If ``param_grid`` is ``None`` or empty.
    """
    if param_grid is None:
        raise ValueError("param_grid must be a non-empty list of hyperparameter dictionaries")
    # Materialize the grid: it is iterated once per outer fold, so a
    # one-shot iterable would be exhausted after the first fold.
    param_grid = list(param_grid)
    if not param_grid:
        raise ValueError("param_grid must be a non-empty list of hyperparameter dictionaries")

    outer_scores = []
    outer_params = []
    outer_folds = split_data(data, labels, k=outer_k)

    for i, (outer_test_data, outer_test_labels) in enumerate(outer_folds):
        print("Outer fold", i + 1)
        outer_train_data = np.concatenate([fold[0] for j, fold in enumerate(outer_folds) if j != i])
        outer_train_labels = np.concatenate([fold[1] for j, fold in enumerate(outer_folds) if j != i])

        # NOTE(review): selection keeps the configuration with the HIGHEST
        # mean score; if evaluate_model returns a loss this picks the worst
        # configuration -- confirm the score direction.
        best_params = {}
        best_score = -np.inf

        # Inner validation for hyperparameter optimization
        inner_folds = split_data(outer_train_data, outer_train_labels, k=inner_k)

        for params in param_grid:
            inner_scores = []

            for j, (inner_val_data, inner_val_labels) in enumerate(inner_folds):
                inner_train_data = np.concatenate([fold[0] for l, fold in enumerate(inner_folds) if l != j])
                inner_train_labels = np.concatenate([fold[1] for l, fold in enumerate(inner_folds) if l != j])
                # Build a fresh network for every inner fold
                model = NeuralNetwork(input_size=in_size, output_size=out_size, **params)
                model.train(inner_train_data, inner_train_labels, inner_val_data, inner_val_labels)
                inner_scores.append(evaluate_model(model, inner_val_data, inner_val_labels))

            avg_score = np.mean(inner_scores)
            if avg_score > best_score:
                best_score = avg_score
                best_params = params

        # Final training on the outer training set
        final_model = NeuralNetwork(input_size=in_size, output_size=out_size, **best_params)
        # NOTE(review): the outer test fold is handed to train() as well; if
        # train() uses it for early stopping or model selection, the outer
        # score below is optimistically biased -- confirm.
        final_model.train(outer_train_data, outer_train_labels, outer_test_data, outer_test_labels)
        outer_scores.append(evaluate_model(final_model, outer_test_data, outer_test_labels))
        outer_params.append(best_params)

    return outer_scores, outer_params

## K-fold cross validation

In [None]:
def k_fold_cross_validation(data, labels, in_size, out_size, params, k=5):
    """
    Run a K-fold cross-validation and train a final model on all folds.

    For each fold a fresh ``NeuralNetwork`` is trained on the remaining
    folds and scored on the held-out one. Afterwards a new model is trained
    on the concatenation of all folds.

    Args:
        data (np.ndarray | pd.DataFrame): Dataset features.
        labels (np.ndarray | pd.Series): Dataset labels.
        in_size (int): Passed to ``NeuralNetwork`` as ``input_size``.
        out_size (int): Passed to ``NeuralNetwork`` as ``output_size``.
        params (dict): Extra keyword arguments for ``NeuralNetwork``.
        k (int): Number of folds.

    Returns:
        tuple: ``(avg_score, model)`` where ``avg_score`` is the mean of the
        per-fold scores and ``model`` is the network trained on all folds.
    """
    scores = []
    # split_data only accepts (data, labels, k); network sizes are not its concern.
    folds = split_data(data, labels, k=k)

    for i, (test_data, test_labels) in enumerate(folds):
        # Build a fresh network for every fold
        model = NeuralNetwork(input_size=in_size, output_size=out_size, **params)

        train_data = np.concatenate([fold[0] for j, fold in enumerate(folds) if j != i])
        train_labels = np.concatenate([fold[1] for j, fold in enumerate(folds) if j != i])

        model.train(train_data, train_labels)
        scores.append(evaluate_model(model, test_data, test_labels))

    avg_score = np.mean(scores)

    # Retrain from scratch on every fold's (already normalized) data.
    train_data = np.concatenate([fold[0] for fold in folds])
    train_labels = np.concatenate([fold[1] for fold in folds])

    model = NeuralNetwork(input_size=in_size, output_size=out_size, **params)
    model.train(train_data, train_labels)

    return avg_score, model