In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from the CSV file
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data.csv')

# Columns used as model inputs and as regression targets
input_columns = ['Anthropogenic Forcing', 'CO2', 'CH4']
output_columns = ['Mental and Behavioural Disorder']

# One scaler per side, so inputs and targets are normalised independently
scaler_X = StandardScaler()
scaler_y = StandardScaler()

# Pull features and labels out as arrays and normalise them.
# NOTE(review): both scalers are fitted on the complete dataset here, before
# the train/validation/test splits made later in this file -- confirm that
# this is acceptable for the reported validation/test metrics.
X = scaler_X.fit_transform(df[input_columns].to_numpy())
y = scaler_y.fit_transform(df[output_columns].to_numpy())

# Build and train the model on one train/validation/test split
def train_model(X_train, y_train, X_val, y_val, X_test, y_test, alpha=0.5, beta=0.5, num_epochs=200):
    """Train a one-hidden-layer regressor and report losses and error statistics.

    The network is Linear -> ReLU -> Dropout(0.2) -> Linear with 250 hidden
    units. Its input and output widths are taken from ``X_train.shape[1]`` and
    ``y_train.shape[1]``. Training is full-batch with Adam (lr=0.0004) on an
    MSE loss plus the penalty ``alpha * sum(log(1 + beta * w**2))`` summed over
    every model parameter. Training stops early once the validation MSE has
    failed to improve by more than 0.001 for 10 consecutive epochs.

    Args:
        X_train, X_val, X_test: 2-D float tensors of input features.
        y_train, y_val, y_test: 2-D float tensors of targets (one column per
            output variable).
        alpha: Weight of the regularisation penalty.
        beta: Scale applied to the squared parameters inside the logarithm.
        num_epochs: Maximum number of training epochs; must be at least 1.

    Returns:
        A 7-tuple ``(train_loss, val_loss, test_loss, train_variances,
        val_variances, test_variances, correlations)``:

        * ``train_loss``: regularised training objective of the last executed
          epoch, evaluated with dropout active (float).
        * ``val_loss`` / ``test_loss``: plain MSE of the final model in eval
          mode (floats).
        * ``*_variances``: per-output variance of ``target - prediction``
          (NumPy arrays of length ``y.shape[1]``).
        * ``correlations``: per-output Pearson correlation between test
          targets and test predictions (list).

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        # Without at least one epoch there is no training loss to report.
        raise ValueError("num_epochs must be at least 1")

    # Derive the layer widths from the data instead of hard-coding them.
    n_inputs = X_train.shape[1]
    n_outputs = y_train.shape[1]

    # Model definition with dropout
    class FeedforwardNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.hidden = nn.Linear(n_inputs, 250)
            self.dropout = nn.Dropout(0.2)
            self.output = nn.Linear(250, n_outputs)

        def forward(self, x):
            x = torch.relu(self.hidden(x))
            x = self.dropout(x)
            # Linear output layer: the targets are continuous
            return self.output(x)

    model = FeedforwardNN()

    # Loss function and optimiser
    criterion = nn.MSELoss()
    learning_rate = 0.0004
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Early-stopping helper
    class EarlyStopping:
        """Set ``early_stop`` after ``patience`` epochs without improvement."""

        def __init__(self, patience=5, min_delta=0):
            self.patience = patience
            self.min_delta = min_delta
            self.counter = 0
            self.best_loss = None
            self.early_stop = False

        def __call__(self, val_loss):
            if self.best_loss is None:
                self.best_loss = val_loss
            elif val_loss > self.best_loss - self.min_delta:
                # Not better than the best loss by more than min_delta
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
            else:
                self.best_loss = val_loss
                self.counter = 0

    early_stopping = EarlyStopping(patience=10, min_delta=0.001)

    # Training loop (full batch)
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Add the regularisation term over all parameters
        reg_loss = 0
        for param in model.parameters():
            reg_loss += alpha * torch.sum(torch.log(1 + beta * torch.square(param)))
        loss += reg_loss

        loss.backward()
        optimizer.step()

        # Validation loss drives early stopping
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = criterion(val_outputs, y_val)

        early_stopping(val_loss)
        if early_stopping.early_stop:
            break

    # Final evaluation; eval mode disables dropout for every prediction below
    model.eval()
    with torch.no_grad():
        # Only the final test loss is reported, so compute it once here
        test_loss = criterion(model(X_test), y_test)

        train_pred = model(X_train).numpy()
        val_pred = model(X_val).numpy()
        test_pred = model(X_test).numpy()

    # Errors (target - output) for training, validation and test
    y_test_np = y_test.numpy()
    train_errors = y_train.numpy() - train_pred
    val_errors = y_val.numpy() - val_pred
    test_errors = y_test_np - test_pred

    # Error variance for each output variable
    train_variances = np.var(train_errors, axis=0)
    val_variances = np.var(val_errors, axis=0)
    test_variances = np.var(test_errors, axis=0)

    # Correlation coefficient for each output variable (NumPy on both sides)
    correlations = [
        np.corrcoef(y_test_np[:, i], test_pred[:, i])[0, 1]
        for i in range(y_test_np.shape[1])
    ]

    return loss.item(), val_loss.item(), test_loss.item(), train_variances, val_variances, test_variances, correlations

# Grid search over the regularisation hyper-parameters alpha and beta
alpha_values = [0.58, 0.59, 0.60, 0.61, 0.62]
beta_values = [0.0115, 0.012, 0.0125, 0.013]
best_params = None
best_val_loss = float('inf')
grid_search_results = []

for alpha in alpha_values:
    for beta in beta_values:
        print(f'Alpha: {alpha}, Beta: {beta}')
        val_losses = []
        # Ten runs per parameter combination, each on its own seeded split
        for run in range(10):
            # Split into training (60%), validation (20%) and test (20%)
            X_train, X_temp, y_train, y_temp = train_test_split(
                X, y, test_size=0.4, random_state=run
            )
            X_val, X_test, y_val, y_test = train_test_split(
                X_temp, y_temp, test_size=0.5, random_state=run
            )

            # Convert every split to a float32 PyTorch tensor
            X_train, X_val, X_test, y_train, y_val, y_test = (
                torch.tensor(split, dtype=torch.float32)
                for split in (X_train, X_val, X_test, y_train, y_val, y_test)
            )

            # Only the validation loss (second returned value) is used here
            val_loss = train_model(
                X_train, y_train, X_val, y_val, X_test, y_test,
                alpha=alpha, beta=beta,
            )[1]
            val_losses.append(val_loss)

        mean_val_loss = np.mean(val_losses)
        grid_search_results.append([alpha, beta, mean_val_loss])
        print(f'Mean Validation Loss: {mean_val_loss}')

        # Keep the combination with the lowest mean validation loss
        if mean_val_loss < best_val_loss:
            best_val_loss = mean_val_loss
            best_params = (alpha, beta)

# Save the grid-search results to a CSV file
grid_search_df = pd.DataFrame(
    grid_search_results,
    columns=['Alpha', 'Beta', 'Mean Validation Loss'],
)
grid_search_df.to_csv('grid_search_results.csv', index=False)

print(f'Best Parameters: Alpha={best_params[0]}, Beta={best_params[1]}, Validation Loss={best_val_loss}')

# Final training runs using the best parameters found by the grid search
final_alpha, final_beta = best_params
all_results = []

for run in range(50):
    # Split into training (60%), validation (20%) and test (20%)
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.4, random_state=run
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=run
    )

    # Convert every split to a float32 PyTorch tensor
    X_train, X_val, X_test, y_train, y_val, y_test = (
        torch.tensor(split, dtype=torch.float32)
        for split in (X_train, X_val, X_test, y_train, y_val, y_test)
    )

    (
        train_loss, val_loss, test_loss,
        train_variances, val_variances, test_variances,
        correlations,
    ) = train_model(
        X_train, y_train, X_val, y_val, X_test, y_test,
        alpha=final_alpha, beta=final_beta,
    )

    # One result row per output variable; run numbers are reported 1-based
    all_results.extend(
        [
            run + 1, train_loss, val_loss, test_loss,
            train_variances[i], val_variances[i], test_variances[i],
            correlations[i], col,
        ]
        for i, col in enumerate(output_columns)
    )

# Build the DataFrame holding one row per run and output variable
results_df = pd.DataFrame(
    all_results,
    columns=[
        'Run', 'Train Loss', 'Validation Loss', 'Test Loss',
        'Train Variance', 'Validation Variance', 'Test Variance',
        'Correlation', 'Output Variable',
    ],
)

# Save the per-run results to a CSV file
results_df.to_csv('all_results.csv', index=False)

# Mean, minimum and maximum of every metric for each output variable
summary_df = results_df.groupby('Output Variable').agg(
    {
        metric: ['mean', 'min', 'max']
        for metric in (
            'Train Loss', 'Validation Loss', 'Test Loss',
            'Train Variance', 'Validation Variance', 'Test Variance',
            'Correlation',
        )
    }
).reset_index()

# Flatten the two-level column labels into single strings for readability
summary_df.columns = [' '.join(levels).strip() for levels in summary_df.columns]

# Save the mean/min/max summary to a CSV file
summary_df.to_csv('summary_results.csv', index=False)

# Confirmation output
print("I risultati sono stati salvati in 'all_results.csv' e 'summary_results.csv'.")
print(f'Best Parameters: Alpha={best_params[0]}, Beta={best_params[1]}, Validation Loss={best_val_loss}')






Alpha: 0.58, Beta: 0.0115
Mean Validation Loss: 0.01943722078576684
Alpha: 0.58, Beta: 0.012
Mean Validation Loss: 0.027537067304365337
Alpha: 0.58, Beta: 0.0125
Mean Validation Loss: 0.02152633140794933
Alpha: 0.58, Beta: 0.013
Mean Validation Loss: 0.03288430878892541
Alpha: 0.59, Beta: 0.0115
Mean Validation Loss: 0.023623011773452163
Alpha: 0.59, Beta: 0.012
Mean Validation Loss: 0.02881516139023006
Alpha: 0.59, Beta: 0.0125
Mean Validation Loss: 0.014768365118652583
Alpha: 0.59, Beta: 0.013
Mean Validation Loss: 0.02669909920077771
Alpha: 0.6, Beta: 0.0115
Mean Validation Loss: 0.02246388723142445
Alpha: 0.6, Beta: 0.012
Mean Validation Loss: 0.03722809855826199
Alpha: 0.6, Beta: 0.0125
Mean Validation Loss: 0.032446001563221215
Alpha: 0.6, Beta: 0.013
Mean Validation Loss: 0.024037303775548934
Alpha: 0.61, Beta: 0.0115
Mean Validation Loss: 0.026737666688859463
Alpha: 0.61, Beta: 0.012
Mean Validation Loss: 0.018570533767342568
Alpha: 0.61, Beta: 0.0125
Mean Validation Loss: 0.01