## 1' Addestramento Rete Feed-Forward

In [32]:
import random
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt
import pickle

# Define the Data Layer
class MyDataset(Dataset):
    """In-memory regression dataset pairing a feature matrix with one target per row.

    Args:
        X: 2-D array-like of features (NumPy array, nested list, ...),
            one sample per row. Stored as a float32 tensor.
        y: 1-D array-like of targets, one per sample. Stored as a float32
            tensor reshaped to a single column.
    """

    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        # Column vector so targets line up with the (N, 1) output of the model.
        self.y = torch.FloatTensor(y).view(-1, 1)

        # Read the width from the converted tensor rather than the raw
        # argument so inputs without a ``.shape`` attribute (plain lists) work.
        self.num_features = self.X.shape[1]
        self.num_classes = 1  # Regression has only one output

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx, :], self.y[idx]

def feedforward_regression(input_size, hidden_size1, hidden_size2):
    """Build a fully connected regression network with two hidden layers.

    The network is ``Linear -> ReLU -> Linear -> ReLU -> Linear`` and ends in
    a single output unit.

    Args:
        input_size: Number of input features.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.

    Returns:
        An ``nn.Sequential`` holding the five layers in order.
    """
    widths = [input_size, hidden_size1, hidden_size2]

    layers = []
    # One Linear + ReLU pair per consecutive pair of widths.
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())

    # Output head: a single unit, no activation.
    layers.append(nn.Linear(hidden_size2, 1))

    return nn.Sequential(*layers)


# Define a function for the training process for regression
def train_model_regression(model, criterion, optimizer, epoch, train_loader, val_loader, device, scheduler=None):
    """Train ``model`` for ``epoch`` epochs, reporting the validation loss after each one.

    Args:
        model: Network to optimise; updated in place.
        criterion: Loss called as ``criterion(predictions, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        epoch: Total number of epochs to run.
        train_loader: Iterable of ``(data, targets)`` training batches; must
            support ``len()``.
        val_loader: Iterable of ``(data, targets)`` validation batches.
        device: Device each batch is moved to before the forward pass.
        scheduler: Optional learning-rate scheduler. When given,
            ``scheduler.step()`` is called with no arguments once per epoch,
            after that epoch's optimizer updates.

    Returns:
        Tuple ``(model, train_losses)`` where ``train_losses`` holds the mean
        per-batch training loss of every epoch.
    """
    # Keep the total under its own name so the loop index cannot shadow it and
    # the progress line does not depend on a module-level variable.
    total_epochs = epoch
    train_losses = []  # Mean training loss recorded for each epoch

    for epoch_idx in range(total_epochs):
        model.train()
        epoch_loss = 0.0

        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)

            optimizer.zero_grad()
            y_pred = model(data)
            loss = criterion(y_pred, targets)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        average_loss = epoch_loss / len(train_loader)
        train_losses.append(average_loss)

        labels, y_pred = test_model_regression(model, val_loader, device)
        # The validation loss is only reported, never back-propagated.
        with torch.no_grad():
            loss_val = criterion(y_pred, labels)

        print(f"Epoch {epoch_idx+1}/{total_epochs}, Training Loss: {average_loss}, Validation Loss: {loss_val.item()}")

        if scheduler is not None:
            scheduler.step()

    return model, train_losses

# Define a function to evaluate the performance on validation and test sets for regression
def test_model_regression(model, data_loader, device):
    """Run ``model`` over every batch of ``data_loader`` and collect the results.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: Network to evaluate.
        data_loader: Iterable of ``(data, targets)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(y_test, y_pred)``: all targets and all predictions,
        concatenated along the first dimension in iteration order.
    """
    model.eval()
    y_pred = []
    y_test = []

    # Inference only: without no_grad every batch would keep its autograd
    # graph alive until the returned tensors are released.
    with torch.no_grad():
        for data, targets in data_loader:
            data, targets = data.to(device), targets.to(device)
            y_pred.append(model(data))
            y_test.append(targets)

    y_test = torch.cat(y_test)
    y_pred = torch.cat(y_pred)

    return y_test, y_pred


# The "test_" prefix matches pytest's discovery pattern; mark this helper as
# not being a test case so it is never collected as one.
test_model_regression.__test__ = False

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device: {}".format(device))

# Train hyperparameters
num_epochs = 200
learning_rate = 0.0001
# Batch size used only for validation/test inference. It does not affect the
# optimisation, it just avoids one forward pass per sample.
eval_batch_size = 256

# Load data
FILENAME = "train.csv"
df = pd.read_csv(FILENAME)

seed = 42
# Seed PyTorch too, so weight initialisation and training-set shuffling are
# repeatable and not only the train/val/test split.
torch.manual_seed(seed)

# Select input variables (X) and output variable (y)
X = df.drop("Year", axis=1).values  # Every column except the target, as a NumPy array
y = df["Year"].values

# Split data: 20% held out for test, then 20% of the remainder for validation
indices = np.arange(X.shape[0])
train, test = train_test_split(indices, test_size=0.2, random_state=seed)
train, val = train_test_split(train, test_size=0.2, random_state=seed)

# Scale data using StandardScaler (fitted on the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X[train, :])
X_val_scaled = scaler.transform(X[val, :])
X_test_scaled = scaler.transform(X[test, :])

# Create datasets and dataloaders
train_dataset = MyDataset(X_train_scaled, y[train])
# No batch_size given: DataLoader's default of one sample per step is kept so
# the optimisation behaves as before.
train_loader = DataLoader(train_dataset, shuffle=True)

val_dataset = MyDataset(X_val_scaled, y[val])
val_loader = DataLoader(val_dataset, batch_size=eval_batch_size)

test_dataset = MyDataset(X_test_scaled, y[test])
test_loader = DataLoader(test_dataset, batch_size=eval_batch_size)

# Define the architecture, loss, and optimizer
hidden_size1 = 64
hidden_size2 = 32
model = feedforward_regression(train_dataset.num_features, hidden_size1, hidden_size2)
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Use Adam optimizer

# Learning Rate Scheduling
# NOTE(review): nothing below calls scheduler.step(), so the learning rate
# stays at `learning_rate` for the whole run — confirm whether that is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Test before training
y_test, y_pred = test_model_regression(model, test_loader, device)
loss_before_training = criterion(y_pred, y_test)
print("Mean Squared Error before training:", loss_before_training.item())

# Train the model
model, train_losses = train_model_regression(model, criterion, optimizer, num_epochs, train_loader, val_loader, device)

# Save the whole model object; the context manager closes the file handle.
with open("tabNet.save", 'wb') as model_file:
    torch.save(model, model_file)


# Test after training
y_test, y_pred = test_model_regression(model, test_loader, device)
# Convert once; .cpu() is required because .numpy() fails on CUDA tensors.
y_test_np = y_test.detach().cpu().numpy()
y_pred_np = y_pred.detach().cpu().numpy()

mse = mean_squared_error(y_test_np, y_pred_np)
mae = mean_absolute_error(y_test_np, y_pred_np)
# MAPE divides by the true value, so it is only meaningful for non-zero targets.
mape = np.mean(np.abs((y_test_np - y_pred_np) / y_test_np)) * 100
r2 = r2_score(y_test_np, y_pred_np)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')
print(f'R2 Score: {r2}')


Device: cpu
Mean Squared Error before training: 3993652.75
Epoch 1/200, Training Loss: 182601.5081655045, Validation Loss: 640.7804565429688
Epoch 2/200, Training Loss: 267.1186880402143, Validation Loss: 216.2014923095703
Epoch 3/200, Training Loss: 166.692339017811, Validation Loss: 157.46974182128906
Epoch 4/200, Training Loss: 153.61122274817203, Validation Loss: 258.3143005371094
Epoch 5/200, Training Loss: 146.65975865172575, Validation Loss: 503.9530029296875
Epoch 6/200, Training Loss: 144.1117364732347, Validation Loss: 138.13839721679688
Epoch 7/200, Training Loss: 141.1559369420193, Validation Loss: 124.92597198486328
Epoch 8/200, Training Loss: 137.86764221175707, Validation Loss: 342.4842224121094
Epoch 9/200, Training Loss: 135.62559521413334, Validation Loss: 156.5460662841797
Epoch 10/200, Training Loss: 136.9560244379702, Validation Loss: 144.050537109375
Epoch 11/200, Training Loss: 132.0970150914271, Validation Loss: 109.77495574951172
Epoch 12/200, Training Loss: 13

KeyboardInterrupt: 