## 1' Addestramento Rete Feed-Forward

In [4]:
import random
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt
import pickle

# Define the Data Layer
class MyDataset(Dataset):
    """In-memory regression dataset holding features and targets as tensors.

    ``X`` is converted to a float tensor of features; ``y`` is converted to
    a float tensor and reshaped into a single column (one target per row).
    """

    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        targets = torch.FloatTensor(y)
        self.y = targets.view(-1, 1)  # column vector: one target per sample

        # The feature count is read from the raw input, not the tensor.
        self.num_features = X.shape[1]
        self.num_classes = 1  # regression produces a single output

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

def feedforward_regression(input_size, hidden_size1, hidden_size2):
    """Build a feed-forward regressor with one hidden layer.

    The returned ``nn.Sequential`` is
    ``Linear(input_size, hidden_size1) -> ReLU -> Linear(hidden_size1, 1)``.

    NOTE: ``hidden_size2`` is accepted but is not used by this architecture.
    """
    layers = [
        nn.Linear(input_size, hidden_size1),
        nn.ReLU(),
        nn.Linear(hidden_size1, 1),
    ]
    return nn.Sequential(*layers)


# Define a function for the training process for regression
def train_model_regression(model, criterion, optimizer, epoch, train_loader, val_loader, device):
    """Train ``model`` for ``epoch`` epochs, printing losses after each one.

    Args:
        model: network to optimise; its parameters are updated in place.
        criterion: loss callable, invoked as ``criterion(prediction, target)``.
        optimizer: optimizer bound to the parameters of ``model``.
        epoch: total number of epochs to run.
        train_loader: iterable of ``(data, targets)`` training batches.
        val_loader: iterable of ``(data, targets)`` validation batches.
        device: device every batch is moved to before the forward pass.

    Returns:
        ``(model, train_losses)`` where ``train_losses`` contains the mean
        per-batch training loss of each epoch.
    """
    # Keep the requested total under its own name so the progress message
    # does not depend on a module-level ``num_epochs`` variable.
    total_epochs = epoch
    train_losses = []  # mean training loss of each epoch

    for epoch_idx in range(total_epochs):
        model.train()
        epoch_loss = 0.0

        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)

            optimizer.zero_grad()
            y_pred = model(data)
            loss = criterion(y_pred, targets)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        average_loss = epoch_loss / len(train_loader)
        train_losses.append(average_loss)

        # Validation needs no gradients; skipping them avoids keeping an
        # autograd graph alive for every validation batch.
        with torch.no_grad():
            labels, y_pred = test_model_regression(model, val_loader, device)
            loss_val = criterion(y_pred, labels)

        print(f"Epoch {epoch_idx + 1}/{total_epochs}, Training Loss: {average_loss}, Validation Loss: {loss_val.item()}")

    return model, train_losses

# Define a function to evaluate the performance on validation and test sets for regression
def test_model_regression(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and gather targets and predictions.

    The model is switched to evaluation mode (and left in it). Inference is
    executed under ``torch.no_grad()``, so the returned predictions carry no
    autograd graph.

    Args:
        model: network to evaluate.
        data_loader: iterable of ``(data, targets)`` batches; it must yield
            at least one batch because the results are concatenated.
        device: device every batch is moved to before the forward pass.

    Returns:
        ``(y_test, y_pred)``: the targets and the model outputs of all
        batches, concatenated in iteration order. Both stay on ``device``.
    """
    model.eval()
    y_pred = []
    y_test = []

    # Evaluation only: do not record operations for backpropagation.
    with torch.no_grad():
        for data, targets in data_loader:
            data, targets = data.to(device), targets.to(device)
            y_pred.append(model(data))
            y_test.append(targets)

    return torch.cat(y_test), torch.cat(y_pred)


# The ``test_`` prefix is only a name: tell pytest this helper is not a test
# so it is never collected (it would fail looking for fixtures).
test_model_regression.__test__ = False

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device: {}".format(device))

# Train hyperparameters
num_epochs = 200
learning_rate = 0.001
batch = 8

# Load data
FILENAME = "train.csv"
df = pd.read_csv(FILENAME)

seed = 42

# Select input variables (X) and output variable (y)
X = df.drop("Year", axis=1).values  # Convert to numpy array
y = df["Year"].values

# Split data: 20% test, then 20% of the remainder for validation
# (64% train / 16% validation / 20% test overall).
indices = np.arange(X.shape[0])
train, test = train_test_split(indices, test_size=0.2, random_state=seed)
train, val = train_test_split(train, test_size=0.2, random_state=seed)

# Scale data using StandardScaler; the statistics come from the training
# rows only and are then applied to the validation and test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X[train, :])
X_val_scaled = scaler.transform(X[val, :])
X_test_scaled = scaler.transform(X[test, :])

# Create datasets and dataloaders
train_dataset = MyDataset(X_train_scaled, y[train])
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch)

val_dataset = MyDataset(X_val_scaled, y[val])
val_loader = DataLoader(val_dataset, batch_size=1)

test_dataset = MyDataset(X_test_scaled, y[test])
test_loader = DataLoader(test_dataset, batch_size=1)

# Define the architecture, loss, and optimizer
hidden_size1 = 64
hidden_size2 = 32
model = feedforward_regression(train_dataset.num_features, hidden_size1, hidden_size2)
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Learning Rate Scheduling
# NOTE(review): this scheduler is created but nothing in this cell calls
# scheduler.step(), so the learning rate stays at `learning_rate`.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Test before training
y_test, y_pred = test_model_regression(model, test_loader, device)
loss_before_training = criterion(y_pred, y_test)
print("Mean Squared Error before training:", loss_before_training.item())

# Train the model
model, train_losses = train_model_regression(model, criterion, optimizer, num_epochs, train_loader, val_loader, device)

# Save the whole model object; passing the path lets torch open and close
# the file itself instead of leaving an unclosed handle behind.
torch.save(model, "feedForwardNN.save")

# Test after training
y_test, y_pred = test_model_regression(model, test_loader, device)

# Convert once; .cpu() is required before .numpy() when device is CUDA.
y_test_np = y_test.detach().cpu().numpy()
y_pred_np = y_pred.detach().cpu().numpy()

mse = mean_squared_error(y_test_np, y_pred_np)
mae = mean_absolute_error(y_test_np, y_pred_np)
# MAPE divides by the targets, so it is only meaningful when none is zero.
mape = np.mean(np.abs((y_test_np - y_pred_np) / y_test_np)) * 100
r2 = r2_score(y_test_np, y_pred_np)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')
print(f'R2 Score: {r2}')


Device: cpu
Mean Squared Error before training: 3994010.75
Epoch 1/200, Training Loss: 413641.0206246307, Validation Loss: 8235.6416015625
Epoch 2/200, Training Loss: 1417.719732465103, Validation Loss: 159.5406951904297
Epoch 3/200, Training Loss: 151.33763747742594, Validation Loss: 105.64550018310547
Epoch 4/200, Training Loss: 138.47325982519925, Validation Loss: 115.24968719482422
Epoch 5/200, Training Loss: 131.21685945532704, Validation Loss: 116.29090881347656
Epoch 6/200, Training Loss: 132.87336919479543, Validation Loss: 119.732177734375
Epoch 7/200, Training Loss: 134.65481203973525, Validation Loss: 130.2337646484375
Epoch 8/200, Training Loss: 130.50513331599618, Validation Loss: 96.36042785644531
Epoch 9/200, Training Loss: 127.51351657265464, Validation Loss: 103.10466003417969
Epoch 10/200, Training Loss: 126.50888801642118, Validation Loss: 127.1388931274414
Epoch 11/200, Training Loss: 125.93974664280528, Validation Loss: 120.15068817138672
Epoch 12/200, Training Los

In [None]:
import random
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt

# For reproducibility
def fix_random(seed: int) -> None:
    """Seed every random number generator used here with ``seed``.

    Seeds NumPy, the standard ``random`` module and PyTorch (default and
    CUDA generators), then sets the cuDNN flags ``benchmark = False`` and
    ``deterministic = True``.
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Define the Data Layer
class MyDataset(Dataset):
    """Regression dataset that keeps features and targets as float tensors.

    ``X`` becomes a float tensor of features; ``y`` becomes a float tensor
    reshaped to one column, i.e. one target value per sample.
    """

    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        raw_targets = torch.FloatTensor(y)
        self.y = raw_targets.view(-1, 1)  # shape (n_samples, 1) for regression

        # Number of input columns, taken from the raw (unconverted) input.
        self.num_features = X.shape[1]
        self.num_classes = 1  # regression has a single output

    def __len__(self):
        """Return how many samples the dataset holds."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

# Define a simple neural network for regression
class FeedForwardRegression(nn.Module):
    """Feed-forward regressor: one ReLU hidden layer and a single output."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size

        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)  # one output neuron for regression
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply ``fc1 -> ReLU -> fc2`` to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Define a function for the training process for regression
def train_model_regression(model, criterion, optimizer, epoch, train_loader, val_loader, device, writer, log_name="model"):
    """Train ``model``, log losses and checkpoint the best validation epoch.

    Args:
        model: network to optimise; its parameters are updated in place.
        criterion: loss callable, invoked as ``criterion(prediction, target)``.
        optimizer: optimizer bound to the parameters of ``model``.
        epoch: total number of epochs to run.
        train_loader: iterable of ``(data, targets)`` training batches.
        val_loader: iterable of ``(data, targets)`` validation batches.
        device: device every batch is moved to before the forward pass.
        writer: object with an ``add_scalar(tag, value, step)`` method (for
            example a TensorBoard ``SummaryWriter``); pass ``None`` to
            disable logging.
        log_name: file name, inside the ``models`` directory, under which
            the best ``state_dict`` is stored.

    Returns:
        ``(model, train_losses)`` where ``train_losses`` contains the mean
        per-batch training loss of each epoch. The returned model holds the
        weights of the last epoch; the best ones are only on disk.
    """
    # Keep the requested total under its own name so the progress message
    # does not depend on a module-level ``num_epochs`` variable.
    total_epochs = epoch
    n_iter = 0  # global batch counter, used as the step of "Loss/train"
    best_valid_loss = float('inf')
    train_losses = []  # mean training loss of each epoch
    checkpoint_path = os.path.join('models', log_name)

    for epoch_idx in range(total_epochs):
        model.train()
        epoch_loss = 0.0

        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)

            optimizer.zero_grad()
            y_pred = model(data)
            loss = criterion(y_pred, targets)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            if writer is not None:
                writer.add_scalar("Loss/train", batch_loss, n_iter)
            n_iter += 1

            epoch_loss += batch_loss

        average_loss = epoch_loss / len(train_loader)
        train_losses.append(average_loss)

        # Validation needs no gradients; skipping them avoids keeping an
        # autograd graph alive for every validation batch.
        with torch.no_grad():
            labels, y_pred = test_model_regression(model, val_loader, device)
            loss_val = criterion(y_pred, labels).item()
        if writer is not None:
            writer.add_scalar("Loss/val", loss_val, epoch_idx)

        # Checkpoint whenever the validation loss improves.
        if loss_val < best_valid_loss:
            best_valid_loss = loss_val
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)

        print(f"Epoch {epoch_idx + 1}/{total_epochs}, Training Loss: {average_loss}, Validation Loss: {loss_val}")

    return model, train_losses

# Define a function to evaluate the performance on validation and test sets for regression
def test_model_regression(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and gather targets and predictions.

    The model is switched to evaluation mode (and left in it). Inference is
    executed under ``torch.no_grad()``, so the returned predictions carry no
    autograd graph.

    Args:
        model: network to evaluate.
        data_loader: iterable of ``(data, targets)`` batches; it must yield
            at least one batch because the results are concatenated.
        device: device every batch is moved to before the forward pass.

    Returns:
        ``(y_test, y_pred)``: the targets and the model outputs of all
        batches, concatenated in iteration order. Both stay on ``device``.
    """
    model.eval()
    y_pred = []
    y_test = []

    # Evaluation only: do not record operations for backpropagation.
    with torch.no_grad():
        for data, targets in data_loader:
            data, targets = data.to(device), targets.to(device)
            y_pred.append(model(data))
            y_test.append(targets)

    return torch.cat(y_test), torch.cat(y_pred)


# The ``test_`` prefix is only a name: tell pytest this helper is not a test
# so it is never collected (it would fail looking for fixtures).
test_model_regression.__test__ = False

# Set device
device = torch.device('cpu')  # or 'cuda' if available
print("Device: {}".format(device))

# Train hyperparameters
num_epochs = 100
learning_rate = 0.01
batch = 16

# Load data
FILENAME = "train.csv"
df = pd.read_csv(FILENAME)

seed = 42

# Select input variables (X) and output variable (y)
X = df.drop("Year", axis=1).values  # Convert to numpy array
y = df["Year"].values

# Split data: 20% test, then 20% of the remainder for validation
# (64% train / 16% validation / 20% test overall).
indices = np.arange(X.shape[0])
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=seed)
train_idx, val_idx = train_test_split(train_idx, test_size=0.2, random_state=seed)

# Min-Max scaling: fit on the training rows only, so that no statistic of
# the validation/test rows leaks into training, then transform every row.
scaler = MinMaxScaler()
scaler.fit(X[train_idx])
X = scaler.transform(X)

# Create dataset
my_dataset = MyDataset(X, y)

# Create subsets and relative dataloaders
train_subset = Subset(my_dataset, train_idx)
train_loader = DataLoader(train_subset, batch_size=batch, shuffle=True)

val_subset = Subset(my_dataset, val_idx)
val_loader = DataLoader(val_subset, batch_size=1)

test_subset = Subset(my_dataset, test_idx)
test_loader = DataLoader(test_subset, batch_size=1)

# Set seed for reproducibility
fix_random(seed)

# Define the architecture, loss, and optimizer
hidden_size = 32
model = FeedForwardRegression(my_dataset.num_features, hidden_size)
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Test before training
y_test, y_pred = test_model_regression(model, test_loader, device)
loss_before_training = criterion(y_pred, y_test)
print("Mean Squared Error before training:", loss_before_training.item())

# TensorBoard writer that receives the training/validation loss curves
writer = SummaryWriter()

# Train the model. The training function stores the weights of the epoch
# with the lowest validation loss in models/<log_name>, so the final
# (last-epoch) weights must not be saved over that file afterwards.
best_model_name = 'best_model.pth'
trained_model, train_losses = train_model_regression(
    model, criterion, optimizer, num_epochs, train_loader, val_loader, device, writer,
    log_name=best_model_name,
)
writer.close()

# Load the best model
best_model_path = 'models/best_model.pth'
if os.path.exists(best_model_path):
    trained_model.load_state_dict(torch.load(best_model_path, map_location=device))
    trained_model.to(device)
    print("Model loaded successfully.")
else:
    print("Error: The specified model path does not exist.")

# Test after training
y_test, y_pred = test_model_regression(trained_model, test_loader, device)
loss_after_training = criterion(y_pred, y_test)
print("Mean Squared Error after training:", loss_after_training.item())

# Plot the training loss curve
plt.plot(train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()