# In [None]:
import random
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt

# For reproducibility
def fix_random(seed: int) -> None:
    """Seed every random number generator this script relies on.

    Seeds, in order, NumPy, Python's ``random`` module, PyTorch and
    PyTorch's CUDA generator, then switches cuDNN to its deterministic,
    non-benchmarking configuration.

    Args:
        seed: Value passed unchanged to each seeding function.
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # Disable cuDNN auto-tuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Define the Data Layer
class MyDataset(Dataset):
    """In-memory regression dataset pairing feature rows with one target each.

    Attributes:
        X: Float tensor built from the ``X`` argument.
        y: Float tensor built from the ``y`` argument, reshaped to a single
            column (``view(-1, 1)``).
        num_features: ``X.shape[1]`` of the array passed to the constructor.
        num_classes: Always 1 (single regression output).
    """

    def __init__(self, X, y):
        """Convert ``X`` and ``y`` to float tensors.

        Args:
            X: Two-dimensional feature array; it must expose ``.shape``.
            y: Target values, one per row of ``X``.
        """
        features = torch.FloatTensor(X)
        targets = torch.FloatTensor(y)

        self.X = features
        # Column layout so each target matches the model's single output.
        self.y = targets.view(-1, 1)

        self.num_features = X.shape[1]
        self.num_classes = 1  # Regression has only one output

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return self.X.size(0)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        sample = self.X[idx, :]
        target = self.y[idx]
        return sample, target

# Define a simple neural network for regression
class FeedForwardRegression(nn.Module):
    """One-hidden-layer feed-forward network with a single regression output.

    Architecture: ``Linear(input_size, hidden_size) -> ReLU -> Linear(hidden_size, 1)``.
    """

    def __init__(self, input_size, hidden_size):
        """Build the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size

        # Hidden layer first, then the single-neuron output layer.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.fc2(self.relu(self.fc1(x)))

# Define a function for the training process for regression
def train_model_regression(model, criterion, optimizer, epoch, train_loader, val_loader, device, writer, log_name="model"):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    For every epoch the model is optimised over ``train_loader``, then
    evaluated on ``val_loader``. Whenever the validation loss improves, the
    model's ``state_dict`` is written to ``models/<log_name>``.

    Args:
        model: Network to optimise (updated in place).
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        epoch: Total number of epochs to run.
        train_loader: Iterable of ``(data, targets)`` training batches; must
            support ``len()``.
        val_loader: Iterable of ``(data, targets)`` validation batches.
        device: Device the batches are moved to.
        writer: Logger exposing ``add_scalar(tag, value, step)``.
        log_name: File name of the checkpoint inside ``models/``.

    Returns:
        A ``(model, train_losses)`` tuple. ``model`` holds the weights of the
        LAST epoch (not the best checkpoint); ``train_losses`` contains the
        mean training loss of each epoch.

    Raises:
        ZeroDivisionError: If ``train_loader`` has zero batches.
    """
    # Keep the requested total in its own name: the original code shadowed
    # ``epoch`` with the loop variable and printed a module-level global.
    num_epochs = epoch
    n_iter = 0
    best_valid_loss = float('inf')
    train_losses = []  # Mean training loss of every epoch
    checkpoint_path = 'models/' + log_name

    for epoch_idx in range(num_epochs):
        model.train()
        epoch_loss = 0.0

        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)

            optimizer.zero_grad()
            y_pred = model(data)
            loss = criterion(y_pred, targets)
            loss.backward()
            optimizer.step()

            # Log a plain float so the logger never holds the autograd graph.
            batch_loss = loss.item()
            writer.add_scalar("Loss/train", batch_loss, n_iter)
            n_iter += 1

            epoch_loss += batch_loss

        average_loss = epoch_loss / len(train_loader)
        train_losses.append(average_loss)

        # Validation needs no gradients; disabling autograd avoids keeping
        # one graph per validation batch alive.
        with torch.no_grad():
            labels, y_pred = test_model_regression(model, val_loader, device)
            loss_val = criterion(y_pred, labels).item()
        writer.add_scalar("Loss/val", loss_val, epoch_idx)

        if loss_val < best_valid_loss:
            best_valid_loss = loss_val
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)

        print(f"Epoch {epoch_idx+1}/{num_epochs}, Training Loss: {average_loss}, Validation Loss: {loss_val}")

    return model, train_losses

# Define a function to evaluate the performance on validation and test sets for regression
def test_model_regression(model, data_loader, device):
    model.eval()
    y_pred = []
    y_test = []
    
    for data, targets in data_loader:
        data, targets = data.to(device), targets.to(device)
        y_pred.append(model(data))
        y_test.append(targets)
    
    y_test = torch.cat(y_test)
    y_pred = torch.cat(y_pred)
    
    return y_test, y_pred

# Set device
device = torch.device('cpu')  # or 'cuda' if available
print("Device: {}".format(device))

# Train hyperparameters
num_epochs = 100
learning_rate = 0.01
batch = 16

# Load data
FILENAME = "train.csv"
df = pd.read_csv(FILENAME)

seed = 42

# Select input variables (X) and output variable (y)
X = df.drop("Year", axis=1).values  # Convert to numpy array
y = df["Year"].values

# Split data: 80/20 into (train + val) / test, then 80/20 into train / val
indices = np.arange(X.shape[0])
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=seed)
train_idx, val_idx = train_test_split(train_idx, test_size=0.2, random_state=seed)

# Min-Max scaling.
# Fit the scaler on the training rows only, then apply it to every row:
# fitting on the full array would leak validation/test statistics into
# training. Held-out rows may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X[train_idx])
X = scaler.transform(X)

# Create dataset
my_dataset = MyDataset(X, y)

# Create subsets and relative dataloaders
train_subset = Subset(my_dataset, train_idx)
train_loader = DataLoader(train_subset, batch_size=batch, shuffle=True)

val_subset = Subset(my_dataset, val_idx)
val_loader = DataLoader(val_subset, batch_size=1)

test_subset = Subset(my_dataset, test_idx)
test_loader = DataLoader(test_subset, batch_size=1)

# Set seed for reproducibility
fix_random(seed)

# Start TensorBoard
writer = SummaryWriter()

# Define the architecture, loss, and optimizer
hidden_size = 32
model = FeedForwardRegression(my_dataset.num_features, hidden_size)
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Test before training
y_test, y_pred = test_model_regression(model, test_loader, device)
loss_before_training = criterion(y_pred, y_test)
print("Mean Squared Error before training:", loss_before_training.item())

# Checkpoint written by the training loop whenever the validation loss improves
best_model_name = 'best_model.pth'
best_model_path = 'models/best_model.pth'

# Remove any checkpoint left by a previous run so that the file loaded below
# is guaranteed to come from this training session.
if os.path.exists(best_model_path):
    os.remove(best_model_path)

# Train the model; the best-validation weights are saved to best_model_path.
# The returned model holds the LAST epoch's weights, so it must not be saved
# over the best checkpoint.
trained_model, train_losses = train_model_regression(
    model, criterion, optimizer, num_epochs, train_loader, val_loader, device, writer,
    log_name=best_model_name,
)

# Load the best model
if os.path.exists(best_model_path):
    trained_model.load_state_dict(torch.load(best_model_path, map_location=device))
    trained_model.to(device)
    print("Model loaded successfully.")
else:
    # No checkpoint means the validation loss never improved (e.g. it was NaN);
    # the evaluation below then uses the final-epoch weights.
    print("Error: The specified model path does not exist.")

# Test after training
y_test, y_pred = test_model_regression(trained_model, test_loader, device)
loss_after_training = criterion(y_pred, y_test)
print("Mean Squared Error after training:", loss_after_training.item())

# Plot the training loss over the epochs
plt.plot(train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Close TensorBoard writer after training
writer.flush()
writer.close()
