In [None]:
%load_ext autoreload
%autoreload 2
import torch
from torch import nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error
from neural import *
from dataset import *

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Data preparation: preprocessing (normalization) and feature engineering go here.

# TODO create datasets using custom Dataset class and split into train, validation, test sets
# NOTE: the `...` (Ellipsis) values below are placeholders; replace them with
# real dataset objects before running this cell.
train_dataset = ...
validation_dataset = ...
test_dataset = ...

# One batch size is shared by all three loaders. Only the training loader
# shuffles; validation and test loaders keep a fixed order.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Model, optimizer and loss setup.

# TODO change model shape
model = MLP(input_size=10, output_size=2, hidden_size=30, num_hidden_layers=3)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# NOTE(review): this is a classification loss, while print_metrics reports
# regression metrics (MSE/RMSE/MAE/R^2) -- confirm which task is intended.
criterion = nn.CrossEntropyLoss()

print(model)

# Per-epoch loss history and a running epoch counter; the training cell
# appends to / increments these.
train_losses = []
validation_losses = []
cur_epoch = 0

def trainModel(model: nn.Module, dataloader: torch.utils.data.DataLoader, criterion: nn.Module, optimizer: torch.optim.Optimizer, device: torch.device) -> float:
    """Train `model` for one epoch and return the average per-batch loss.

    Args:
        model: Network to optimize; switched to training mode.
        dataloader: Sized iterable yielding `(x, y)` batches.
        criterion: Callable computing the loss from `(output, y)`.
        optimizer: Optimizer that updates the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by `len(dataloader)`.

    Raises:
        ZeroDivisionError: If `dataloader` has length 0.
    """
    model.train()
    totalLoss = 0.0
    n = len(dataloader)
    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        # Clear gradients *before* the backward pass so that anything left on
        # the parameters (e.g. from a cell interrupted between backward() and
        # zero_grad()) cannot leak into this update.
        optimizer.zero_grad()

        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        totalLoss += loss.item()
    return totalLoss/n

def evaluateModel(model: nn.Module, dataloader: torch.utils.data.DataLoader, criterion: nn.Module, device: torch.device,) -> float:
    """Evaluate `model` on `dataloader` and return the average per-batch loss.

    The model is switched to evaluation mode and the forward passes run under
    `torch.no_grad()`, so no autograd graph is built during evaluation.

    Args:
        model: Network to evaluate.
        dataloader: Sized iterable yielding `(x, y)` batches.
        criterion: Callable computing the loss from `(output, y)`.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by `len(dataloader)`.

    Raises:
        ZeroDivisionError: If `dataloader` has length 0.
    """
    model.eval()
    totalLoss = 0.0
    n = len(dataloader)
    # Gradients are never needed here; disabling autograd avoids building a
    # graph for every evaluation batch.
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss = criterion(output, y)

            totalLoss += loss.item()
    return totalLoss/n



In [None]:
# TODO add ability to save model

# Train for `num_epochs` more epochs. `train_losses`, `validation_losses` and
# `cur_epoch` are defined in an earlier cell and updated here.
num_epochs = 5
for _ in range(num_epochs):
    train_loss = trainModel(model, train_loader, criterion, optimizer, device)
    validation_loss = evaluateModel(model, validation_loader, criterion, device)

    train_losses.append(train_loss)
    validation_losses.append(validation_loss)

    # Increment first so `cur_epoch` is the 1-based number of the epoch that
    # just finished; printing `cur_epoch + 1` would label the first epoch "2".
    cur_epoch += 1

    print(f"Epoch {cur_epoch}| Train Loss: {train_loss:.4f}| Validation Loss: {validation_loss:.4f}")

In [None]:
# Plot the per-epoch training and validation loss curves recorded during training.
plt.figure(figsize=(6, 4))
plt.title("Training and Validation Loss")
plt.plot(train_losses, label="Train")
plt.plot(validation_losses, label="Validation")  # legend label typo fixed
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.show()


def print_metrics(model, dataloader, device):
    """Print MSE, RMSE, MAE and R^2 of `model`'s predictions over `dataloader`.

    The model is put in evaluation mode and run without gradient tracking.
    Predictions and targets from every batch are gathered on the CPU as NumPy
    values, then the four metrics are printed one per line, in that order.

    Args:
        model: Network whose outputs are scored.
        dataloader: Iterable yielding `(x, y)` batches.
        device: Device each batch is moved to before the forward pass.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_output = model(inputs)
            # extend() iterates the first axis, so each list holds one entry
            # per sample rather than one per batch.
            predicted.extend(batch_output.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    metric_fns = (
        mean_squared_error,
        root_mean_squared_error,
        mean_absolute_error,
        r2_score,
    )
    for metric_fn in metric_fns:
        print(metric_fn(expected, predicted))


# Report the metrics on the held-out test split first, then on validation.
print("\nMetrics - Testing\n")
print_metrics(model, test_loader, device)

print("\nMetrics - Validation\n")
print_metrics(model, validation_loader, device)



In [None]:
# Final evaluation on the test split -- run once training is complete.
test_loss = evaluateModel(model, test_loader, criterion, device)

# Test loss alongside the most recent training and validation losses.
print(
    f"Test Loss: {test_loss:.4f}",
    f"Final Train Loss: {train_losses[-1]:.4f}",
    f"Final Validation Loss: {validation_losses[-1]:.4f}",
    sep="\n",
)