# Neural Network Training

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader

In [2]:
# Dataset
dataset_name = "label_top"

# Hyperparameters
num_epochs = 3     # this parameter defines how many time the network sees each item of the dataset
lr = 1e-3          # this parameter defines how much the parameters of the network are updated after each step
batch_size=1024    # this parameter defines how many samples are run through the network in each step

In [3]:
class NetworkDataset(Dataset):
    """
    This class loads data from a pandas dataframe
    """
    def __init__(self, filename_features: str, filename_labels: str):
        """
        Initialize the dataset class
        :param filename: The filename of the CSV file
        """
        self.df_features = pd.read_pickle(filename_features, compression='gzip').astype(float).to_numpy()
        df_labels = pd.read_pickle(filename_labels, compression='gzip').astype(int)
        self.n_features = self.df_features.shape[1]
        self.n_classes = df_labels.shape[1]
        self.df_labels = (df_labels * np.arange(df_labels.shape[1])).sum(axis=1).to_numpy()

    def __len__(self):
        """
        This function returns the total number of items in the dataset.
        We are using a pandas data frame in this dataset which has an attribut named shape.
        The first dimension of shape is equal to the number of items in the dataset.
        :return: The number of rows in the CSV file
        """
        return self.df_features.shape[0]

    def __getitem__(self, idx):
        """
        This function returns a single tuple from the dataset.
        :param idx: The index of the tuple that should be returned.
        :return: Tuple of an x-value and a y-value
        """
        return self.df_features[idx], self.df_labels[idx]

In [4]:
class ClassificationNetwork(nn.Module):
    def __init__(self, n_features: int, n_classes: int, hidden_dim=128, f_activation=nn.ReLU):
        """
        Here we define the layers of our neural network.
        """
        super(ClassificationNetwork, self).__init__()
        # Our data has four features, so the first linear layer has to have four input dimensions.
        self.layer1 = nn.Linear(n_features, hidden_dim)
        # The first hidden layer need to have the same input dimension as layer1 has outputs. 
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        # We have three different classes in out data, so the last linear layer must have 3 output dimensions.
        self.layer3 = nn.Linear(hidden_dim, n_classes)
        self.activation = f_activation()
        # The outputs of the last linear layer need to be mapped to a probability function.
        # This can be done by running the vectors through a softmax function.
        self.classification = nn.Softmax(dim=1)
        
    def forward(self, x):
        """
        The forward function takes a data vector and runs it through the layers of our neural network.
        :return: The forward function returns a vector of size 3 which contains the
            probabilities for all three classes for a given data vector.
        """
        # Run the input through the first linear layer and then through the activation function.
        x = self.activation(self.layer1(x))
        # Run the outputs of layer 1 through layer 2.
        x = self.activation(self.layer2(x))
        # Run the outputs of layer 2 through the third linear layer and then through the softmax classification function.
        x = self.classification(self.layer3(x))
        return x

In [5]:
dataset = NetworkDataset("features_final.pkl.gz", f"{dataset_name}_ohe.pkl.gz")
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [6]:
# Now that we have defined the network class we need to create an instance of it
net = ClassificationNetwork(n_features=dataset.n_features, n_classes=dataset.n_classes)

In [7]:
def get_accuracy(net, dataloader):
    """
    This function computes the accuracy of the neural network by sampling data from a
    data loader, running it through the network and computing the percentage of correct predictions.
    :param net: The neural network
    :param dataloader: A DataLoader instance
    """
    # torch.no_grad means that no gradients should be computed when running data through the network.
    # When we run test data through the network this should not have an effect on our training, that is
    # why we don't want to compute gradients here.
    with torch.no_grad():
        X_test, y_test = next(iter(dataloader))
        y_pred = net(X_test.to(torch.float32))
        correct = (torch.argmax(y_pred, dim=1) == y_test).type(torch.float32)
        return correct.mean().item()

In [8]:
# Let's check the accuracy before training the network
print("Accuracy before training:", get_accuracy(net, dataloader))

Accuracy before training: 0.0341796875


In [9]:
# This is our loss function. Which one do we need for classification: MSELoss or CrossEntropyLoss?
criterion = nn.CrossEntropyLoss()
# This is the algorithm used for optimizing our neural network parameters.
optimizer = optim.AdamW(net.parameters(), lr=lr)

for epoch in range(num_epochs):
    for X, Y in dataloader:        
        # Forward pass
        outputs = net(X.to(torch.float32))
        
        # Compute the difference between the true labels and the predicted labels
        loss = criterion(outputs, Y.to(torch.long))
    
        # First reset the gradients
        optimizer.zero_grad()
        
        # Then compute the new gradients
        loss.backward()
        
        # And finally perform the backpropagation step
        optimizer.step()
    
    # Print some metrics about the learning progress
    if (epoch + 1) % 1 == 0:
        accuracy = get_accuracy(net, dataloader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy on test data:", accuracy)

Epoch [1/3], Loss: 1.4602, Accuracy on test data: 0.6845703125
Epoch [2/3], Loss: 1.4489, Accuracy on test data: 0.6953125
Epoch [3/3], Loss: 1.4867, Accuracy on test data: 0.6982421875
