In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report
from collections import Counter

from nn_helpers import ParticleDS, train_loop, test_loop

In [None]:
# A neural net is a sequence of layers, each with its own learnable parameters.
# This one has 3 layers:
# - an input linear layer with 6 inputs (the number of predictor variables) and 8 outputs (the hidden size is arbitrary)
# - a Rectified Linear Unit (ReLU) layer, which replaces negative results of the first layer with 0
# - a second linear layer with 8 inputs and 4 outputs (one raw score, or "logit", per particle type)
# There is deliberately no Softmax layer at the end: nn.CrossEntropyLoss applies
# log-softmax internally and expects raw logits, so adding Softmax here would
# apply it twice and flatten the gradients. The predicted class is simply the
# argmax of the logits; use torch.softmax(logits, dim=1) if probabilities are needed.
class TinyModel(nn.Module):
    """Small fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        n_inputs: Number of input features per example (default 6).
        n_hidden: Width of the hidden layer (default 8).
        n_classes: Number of output classes (default 4).
    """

    def __init__(self, n_inputs=6, n_hidden=8, n_classes=4):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features=n_inputs, out_features=n_hidden),
            nn.ReLU(),
            nn.Linear(in_features=n_hidden, out_features=n_classes),
        )

    def forward(self, x):
        """Return the unnormalised class scores (logits) for a batch ``x``.

        ``x`` must have ``n_inputs`` values in its last dimension; the result
        has ``n_classes`` values in its last dimension.
        """
        return self.model(x)

nn_model = TinyModel()

In [None]:
# Load the data using our custom dataset class (imported from the nn_helpers file)
batch_size = 32
predictors = ['p', 'theta', 'beta', 'nphe', 'ein', 'eout']
# Alternative predictor set (presumably pre-scaled versions of the same columns - confirm they exist in the CSVs):
#predictors = ['p_scaled', 'theta_scaled', 'beta_scaled', 'nphe_scaled', 'ein_scaled', 'eout_scaled']
outcome = 'id'

train_ds = ParticleDS('../data/pid_train_balanced.csv', predictors, outcome)
test_ds = ParticleDS('../data/pid_test.csv', predictors, outcome)

# Tally how often each outcome value occurs in the training and test sets
print(Counter(train_ds.y))
print(Counter(test_ds.y))

# Shuffle the training data every epoch so that SGD does not see the same
# mini-batches in the same order each time (this matters most if the CSV rows
# are grouped by class - not verified here). The test set is only evaluated,
# so its order is left unchanged.
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=batch_size)

In [None]:
# Hyperparameters: reasonable first-pass values.
# Searching for the values that give the best model is called hyperparameter tuning.
epochs = 100
learning_rate = .1

# Loss: Cross Entropy measures how far the predicted categories are from the actual categories
loss_fn = nn.CrossEntropyLoss()

# Optimiser: Stochastic Gradient Descent updates the model parameters (this is how the model learns)
optimiser = torch.optim.SGD(
    nn_model.parameters(),
    lr=learning_rate,
)

# train_loop and test_loop come from our nn_helpers file - read them to see what each pass does.
# Each epoch runs one training pass followed by one pass over the test data.
for t in range(epochs):
    epoch_number = t + 1
    print(f"Epoch {epoch_number}\n-------------------------------")
    train_loop(train_dataloader, nn_model, loss_fn, optimiser, batch_size)
    test_loop(test_dataloader, nn_model, loss_fn)

print("Done!")

In [None]:
# Get the final performance metrics on the test set
nn_model.eval()

# Inference only: disable gradient tracking so no autograd graph is built for
# the whole test set (saves memory; the resulting tensor needs no .detach()).
with torch.no_grad():
    # The predicted class is the index of the largest output for each example
    pred_y = torch.argmax(nn_model(test_ds.X), dim=1).numpy()

print(classification_report(test_ds.y, pred_y))