# Dataset definition

Definiamo il dataset usando le primitive di PyTorch. Fatto questo, prepariamo il DataLoader e portiamo il dataset in un formato comodo da usare.

In [220]:
import pandas as pd
import numpy as np
import torch

class ParticleDataset(torch.utils.data.Dataset):
    """Map-style dataset backed by a CSV table loaded fully into memory.

    Each sample is one row of the table: the first column is dropped, the
    last column is returned as the integer class label, and every column in
    between forms the ``float32`` feature vector.

    Args:
        data_dir: Stored as ``self.data_dir``. It is not used to resolve
            ``detections_file``.
        detections_file: Path passed unchanged to ``pandas.read_csv``.
        transform: Optional callable applied to the feature vector of each
            sample before it is returned. ``None`` (the default) leaves the
            features untouched.
    """

    def __init__(self, data_dir, detections_file, transform=None):
        self.data_dir = data_dir
        # NOTE(review): a list passed as ``header`` tells pandas that rows
        # 1..99 and 101 are column-label rows (a MultiIndex), so the data
        # only starts after row 101. This looks like it was meant to select
        # columns rather than header rows -- confirm against the CSV layout.
        self.detections = pd.read_csv(
            detections_file,
            header=list(range(1, 100)) + [101],
            dtype=float,
        )
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the loaded table."""
        return len(self.detections)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the row at position ``idx``.

        ``features`` is the row without its first and last entries, as
        ``float32`` (after ``transform``, if one was given). ``label`` is the
        last entry of the row cast to ``int64``.
        """
        # Keep the row in a local variable: writing it to ``self`` on every
        # access leaves stale per-sample state on the dataset object.
        row = self.detections.iloc[idx].to_numpy(dtype=np.float32)

        features = row[1:-1]
        if self.transform is not None:
            features = self.transform(features)

        # Use int64 explicitly. ``np.long`` was removed in NumPy 1.24 and,
        # where it exists again (NumPy 2), maps to the platform C long,
        # whereas torch's one_hot / class-index APIs expect 64-bit integers.
        label = row[-1].astype(np.int64)
        return (features, label)

In [221]:
# data = ParticleDataset('.', 'dataset.csv', transform=None)
# TODO: fix the problems in loading the dataset (commented-out line above).

# Load the balanced CSV instead; no transform is applied to the samples.
balanced_data = ParticleDataset(
    data_dir='.',
    detections_file='balanced_dataset.csv',
    transform=None,
)

In [222]:
from torch.utils.data import DataLoader
# Number of samples per mini-batch produced by the loader.
batch_size = 8

# shuffle=True reshuffles the samples at every pass over the dataset.
train_loader = DataLoader(
    dataset=balanced_data,
    shuffle=True,
    batch_size=batch_size,
)

In [223]:
# Pull a single batch from the loader so the cell displays what it yields.
next(iter(train_loader))

[tensor([[353.7797, 359.4576, 356.2203, 360.1186, 356.1186, 357.8983, 358.3390,
          360.0169, 358.2372, 360.5593, 361.2203, 356.2035, 360.4576, 355.5593,
          354.1186, 356.6779, 353.5593, 352.3390, 353.8814, 353.4407, 360.1186,
          364.5762, 362.8983, 364.1186, 358.7797, 358.4407, 366.3390, 362.7797,
          358.3390, 366.8983, 406.4576, 470.0169, 526.6779, 568.6779, 588.9832,
          597.5424, 606.0000, 608.3389, 610.0000, 617.2203, 613.2203, 607.4407,
          608.2203, 607.0000, 609.6611, 613.6611, 614.2203, 613.8814, 616.8814,
          618.4407, 614.4407, 614.4407, 613.2203, 611.4407, 612.8983, 612.8983,
          612.2203, 609.4407, 613.2203, 615.4407, 612.8814, 613.2203, 612.2203,
          613.3221, 613.6611, 611.3389, 609.1017, 608.6442, 607.9832, 611.3221,
          610.8814, 607.4407, 611.4407, 615.5593, 617.8983, 616.3389, 616.5762,
          616.9152, 609.7965, 607.2372, 606.5593, 609.4407, 615.4407, 616.6611,
          614.6611, 614.3221, 611.4407, 

# Definizione del modello

Creiamo un modello volutamente semplice per stabilire una baseline e verificare che il training proceda correttamente.

In [224]:
from torch import nn

class NeuralNetwork(nn.Module):
    """Small fully connected baseline classifier.

    The network is two hidden linear layers of 512 units, each followed by a
    ReLU, and a final linear layer that produces 2 raw scores (logits) per
    sample.

    Args:
        input_size: Number of features in each input vector.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_width = 512
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the layer indices.
        layers = [
            nn.Linear(input_size, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, 2),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return the logits as ``float32``."""
        return self.linear_relu_stack(x).float()

In [241]:
# 100 is the length of the feature vector fed to the first linear layer.
model = NeuralNetwork(input_size=100)

In [244]:
#Training loop

def basic_training_loop(model, train_loader, lr=1e-2, epochs=10, momentum=0.9):
    """Train ``model`` in place with SGD and ``BCEWithLogitsLoss``.

    For every batch the integer labels are one-hot encoded over 2 classes and
    compared against the model output. The loss of each batch is printed.
    When the loss is NaN, the offending inputs, labels and model output are
    printed first; training then continues as usual.

    Args:
        model: Module called as ``model(x)``; its parameters are optimised.
        train_loader: Iterable of batches, where ``batch[0]`` is the model
            input and ``batch[1]`` holds the integer class indices.
        lr: Learning rate passed to ``torch.optim.SGD``.
        epochs: Number of full passes over ``train_loader``.
        momentum: Momentum passed to ``torch.optim.SGD``.

    Returns:
        The same ``model`` object that was passed in.
    """
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for _ in range(epochs):
        for batch in train_loader:
            x = batch[0]
            y = batch[1]

            output = model(x)

            targets = torch.nn.functional.one_hot(y, num_classes=2).float()
            loss = criterion(output, targets)

            # The previous `loss is np.nan` check compared object identity
            # between a tensor and a float, so it could never be true.
            # torch.isnan inspects the actual value of the loss.
            if torch.isnan(loss):
                print("x: ", x)
                print("y: ", y)
                print("model: ", output)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print("loss: ", loss.item())

    return model

In [None]:
# Run the baseline training with the loop's default hyperparameters.
trained_model = basic_training_loop(
    train_loader=train_loader,
    model=model,
)