# Dataset definition

Definiamo il dataset usando le primitive di pytorch, una volta fatto questo prepariamo il dataloader e portiamo il dataset in un formato che risulta comodo

In [153]:
import pandas as pd
import numpy as np
import torch

class ParticleDataset(torch.utils.data.Dataset):
    def __init__(self, data_dir, detections_file, transform=None):
        self.data_dir = data_dir
        self.detections = pd.read_csv(detections_file,header=[i for i in range(1,100)]+[101], dtype=float)
        self.transform = transform

    def __len__(self):
        return len(self.detections)
    
    def __getitem__(self, idx):
        self.item = self.detections.iloc[idx]
        self.item = self.item.to_numpy(dtype = np.float32)
        return (self.item[1:-1], self.item[-1].astype(np.long))

In [2]:
#data = ParticleDataset('.', 'dataset.csv', transform=None)
#Risolvere i problemi nel caricamento del dataset

balanced_data = ParticleDataset('.', 'balanced_dataset.csv', transform=None)

In [154]:
from torch.utils.data import DataLoader
batch_size = 32

train_loader = DataLoader(balanced_data, batch_size=batch_size, shuffle=True)

In [155]:
next(iter(train_loader))

[tensor([[367.0000, 366.0000, 361.0000,  ..., 624.0000, 626.0000, 627.0000],
         [378.0000, 375.0000, 369.0000,  ..., 641.0000, 646.0000, 641.0000],
         [367.0000, 361.0000, 357.0000,  ..., 626.0000, 626.0000, 624.0000],
         ...,
         [385.1729, 382.9014, 389.5186,  ..., 630.6914, 635.3829, 632.4200],
         [374.0000, 370.0000, 374.0000,  ..., 629.0000, 622.0000, 623.0000],
         [366.0000, 368.0000, 371.0000,  ..., 641.0000, 642.0000, 636.0000]]),
 tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
         0, 1, 1, 1, 1, 1, 0, 0])]

# Definizione del modello

Creiamo un modello primitivo e molto semplice per poter stabilire una baseline e controllare che il training scorra correttamente

In [156]:
from torch import nn

class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits.float()

In [244]:
model = NeuralNetwork(100)
print(len(train_loader))
print(balanced_data.__len__())

8031
256967


In [248]:
#Training loop

def basic_training_loop(model, train_loader, lr = 1e-5, epochs = 10, momentum = 0.9):
    
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for e in range(epochs):
        epoch_acc = 0
        epoch_loss = 0
        batch_step = 0

        for batch in train_loader:
            x = batch[0]
            y = batch[1]
            #x = x.reshape(x.shape[0], -1)
            x = torch.unsqueeze(x, 1)

            output = model(x)

            loss = criterion(output, torch.nn.functional.one_hot(y, num_classes=2).float())

            batch_step += 1
            epoch_loss += loss
            epoch_acc += (torch.argmax(torch.nn.functional.softmax(output), dim=1) == y).sum()
            print("iter-acc: ", (torch.argmax(torch.nn.functional.softmax(output), dim=1) == y).sum()/x.shape[0])
            

            #print(torch.nn.functional.one_hot(y, num_classes=2).float())
            #print(torch.nn.functional.softmax(output))

            if loss is np.nan:
                print("x: ", x)
                print("y: ", y)
                print("model: ", output)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


        print("epoch_loss: ", epoch_loss/len(train_loader))
        print("epoch_acc: ", epoch_acc/balanced_data.__len__())

    return model

In [249]:
class OneDimCnn(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=5)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels = 16, out_channels=32, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool1d(kernel_size=2)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, padding=2, stride=2)
        self.pool4 = nn.MaxPool1d(kernel_size=6)
        self.lin1 = nn.Linear(128, 32)
        self.ac1 = nn.ReLU()
        self.lin2 = nn.Linear(32, 2)
        
        

    def forward(self, input):
        x = self.conv1(input)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.conv4(x)
        x = self.pool4(x)
        x = torch.reshape(x, (input.shape[0], -1, 128))
        x = self.lin1(x)
        x = self.ac1(x)
        x = self.lin2(x)
        x = torch.squeeze(x)

        return x
    

In [250]:
model = OneDimCnn()

trained_model = basic_training_loop(model=model, train_loader=train_loader)

  epoch_acc += (torch.argmax(torch.nn.functional.softmax(output), dim=1) == y).sum()
  print("iter-acc: ", (torch.argmax(torch.nn.functional.softmax(output), dim=1) == y).sum()/x.shape[0])


iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.4062)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.4062)
iter-acc:  tensor(0.5938)
iter-acc:  tensor(0.4375)
iter-acc:  tensor(0.4688)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.6250)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.5625)
iter-acc:  tensor(0.6562)
iter-acc:  tensor(0.4688)
iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.4375)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.6250)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.4375)
iter-acc:  tensor(0.4688)
iter-acc:  tensor(0.6250)
iter-acc:  tensor(0.5312)
iter-acc:  tensor(0.3750)
iter-acc:  tensor(0.6250)
iter-acc:  tensor(0.3438)
iter-acc:  tensor(0.6875)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.7188)
iter-acc:  tensor(0.3750)
iter-acc:  tensor(0.5000)
iter-acc:  tensor(0.5938)
iter-acc:  tensor(0.4375)
iter-acc:  t

KeyboardInterrupt: 