In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
def coord_convert(x,y,z):
    
    phi = np.arctan2(y, x)
    theta = np.arctan2(np.sqrt(x ** 2 + y ** 2), z)
    eta = -np.log(np.tan(theta/2))    
    return phi, eta, z


def sector_splitter(df, n, m):
    
    phi_bins = np.linspace(-np.pi, np.pi, n+1)  # bins for phi angles
    eta_bins = np.linspace(-4.5, 4.5, m+1)  # bins for eta angles
        
    df_list = []
    for i in range(n):
        for j in range(m):
            
            phi_mask = (phi_bins[i] < df['phi']) & (df['phi'] < phi_bins[i+1])
            eta_mask = (eta_bins[j] < df['eta']) & (df['eta'] < eta_bins[j+1])
            
            out_df = df[(phi_mask & eta_mask)].reset_index(drop=True)
            
            df_list.append(out_df)
                    
    return df_list



def data_puller(datamax):
    
    directory = '/home/aaportel/teams/group-3/data/'
    folders = ['train_1/','train_2/','train_5/']

    data_cutoff = 0
    data = []
   
    for folder in folders:
       
        hit_files = sorted([f for f in os.listdir(directory + folder) if f.endswith('hits.csv')])   
        truth_files = sorted([f for f in os.listdir(directory + folder) if f.endswith('truth.csv')])   
        
        for hit_file, truth_file in zip(hit_files, truth_files):

            # read a CSV file into a DataFrame
            X = pd.read_csv(directory + folder + hit_file, usecols=['x','y','z'])
            Y = pd.read_csv(directory + folder + truth_file, usecols=['particle_id'])
            
            # calculate phi, eta, and z from x, y, and z
            phi, eta, z = coord_convert(X['x'], X['y'],X['z'])
            
            # create a new DataFrame with phi, theta, eta, z and particle_id columns
            df = pd.DataFrame({
                'phi': phi,
                'eta': eta,
                'z': z,
                'particle_id': Y['particle_id'] 
            })

            # splits dataframe into chunks based on detector geometry
            n, m = 8, 4
            df_list = sector_splitter(df, n, m)
            
            # appends elements of df_list into a mega list
            data.extend(df_list)
            
            data_cutoff += 1
            if data_cutoff > datamax:
                return data
            
    return data

In [3]:
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split
from classes import CustomDataset

use_cuda = True

dataframes = data_puller(1)

# Split into train and test sets
train_dfs, valid_dfs = train_test_split(dataframes, test_size=0.2)

# Create custom datasets and dataloaders
train_dataset = CustomDataset(train_dfs, use_cuda)
valid_dataset = CustomDataset(valid_dfs, use_cuda)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False)

In [4]:
import torch
import torch.nn as nn
from classes import EdgeClassifier

node_indim = 3
edge_indim = 2

model = EdgeClassifier(node_indim, edge_indim)
if use_cuda:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [5]:
# Train the model
for epoch in range(100):
    total_loss = 0.0
    for data in train_loader:
        torch.cuda.empty_cache()
        optimizer.zero_grad()
        output = model(data.x.float(), data.edge_index, data.edge_attr.float())
        loss = criterion(output, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        del data
        torch.cuda.empty_cache()

    print("Epoch %d, Loss = %f" % (epoch+1, total_loss/len(train_loader)))

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: CUDA out of memory. Tried to allocate 1997.22 GiB (GPU 0; 10.92 GiB total capacity; 899.21 MiB already allocated; 9.18 GiB free; 928.00 MiB reserved in total by PyTorch)