In [14]:
import os
import numpy as np
import torch
from torch_geometric.data import Data, Dataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import pandas as pd
from sklearn.model_selection import train_test_split

In [15]:
def coord_convert(x,y,z):
    """Convert Cartesian hit coordinates to (phi, eta, z).

    Args:
        x, y, z: Cartesian coordinates (scalars, NumPy arrays or pandas
            Series; the NumPy ufuncs used here operate element-wise).

    Returns:
        Tuple ``(phi, eta, z)`` where ``phi`` is the azimuthal angle
        ``arctan2(y, x)``, ``eta`` is the pseudorapidity
        ``-log(tan(theta / 2))`` with ``theta`` the polar angle measured
        from the +z axis, and ``z`` is passed through unchanged.
    """
    # Squared distance from the z axis
    transverse_sq = x ** 2 + y ** 2

    azimuth = np.arctan2(y, x)
    polar = np.arctan2(np.sqrt(transverse_sq), z)

    # Pseudorapidity from the polar angle
    half_angle_tan = np.tan(polar/2)
    pseudorapidity = -np.log(half_angle_tan)

    return azimuth, pseudorapidity, z


def sector_splitter(df, n, m, eta_range=(-4.5, 4.5)):
    """Split a hit DataFrame into ``n * m`` (phi, eta) detector sectors.

    Bins are half-open ``[low, high)`` except the last bin of each axis,
    which is closed ``[low, high]``. This way a hit lying exactly on a bin
    edge (for example ``phi == 0`` or ``phi == pi``) lands in exactly one
    sector instead of being silently dropped.

    Args:
        df: DataFrame with at least the columns ``'phi'`` and ``'eta'``.
        n: Number of equal-width phi bins covering ``[-pi, pi]``.
        m: Number of equal-width eta bins covering ``eta_range``.
        eta_range: ``(min, max)`` pseudorapidity covered by the eta bins.
            Hits outside this range are not assigned to any sector.

    Returns:
        List of ``n * m`` DataFrames ordered phi-major (all eta bins of the
        first phi bin, then the second phi bin, ...). Sectors may be empty.
    """
    phi_bins = np.linspace(-np.pi, np.pi, n + 1)  # bin edges for phi
    eta_bins = np.linspace(eta_range[0], eta_range[1], m + 1)  # bin edges for eta

    def _bin_masks(values, edges):
        # One boolean mask per bin; only the final bin includes its upper edge.
        masks = []
        last = len(edges) - 2
        for k in range(len(edges) - 1):
            if k == last:
                below_upper = values <= edges[k + 1]
            else:
                below_upper = values < edges[k + 1]
            masks.append((values >= edges[k]) & below_upper)
        return masks

    # Each axis is masked once rather than once per (phi, eta) pair.
    phi_masks = _bin_masks(df['phi'], phi_bins)
    eta_masks = _bin_masks(df['eta'], eta_bins)

    return [df[phi_mask & eta_mask]
            for phi_mask in phi_masks
            for eta_mask in eta_masks]


def data_puller(datamax,
                directory='/home/aaportel/teams/group-3/data/',
                folders=('train_1/', 'train_2/', 'train_5/'),
                n=8, m=4):
    """Load events from disk and split each one into detector sectors.

    For every ``*hits.csv`` file the matching ``*truth.csv`` file (same name
    with the suffix swapped) is read, the hit positions are converted to
    (phi, eta, z) with ``coord_convert`` and the event is split into
    ``n * m`` sectors with ``sector_splitter``.

    Args:
        datamax: Maximum number of events (hit/truth file pairs) to load.
        directory: Base directory containing the data folders.
        folders: Sub-folders of ``directory`` scanned in order.
        n: Number of phi sectors per event.
        m: Number of eta sectors per event.

    Returns:
        Flat list of sector DataFrames with the columns ``phi``, ``eta``,
        ``z`` and ``particle_id``; ``n * m`` entries per loaded event.

    Raises:
        FileNotFoundError: If a hits file has no matching truth file.
        ValueError: If a hits file and its truth file differ in row count,
            which would misalign labels and coordinates.
    """
    hit_suffix, truth_suffix = 'hits.csv', 'truth.csv'

    events_read = 0
    data = []

    for folder in folders:
        folder_path = os.path.join(directory, folder)
        available = set(os.listdir(folder_path))
        hit_files = sorted(f for f in available if f.endswith(hit_suffix))

        for hit_file in hit_files:
            # Check the cap before reading, so exactly `datamax` events load.
            if events_read >= datamax:
                return data

            # Pair by file name rather than by position in two sorted lists,
            # so one missing file cannot shift every later pairing.
            truth_file = hit_file[:-len(hit_suffix)] + truth_suffix
            if truth_file not in available:
                raise FileNotFoundError(
                    "No truth file %r for hits file %r in %r"
                    % (truth_file, hit_file, folder_path)
                )

            # read the CSV files into DataFrames
            X = pd.read_csv(os.path.join(folder_path, hit_file), usecols=['x', 'y', 'z'])
            Y = pd.read_csv(os.path.join(folder_path, truth_file), usecols=['particle_id'])
            if len(X) != len(Y):
                raise ValueError(
                    "Row count mismatch between %r (%d rows) and %r (%d rows)"
                    % (hit_file, len(X), truth_file, len(Y))
                )

            # calculate phi, eta, and z from x, y, and z
            phi, eta, z = coord_convert(X['x'], X['y'], X['z'])

            # create a new DataFrame with phi, eta, z and particle_id columns
            df = pd.DataFrame({
                'phi': phi,
                'eta': eta,
                'z': z,
                'particle_id': Y['particle_id']
            })

            # split the event into chunks based on detector geometry and
            # append the chunks to the flat output list
            data.extend(sector_splitter(df, n, m))
            events_read += 1

    return data

In [19]:
class CustomDataset(Dataset):
    """Graph dataset that turns each sector DataFrame into a PyG ``Data`` object.

    Every DataFrame must provide the columns ``phi``, ``eta``, ``z`` and
    ``particle_id``. Rows become nodes. A directed edge ``i -> j`` is created
    when node ``j`` has strictly larger phi and eta than node ``i`` and the
    two are closer than ``max_delta_r`` in the (phi, eta) plane.

    The class implements the ``len``/``get`` hooks of the PyTorch Geometric
    ``Dataset`` base class; ``len(dataset)`` and ``dataset[idx]`` are then
    provided by the base class.

    Args:
        dataframes: Sequence of pandas DataFrames, one graph per entry.
        max_delta_r: Upper bound on ``sqrt(dphi**2 + deta**2)`` for two hits
            to be connected. Defaults to 1.7.
    """

    def __init__(self, dataframes, max_delta_r=1.7):
        # The base class must be initialised for its indexing machinery to work.
        super().__init__()
        self.dataframes = dataframes
        self.max_delta_r = max_delta_r

    def len(self):
        """Return the number of graphs (one per stored DataFrame)."""
        return len(self.dataframes)

    def get(self, idx):
        """Build the graph for the DataFrame at position ``idx``.

        Returns:
            ``Data`` with ``x`` of shape (N, 3) float32 holding (phi, eta, z),
            ``edge_index`` of shape (2, E) int64, ``edge_attr`` of shape
            (E, 2) float32 holding (dphi, deta) per edge, and ``y`` of shape
            (N,) holding the particle ids.
        """
        df = self.dataframes[idx]

        # Node features and per-node labels
        node_features = torch.tensor(df[['phi', 'eta', 'z']].to_numpy()).float()
        particle_ids = torch.tensor(df['particle_id'].to_numpy())

        # 1-D coordinate vectors of length N
        phi = torch.tensor(df['phi'].to_numpy())
        eta = torch.tensor(df['eta'].to_numpy())

        # Pairwise differences, diff[i, j] = value[j] - value[i]; shape (N, N)
        phi_diff = phi.unsqueeze(0) - phi.unsqueeze(1)
        eta_diff = eta.unsqueeze(0) - eta.unsqueeze(1)

        # Angular distance (deltaR) between every pair of hits
        delta_r = torch.sqrt(phi_diff ** 2 + eta_diff ** 2)

        # Connect i -> j only for close pairs with increasing phi and eta;
        # the strict inequalities also exclude self-loops.
        connected = (delta_r < self.max_delta_r) & (phi_diff > 0) & (eta_diff > 0)

        # (2, E) list of [source, target] node indices
        edge_index = connected.nonzero(as_tuple=False).t().contiguous()
        src, dst = edge_index[0], edge_index[1]

        # Edge features: (dphi, deta) per edge, shape (E, 2), same dtype as x
        edge_features = torch.stack(
            (phi_diff[src, dst], eta_diff[src, dst]), dim=-1
        ).float()

        return Data(
            x          = node_features,
            edge_index = edge_index.long(),
            edge_attr  = edge_features,
            y          = particle_ids
        )

In [21]:
class _MLPMeanConv(MessagePassing):
    """Message-passing layer: an MLP on each source node, mean-aggregated per target.

    ``MessagePassing`` is a base class; the transformation has to live in a
    subclass's ``message`` method to take part in ``propagate``.
    """

    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__(aggr='mean', flow='source_to_target', node_dim=0)
        self.mlp = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim)
        )

    def forward(self, x, edge_index):
        return self.propagate(edge_index, x=x)

    def message(self, x_j):
        # x_j holds the features of the source node of every edge.
        return self.mlp(x_j)


class myGNN(nn.Module):
    """Stack of MLP message-passing layers followed by a two-layer head.

    Args:
        input_dim: Number of features per node.
        hidden_dim: Width of the hidden node representation.
        output_dim: Number of outputs per node.
        num_layers: Number of message-passing layers.

    ``forward`` takes a PyG ``Data``/``Batch`` with ``x`` of shape
    (N, input_dim) and ``edge_index`` of shape (2, E), and returns a tensor
    of shape (N, output_dim).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(myGNN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        # Message-passing layers: the first maps input_dim -> hidden_dim,
        # every later one maps hidden_dim -> hidden_dim.
        self.conv = nn.ModuleList()
        layer_in = input_dim
        for _ in range(num_layers):
            self.conv.append(_MLPMeanConv(layer_in, hidden_dim, hidden_dim))
            layer_in = hidden_dim

        # Fully connected head; its input width matches the last conv output
        # (or the raw features when num_layers == 0).
        self.fc1 = nn.Linear(layer_in, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # Self-loops let every node receive its own message, so the mean
        # aggregation is defined even for isolated nodes.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Mean aggregation already normalises by the in-degree, so no
        # separate degree division is applied afterwards.
        for conv in self.conv:
            x = nn.functional.relu(conv(x, edge_index))

        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    



In [22]:
import torch.nn.functional as F

class GNN(torch.nn.Module):
    """Sum-aggregating message-passing network with a linear output head.

    Each layer applies a linear transform, sums the transformed features of
    the incoming neighbours and applies a ReLU. One input layer is followed
    by ``num_layers`` hidden layers and a final linear projection.

    Args:
        num_features: Number of input features per node.
        hidden_size: Width of the hidden node representation.
        num_classes: Number of outputs per node.
        num_layers: Number of hidden (hidden_size -> hidden_size) layers.

    ``forward`` takes a PyG ``Data``/``Batch`` with ``x`` of shape
    (N, num_features) and ``edge_index`` of shape (2, E), and returns a
    tensor of shape (N, num_classes).
    """

    def __init__(self, num_features, hidden_size, num_classes, num_layers):
        super(GNN, self).__init__()

        self.num_layers = num_layers

        # Input layer
        self.conv1 = MessagePassing(aggr='add')
        self.lin1 = torch.nn.Linear(num_features, hidden_size)

        # Hidden layers live in ModuleLists so their names can never collide
        # with the output head, whatever num_layers is.
        self.hidden_convs = torch.nn.ModuleList(
            [MessagePassing(aggr='add') for _ in range(num_layers)]
        )
        self.hidden_lins = torch.nn.ModuleList(
            [torch.nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)]
        )

        # Output head mapping the hidden representation to num_classes
        self.lin_out = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.lin1(x)
        x = F.relu(self.conv1.propagate(edge_index, x=x))

        for conv, lin in zip(self.hidden_convs, self.hidden_lins):
            x = lin(x)
            x = F.relu(conv.propagate(edge_index, x=x))

        return self.lin_out(x)

    def message(self, x_j):
        """Identity message, kept for backward compatibility.

        Not invoked by ``forward``: this class is an ``nn.Module``, and the
        propagation is carried out by the ``MessagePassing`` layers it owns.
        """
        return x_j

    def update(self, aggr_out):
        """Identity update, kept for backward compatibility (see ``message``)."""
        return aggr_out

In [None]:
# Fixed seed so the train/test split and the loader shuffling are reproducible
SEED = 42
torch.manual_seed(SEED)

# Load sector DataFrames (datamax=1)
dataframes = data_puller(1)

# Split into train and test sets
train_dfs, test_dfs = train_test_split(dataframes, test_size=0.2, random_state=SEED)

# Create custom datasets and dataloaders
train_dataset = CustomDataset(train_dfs)
test_dataset = CustomDataset(test_dfs)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [25]:
# Preferred compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Define the parameters of the model
# number of features per node (CustomDataset provides phi, eta, z)
input_dim = 3
# size of the hidden layers
hidden_dim = 64
# number of outputs per node
output_dim = 1
# number of hidden layers
num_layers = 1

# Create the GNN model
model = GNN(input_dim, hidden_dim, output_dim, num_layers)
# NOTE(review): the transfer below is commented out, so the model stays on
# the CPU and `device` is unused in this cell. If it is re-enabled, every
# batch must also be moved to `device` before it is passed to the model.
# model = model.to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the model
model.train()

# Batches are moved to wherever the model's parameters live, so the loop
# works whether or not the model has been transferred to a GPU.
model_device = next(model.parameters()).device

for epoch in range(100):
    total_loss = 0.0
    for data in train_loader:
        data = data.to(model_device)
        optimizer.zero_grad()
        output = model(data)
        # MSELoss needs a float target with the same shape as the prediction:
        # drop a trailing singleton output dimension and cast the labels.
        loss = criterion(output.squeeze(-1), data.y.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Average over the batches of the loader that was iterated above;
    # max(..., 1) avoids a division by zero for an empty loader.
    print("Epoch %d, Loss = %f" % (epoch+1, total_loss/max(len(train_loader), 1)))
