In [1]:
#Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import Tensor
import os

os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.3.1+cu121


In [2]:
NTRACKS = 500

#Get the current working directory
cwd = os.getcwd()
file_path = cwd + f'/points_{NTRACKS}.csv'

In [3]:
df=pd.read_csv(file_path)

#Round the values of the dataset to 4 decimal places
df = df.round(4)

#Add a column to use as index from 0 to the length of the dataset
df['n_label'] = range(0, len(df))

#delete the column p_label
#df = df.drop('p_label', axis=1)

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,9.4095,2.359,52.5321,0,1,T0,0.2014,2.3936,-1,34.4328,0.4129,-0.0973,0
1,18.8156,4.3276,104.7193,0,2,T0,0.2014,2.3936,-1,34.4328,0.4129,-0.0973,1
2,28.2948,6.3439,157.3488,0,3,T0,0.2014,2.3936,-1,34.4328,0.4129,-0.0973,2
3,37.7673,8.3911,209.9782,0,4,T0,0.2014,2.3936,-1,34.4328,0.4129,-0.0973,3
4,47.1536,10.4518,262.1654,0,5,T0,0.2014,2.3936,-1,34.4328,0.4129,-0.0973,4


In [4]:
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

In [5]:
#Empty hetero graph 
data=HeteroData()

## Add nodes

In [6]:
#node names
nodes_s=df['n_label'].values
nodes_t=df['n_label'].values

In [7]:
#Add nodes to the graph
data['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
data['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)

In [8]:
#Add node attributes, in this case the position of the points
data['source'].x = Tensor(df[['x', 'y', 'z']].values)
data['target'].x = Tensor(df[['x', 'y', 'z']].values)

In [9]:
data

HeteroData(
  source={
    node_id=[4929],
    x=[4929, 3],
  },
  target={
    node_id=[4929],
    x=[4929, 3],
  }
)

## Add edges

In [10]:
edge_path = cwd + f'/graph_{NTRACKS}.csv'

# Importing the dataset
df_edge = pd.read_csv(edge_path)
df_edge = df_edge.replace({'weight':0.5}, 0.)
df_edge

Unnamed: 0,Source,Target,weight
0,0,1,1
1,0,567,0
2,0,884,0
3,0,1134,0
4,0,1194,0
...,...,...,...
20512,4897,4898,1
20513,4907,2540,0
20514,4907,4908,1
20515,4917,4918,1


In [11]:
edge_index = torch.tensor([df_edge['Source'], df_edge['Target']], dtype=torch.long)

In [12]:
data['source', 'weight', 'target'].edge_index = edge_index

In [13]:
data

HeteroData(
  source={
    node_id=[4929],
    x=[4929, 3],
  },
  target={
    node_id=[4929],
    x=[4929, 3],
  },
  (source, weight, target)={ edge_index=[2, 20517] }
)

In [14]:
#edge attributes
weight_val = torch.from_numpy(df_edge['weight'].values).to(torch.float)

In [15]:
data['source', 'weight', 'target'].edge_label=weight_val

In [16]:
data

HeteroData(
  source={
    node_id=[4929],
    x=[4929, 3],
  },
  target={
    node_id=[4929],
    x=[4929, 3],
  },
  (source, weight, target)={
    edge_index=[2, 20517],
    edge_label=[20517],
  }
)

In [17]:
#check if the data is valid
data.validate(raise_on_error=True)

True

In [18]:
import torch_geometric.transforms as T
data = T.ToUndirected()(data)

In [19]:
del data['target', 'rev_weight', 'source'].edge_label

In [20]:
print(data.validate(raise_on_error=True))
data

True


HeteroData(
  source={
    node_id=[4929],
    x=[4929, 3],
  },
  target={
    node_id=[4929],
    x=[4929, 3],
  },
  (source, weight, target)={
    edge_index=[2, 20517],
    edge_label=[20517],
  },
  (target, rev_weight, source)={ edge_index=[2, 20517] }
)

In [21]:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.00,
    num_test=0.00,
    neg_sampling_ratio=0.0,
    edge_types=[('source', 'weight', 'target')],
    rev_edge_types=[('target', 'rev_weight', 'source')],
)(data)
torch.save(test_data,f'test_data_{NTRACKS}.pt')

In [22]:
test_data

HeteroData(
  source={
    node_id=[4929],
    x=[4929, 3],
  },
  target={
    node_id=[4929],
    x=[4929, 3],
  },
  (source, weight, target)={
    edge_index=[2, 20517],
    edge_label=[0],
    edge_label_index=[2, 0],
  },
  (target, rev_weight, source)={ edge_index=[2, 20517] }
)

## Model

In [23]:
from torch_geometric.nn import SAGEConv, to_hetero

class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['source'][row], z_dict['target'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Model(hidden_channels=32).to(device)

print(model)

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
    (conv2): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=64, out_features=32, bias=True)
    (lin2): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [24]:
import torch.nn.functional as F

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['source', 'target'].edge_label_index)
    target = train_data['source', 'target'].edge_label
    loss = F.mse_loss(pred, target)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test(data):
    data = data.to(device)
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['source', 'target'].edge_label_index)
    pred = pred.clamp(min=0, max=1)
    target = data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [25]:
for epoch in range(1, 3001):
    train_data = train_data.to(device)
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}')

Epoch: 001, Loss: 30.4339, Train: 0.6280, Val: nan
Epoch: 002, Loss: 513.6937, Train: 0.5074, Val: nan
Epoch: 003, Loss: 64.1454, Train: 0.6239, Val: nan
Epoch: 004, Loss: 8.1395, Train: 0.6724, Val: nan
Epoch: 005, Loss: 25.5700, Train: 0.6735, Val: nan
Epoch: 006, Loss: 20.9721, Train: 0.6620, Val: nan
Epoch: 007, Loss: 9.9077, Train: 0.6157, Val: nan
Epoch: 008, Loss: 3.0310, Train: 0.5147, Val: nan
Epoch: 009, Loss: 1.5202, Train: 0.4845, Val: nan
Epoch: 010, Loss: 3.1200, Train: 0.4897, Val: nan
Epoch: 011, Loss: 2.0525, Train: 0.5728, Val: nan
Epoch: 012, Loss: 0.7757, Train: 0.6882, Val: nan
Epoch: 013, Loss: 1.9013, Train: 0.6951, Val: nan
Epoch: 014, Loss: 2.2184, Train: 0.6077, Val: nan
Epoch: 015, Loss: 0.9086, Train: 0.4796, Val: nan
Epoch: 016, Loss: 0.3848, Train: 0.4541, Val: nan
Epoch: 017, Loss: 1.2187, Train: 0.4544, Val: nan
Epoch: 018, Loss: 1.0676, Train: 0.4541, Val: nan
Epoch: 019, Loss: 0.4036, Train: 0.4741, Val: nan
Epoch: 020, Loss: 0.2866, Train: 0.5269, Val

In [26]:
with torch.no_grad():
    test_data = test_data.to(device)
    pred = model(test_data.x_dict, test_data.edge_index_dict,
                 test_data['source', 'target'].edge_label_index)
    print(pred.shape)
    pred = pred.clamp(min=0, max=1)
    target = test_data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = test_data['source', 'target'].edge_label_index[0].cpu().numpy()
tar = test_data['source', 'target'].edge_label_index[1].cpu().numpy()
pred = pred.cpu().numpy()
print(pred.shape)
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})
print(res.shape)

torch.Size([0])
Test RMSE: nan
(0,)
(0, 4)


In [27]:
#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>=0.5, 1., 0.)

In [28]:
res
print(res['weight'].shape)

(0,)


In [29]:
#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)

ZeroDivisionError: division by zero

In [30]:
pred_data = test_data

pred_data['source','weight','target']['edge_label']

tensor([])

In [31]:
pred_data['source','weight','target']['edge_label'] = torch.tensor(res['weight'], dtype=torch.long)
print(pred_data['source','weight','target']['edge_label'])
torch.save(pred_data, f'pred_data_{NTRACKS}.pt')

tensor([], dtype=torch.int64)


## Validation

In [32]:
connected_accuracy = 0.
nonconnected_accuracy = 0.

n1,n2=0,0
ncon,nncon=0,0
for i in res.itertuples():
    if i.compare == 0.:
        if i.compare == i.weight: n1+=1
        nncon+=1
    elif i.compare == 1.0:
        if i.compare == i.weight: n2+=1
        ncon+=1

connected_accuracy = n2/ncon
nonconnected_accuracy = n1/nncon

print(f'Accuracy in connected edges:     {n2}/{ncon} = {connected_accuracy}')
print(f'Accuracy in non connected edges: {n1}/{nncon} = {nonconnected_accuracy}')

ZeroDivisionError: division by zero

In [33]:
torch.save(model.state_dict(), f'model_{NTRACKS}_all.pth')

## Try with different graph

In [None]:
#read a new csv file
file_test = cwd + '/points_10.csv'
df_test=pd.read_csv(file_test, sep=',')

#Round the values of the dataset to 4 decimal places
df_test = df_test.round(4)

#Add a column to use as index from 0 to the length of the dataset
df_test['n_label'] = range(0, len(df_test))

#delete the column p_label
#df_test = df_test.drop('p_label', axis=1)

In [None]:
data_10=HeteroData()

nodes_s=df_test['n_label'].values
nodes_t=df_test['n_label'].values

data_10['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
data_10['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)

data_10['source'].x = Tensor(df_test[['x', 'y', 'z']].values)
data_10['target'].x = Tensor(df_test[['x', 'y', 'z']].values)

In [None]:
edge_test_path = cwd + '/graph_10.csv'

# Importing the dataset
df_test_edge = pd.read_csv(edge_test_path)

edge_index_test = torch.tensor([df_test_edge['Source'], df_test_edge['Target']], dtype=torch.long)

data_10['source', 'weight', 'target'].edge_index = edge_index_test

weight_test = torch.from_numpy(df_test_edge['weight'].values).to(torch.float)

data_10['source', 'weight', 'target'].edge_label=weight_test

In [None]:
data_10= T.ToUndirected()(data_10)
del data_10['target', 'rev_weight', 'source'].edge_label

In [None]:
print(data_10.validate(raise_on_error=True))
data_10

In [None]:
with torch.no_grad():
    data_10 = data_10.to(device)
    pred = model(data_10.x_dict, data_10.edge_index_dict,
                 data_10['source', 'target'].edge_index)
    pred = pred.clamp(min=0, max=1)
    target = data_10['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = data_10['source', 'target'].edge_index[0].cpu().numpy()
tar = data_10['source', 'target'].edge_index[1].cpu().numpy()
pred = pred.cpu().numpy()
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})

In [None]:
#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>0.5, 1, 0.5)

#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)