In [1]:
#Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import Tensor
import os

os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.3.1+cu121


In [2]:
NTRACKS = 500
#Get the current working directory
basedir = os.getcwd()
csv_path = basedir + '/../csv_files'

train_points_file_path = csv_path + f'/points_{NTRACKS}_0.csv'
train_graph_file_path = csv_path + f'/graph_{NTRACKS}_0.csv'
val_points_file_path = csv_path + f'/points_{NTRACKS}_1.csv'
val_graph_file_path = csv_path + f'/graph_{NTRACKS}_1.csv'

In [3]:
df=pd.read_csv(train_points_file_path)
#Round the values of the dataset to 4 decimal places
df = df.round(4)
#Add a column to use as index from 0 to the length of the dataset
df['n_label'] = range(0, len(df))

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,-6.7562,-7.0174,39.5458,7,1,T0,-2.3718,2.1032,-1,66.9142,0.328,0.2611,0
1,-13.5744,-13.6599,77.9575,7,2,T0,-2.3718,2.1032,-1,66.9142,0.328,0.2611,1
2,-20.5362,-20.4651,117.2422,7,3,T0,-2.3718,2.1032,-1,66.9142,0.328,0.2611,2
3,-27.4093,-27.2063,156.0904,7,4,T0,-2.3718,2.1032,-1,66.9142,0.328,0.2611,3
4,-34.2711,-33.9589,194.9386,7,5,T0,-2.3718,2.1032,-1,66.9142,0.328,0.2611,4


In [4]:
df=pd.read_csv(val_points_file_path)
#Round the values of the dataset to 4 decimal places
df = df.round(4)
#Add a column to use as index from 0 to the length of the dataset
df['n_label'] = range(0, len(df))

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,-9.7994,-0.0326,15.0187,6,1,T0,-3.1203,1.1862,1,58.6725,-0.1667,0.4725,0
1,-19.8469,-0.2172,29.9379,6,2,T0,-3.1203,1.1862,1,58.6725,-0.1667,0.4725,1
2,-29.8947,-0.3818,44.8571,6,3,T0,-3.1203,1.1862,1,58.6725,-0.1667,0.4725,2
3,-39.9428,-0.5264,59.7763,6,4,T0,-3.1203,1.1862,1,58.6725,-0.1667,0.4725,3
4,-49.9912,-0.651,74.6955,6,5,T0,-3.1203,1.1862,1,58.6725,-0.1667,0.4725,4


## Build data

In [5]:
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

In [6]:
def buildData(points_path, edge_path):
    df=pd.read_csv(points_path)
    #Round the values of the dataset to 4 decimal places
    df = df.round(4)
    #Add a column to use as index from 0 to the length of the dataset
    df['n_label'] = range(0, len(df))
    
    #Empty hetero graph 
    data=HeteroData()

    #node names
    nodes_s=df['n_label'].values
    nodes_t=df['n_label'].values
    
    #Add nodes to the graph
    data['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
    data['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)
    
    #Add node attributes, in this case the position of the points
    data['source'].x = Tensor(df[['x', 'y', 'z']].values)
    data['target'].x = Tensor(df[['x', 'y', 'z']].values)

    edge_path = basedir + f'/graph_{NTRACKS}.csv'
    
    # Importing the dataset
    df_edge = pd.read_csv(edge_path)
    df_edge = df_edge.replace({'weight':0.5}, 0.)

    edge_index = torch.tensor([df_edge['Source'], df_edge['Target']], dtype=torch.long)
    data['source', 'weight', 'target'].edge_index = edge_index
    
    #edge attributes
    weight_val = torch.from_numpy(df_edge['weight'].values).to(torch.float)
    
    data['source', 'weight', 'target'].edge_label = weight_val
    
    #check if the data is valid
    print(data.validate(raise_on_error=True))

    data = T.ToUndirected()(data)
    del data['target', 'rev_weight', 'source'].edge_label

    return data

In [7]:
train_data = buildData(train_points_file_path, train_graph_file_path)
train_data

True


HeteroData(
  source={
    node_id=[5000],
    x=[5000, 3],
  },
  target={
    node_id=[5000],
    x=[5000, 3],
  },
  (source, weight, target)={
    edge_index=[2, 20517],
    edge_label=[20517],
  },
  (target, rev_weight, source)={ edge_index=[2, 20517] }
)

In [8]:
val_data = buildData(train_points_file_path, train_graph_file_path)
val_data

True


HeteroData(
  source={
    node_id=[5000],
    x=[5000, 3],
  },
  target={
    node_id=[5000],
    x=[5000, 3],
  },
  (source, weight, target)={
    edge_index=[2, 20517],
    edge_label=[20517],
  },
  (target, rev_weight, source)={ edge_index=[2, 20517] }
)

## Model

In [9]:
from torch_geometric.nn import SAGEConv, to_hetero

class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['source'][row], z_dict['target'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, train_data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Model(hidden_channels=32).to(device)

print(model)

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
    (conv2): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=64, out_features=32, bias=True)
    (lin2): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [10]:
import torch.nn.functional as F

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['source', 'target'].edge_index)
    target = train_data['source', 'target'].edge_label
    loss = F.mse_loss(pred, target)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test(data):
    data = data.to(device)
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['source', 'target'].edge_index)
    pred = pred.clamp(min=0, max=1)
    target = data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [11]:
for epoch in range(1, 3001):
    train_data = train_data.to(device)
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}')

Epoch: 001, Loss: 18.1628, Train: 0.5908, Val: 0.5908
Epoch: 002, Loss: 481.9803, Train: 0.7607, Val: 0.7607
Epoch: 003, Loss: 30.8433, Train: 0.8169, Val: 0.8169
Epoch: 004, Loss: 8.9964, Train: 0.6333, Val: 0.6333
Epoch: 005, Loss: 6.4734, Train: 0.4752, Val: 0.4752
Epoch: 006, Loss: 5.4389, Train: 0.4708, Val: 0.4708
Epoch: 007, Loss: 2.2994, Train: 0.6322, Val: 0.6322
Epoch: 008, Loss: 1.6513, Train: 0.7270, Val: 0.7270
Epoch: 009, Loss: 2.2572, Train: 0.6959, Val: 0.6959
Epoch: 010, Loss: 1.5807, Train: 0.5970, Val: 0.5970
Epoch: 011, Loss: 1.1892, Train: 0.4995, Val: 0.4995
Epoch: 012, Loss: 1.2335, Train: 0.4622, Val: 0.4622
Epoch: 013, Loss: 0.9736, Train: 0.4485, Val: 0.4485
Epoch: 014, Loss: 0.5190, Train: 0.4553, Val: 0.4553
Epoch: 015, Loss: 0.3256, Train: 0.5035, Val: 0.5035
Epoch: 016, Loss: 0.3911, Train: 0.5326, Val: 0.5326
Epoch: 017, Loss: 0.4814, Train: 0.5266, Val: 0.5266
Epoch: 018, Loss: 0.4744, Train: 0.5073, Val: 0.5073
Epoch: 019, Loss: 0.4238, Train: 0.4925, V

In [12]:
edge_path = basedir + f'/graph_{NTRACKS}.csv'

# Importing the dataset
df_edge = pd.read_csv(edge_path)
df_edge = df_edge.replace({'weight':0.5}, 0.)

In [13]:
edge_index = torch.tensor([df_edge['Source'], df_edge['Target']], dtype=torch.long)
data['source', 'weight', 'target'].edge_index = edge_index

#edge attributes
weight_val = torch.from_numpy(df_edge['weight'].values).to(torch.float)

data['source', 'weight', 'target'].edge_label = weight_val

#check if the data is valid
data.validate(raise_on_error=True)

NameError: name 'data' is not defined

In [None]:
import torch_geometric.transforms as T
data = T.ToUndirected()(data)

del data['target', 'rev_weight', 'source'].edge_label

print(data.validate(raise_on_error=True))
data

In [None]:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.00,
    num_test=0.00,
    neg_sampling_ratio=0.0,
    edge_types=[('source', 'weight', 'target')],
    rev_edge_types=[('target', 'rev_weight', 'source')],
)(data)

test_data

In [14]:
test_data = val_data

with torch.no_grad():
    test_data = test_data.to(device)
    pred = model(test_data.x_dict, test_data.edge_index_dict,
                 test_data['source', 'target'].edge_index)
    print(pred.shape)
    pred = pred.clamp(min=0, max=1)
    target = test_data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = test_data['source', 'target'].edge_index[0].cpu().numpy()
tar = test_data['source', 'target'].edge_index[1].cpu().numpy()
pred = pred.cpu().numpy()
print(pred.shape)
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})
print(res.shape)

#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>=0.5, 1., 0.)

torch.Size([20517])
Test RMSE: 0.2506
(20517,)
(20517, 4)


In [15]:
#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)

Accuracy: 0.9186528244870108
Number of correct predictions: 18848


In [16]:
pred_data = test_data

pred_data['source','weight','target']['edge_label']

tensor([1., 0., 0.,  ..., 1., 1., 1.])

In [17]:
pred_data['source','weight','target']['edge_label'] = torch.tensor(res['weight'], dtype=torch.long)
print(pred_data['source','weight','target']['edge_label'])

tensor([0, 0, 0,  ..., 1, 1, 1])


## Save model

In [18]:
torch.save(model.state_dict(), f'model_{NTRACKS}_all.pth')

## Validation

In [19]:
connected_accuracy = 0.
nonconnected_accuracy = 0.

n1,n2=0,0
ncon,nncon=0,0
for i in res.itertuples():
    if i.compare == 0.:
        if i.compare == i.weight: n1+=1
        nncon+=1
    elif i.compare == 1.0:
        if i.compare == i.weight: n2+=1
        ncon+=1

connected_accuracy = n2/ncon
nonconnected_accuracy = n1/nncon

print(f'Accuracy in connected edges:     {n2}/{ncon} = {connected_accuracy}')
print(f'Accuracy in non connected edges: {n1}/{nncon} = {nonconnected_accuracy}')

Accuracy in connected edges:     3300/4409 = 0.7484690405987753
Accuracy in non connected edges: 15548/16108 = 0.9652346660044698


In [None]:
#read a new csv file
file_test = cwd + '/points_10.csv'
df_test=pd.read_csv(file_test, sep=',')

#Round the values of the dataset to 4 decimal places
df_test = df_test.round(4)

#Add a column to use as index from 0 to the length of the dataset
df_test['n_label'] = range(0, len(df_test))

#delete the column p_label
#df_test = df_test.drop('p_label', axis=1)

In [None]:
data_10=HeteroData()

nodes_s=df_test['n_label'].values
nodes_t=df_test['n_label'].values

data_10['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
data_10['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)

data_10['source'].x = Tensor(df_test[['x', 'y', 'z']].values)
data_10['target'].x = Tensor(df_test[['x', 'y', 'z']].values)

In [None]:
edge_test_path = cwd + '/graph_10.csv'

# Importing the dataset
df_test_edge = pd.read_csv(edge_test_path)

edge_index_test = torch.tensor([df_test_edge['Source'], df_test_edge['Target']], dtype=torch.long)

data_10['source', 'weight', 'target'].edge_index = edge_index_test

weight_test = torch.from_numpy(df_test_edge['weight'].values).to(torch.float)

data_10['source', 'weight', 'target'].edge_label=weight_test

In [None]:
data_10= T.ToUndirected()(data_10)
del data_10['target', 'rev_weight', 'source'].edge_label

In [None]:
print(data_10.validate(raise_on_error=True))
data_10

In [None]:
with torch.no_grad():
    data_10 = data_10.to(device)
    pred = model(data_10.x_dict, data_10.edge_index_dict,
                 data_10['source', 'target'].edge_index)
    pred = pred.clamp(min=0, max=1)
    target = data_10['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = data_10['source', 'target'].edge_index[0].cpu().numpy()
tar = data_10['source', 'target'].edge_index[1].cpu().numpy()
pred = pred.cpu().numpy()
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})

In [None]:
#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>0.5, 1, 0.5)

#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)