In [1]:
#Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import Tensor
import os

# Expose the installed torch version through the TORCH environment variable and echo it.
torch_version = torch.__version__
os.environ['TORCH'] = torch_version
print(torch_version)

2.3.1+cu121


In [2]:
# Size tag of the training files (points_{NTRACKS}_0.csv / graph_{NTRACKS}_0.csv).
NTRACKS = 1000
# Size tag of the validation files (points_{NTRACKS_VAL}.csv / graph_{NTRACKS_VAL}.csv).
# Previously this was a bare literal buried inside the f-strings.
NTRACKS_VAL = 100
# Selects the graph_{WINDOW} directory the edge files are read from.
WINDOW = 45

# Data directories live one level above the current working directory.
basedir = f'{os.getcwd()}/..'
csv_point_path = f'{basedir}/point_files'
csv_graph_path = f'{basedir}/graph_{WINDOW}'

train_points_file_path = f'{csv_point_path}/points_{NTRACKS}_0.csv'
train_graph_file_path = f'{csv_graph_path}/graph_{NTRACKS}_0.csv'
val_points_file_path = f'{csv_point_path}/points_{NTRACKS_VAL}.csv'
val_graph_file_path = f'{csv_graph_path}/graph_{NTRACKS_VAL}.csv'

print(csv_point_path)
print(csv_graph_path)

/home/lopezr/Documents/work/teaching/tfm_carlos/TFM-UC/notebooks_gnn/../point_files
/home/lopezr/Documents/work/teaching/tfm_carlos/TFM-UC/notebooks_gnn/../graph_45


In [3]:
# Preview of the training points: values rounded to 4 decimals, plus an
# `n_label` column holding the row position (0 .. len-1).
df = (
    pd.read_csv(train_points_file_path)
    .round(4)
    .assign(n_label=lambda frame: range(0, len(frame)))
)

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,7.5387,-6.0894,15.4743,10,1,T0,-0.7125,1.2594,-1,99.9612,0.3144,-0.2145,0
1,14.879,-12.4083,31.1631,10,2,T0,-0.7125,1.2594,-1,99.9612,0.3144,-0.2145,1
2,22.2264,-18.7191,46.8519,10,3,T0,-0.7125,1.2594,-1,99.9612,0.3144,-0.2145,2
3,29.5809,-25.0215,62.5407,10,4,T0,-0.7125,1.2594,-1,99.9612,0.3144,-0.2145,3
4,36.9425,-31.3156,78.2296,10,5,T0,-0.7125,1.2594,-1,99.9612,0.3144,-0.2145,4


In [4]:
# Preview of the validation points: values rounded to 4 decimals, plus an
# `n_label` column holding the row position (0 .. len-1).
# Note: this rebinds `df`, replacing the training preview loaded in the previous cell.
df = (
    pd.read_csv(val_points_file_path)
    .round(4)
    .assign(n_label=lambda frame: range(0, len(frame)))
)

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,-7.955,-5.5655,10.9497,7,1,T0,-2.5813,0.9481,-1,93.0602,0.4814,0.3166,0
1,-16.4159,-10.8949,21.9151,7,2,T0,-2.5813,0.9481,-1,93.0602,0.4814,0.3166,1
2,-24.8701,-16.2347,32.8806,7,3,T0,-2.5813,0.9481,-1,93.0602,0.4814,0.3166,2
3,-33.0617,-21.4228,43.5137,7,4,T0,-2.5813,0.9481,-1,93.0602,0.4814,0.3166,3
4,-41.5028,-26.7835,54.4791,7,5,T0,-2.5813,0.9481,-1,93.0602,0.4814,0.3166,4


## Build data

In [5]:
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

In [6]:
def buildData(points_path, edge_path):
    """Build a source/target `HeteroData` graph from a points CSV and an edge CSV.

    Parameters
    ----------
    points_path : str
        CSV with at least the columns ``x``, ``y`` and ``z``. Each row becomes one
        node; the row position is used as the node id.
    edge_path : str
        CSV with the columns ``Source``, ``Target`` and ``weight``. ``Source`` and
        ``Target`` are used as node indices; ``weight`` becomes the edge label.

    Returns
    -------
    HeteroData
        Graph with node types ``source`` and ``target`` (both carrying the same
        ids and ``x``/``y``/``z`` features), the labelled edge type
        ``('source', 'weight', 'target')`` and the reverse edge type
        ``('target', 'rev_weight', 'source')`` added by ``ToUndirected`` (without
        labels).

    Raises
    ------
    ValueError
        Propagated from ``data.validate(raise_on_error=True)`` when the assembled
        graph is inconsistent.
    """
    # Round the values of the dataset to 4 decimal places
    points = pd.read_csv(points_path).round(4)
    # Row position doubles as the node id
    points['n_label'] = range(0, len(points))

    data = HeteroData()

    # Both node types describe the same set of points; each gets its own tensor.
    node_ids = points['n_label'].values
    data['source'].node_id = torch.tensor(node_ids, dtype=torch.long)
    data['target'].node_id = torch.tensor(node_ids, dtype=torch.long)

    # Node attributes: the position of the points
    coords = points[['x', 'y', 'z']].values
    data['source'].x = torch.tensor(coords, dtype=torch.float)
    data['target'].x = torch.tensor(coords, dtype=torch.float)

    edges = pd.read_csv(edge_path)
    # Edges labelled 0.5 are treated as label 0
    edges = edges.replace({'weight': 0.5}, 0.)

    # Stack the two index columns into one contiguous (2, n_edges) array first:
    # handing torch.tensor a list of pandas Series makes it walk every element in
    # Python and look values up through the Series index.
    endpoints = np.stack([edges['Source'].to_numpy(), edges['Target'].to_numpy()])
    data['source', 'weight', 'target'].edge_index = torch.tensor(endpoints, dtype=torch.long)

    # Edge labels
    data['source', 'weight', 'target'].edge_label = (
        torch.from_numpy(edges['weight'].values).to(torch.float)
    )

    # Check that the data is valid (raises on error, prints True otherwise)
    print(data.validate(raise_on_error=True))

    # Add the reverse edges, then drop the label copy created for the reverse
    # edge type so only the forward edges carry labels.
    data = T.ToUndirected()(data)
    del data['target', 'rev_weight', 'source'].edge_label

    return data

In [7]:
# Training graph, built from the training points file and its edge file.
train_data = buildData(train_points_file_path, train_graph_file_path)
train_data

True


HeteroData(
  source={
    node_id=[10000],
    x=[10000, 3],
  },
  target={
    node_id=[10000],
    x=[10000, 3],
  },
  (source, weight, target)={
    edge_index=[2, 168242],
    edge_label=[168242],
  },
  (target, rev_weight, source)={ edge_index=[2, 168242] }
)

In [8]:
# Validation graph, built from the validation points file and its edge file.
val_data = buildData(val_points_file_path, val_graph_file_path)
val_data

True


HeteroData(
  source={
    node_id=[1000],
    x=[1000, 3],
  },
  target={
    node_id=[1000],
    x=[1000, 3],
  },
  (source, weight, target)={
    edge_index=[2, 2443],
    edge_label=[2443],
  },
  (target, rev_weight, source)={ edge_index=[2, 2443] }
)

## Model

In [9]:
from torch_geometric.nn import SAGEConv, to_hetero

class GNNEncoder(torch.nn.Module):
    """Two stacked SAGEConv layers with a ReLU in between.

    Input sizes are given as ``(-1, -1)`` so they are inferred lazily on the
    first forward pass.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        lazy_in_channels = (-1, -1)
        self.conv1 = SAGEConv(lazy_in_channels, hidden_channels)
        self.conv2 = SAGEConv(lazy_in_channels, out_channels)

    def forward(self, x, edge_index):
        """Return node embeddings of size ``out_channels``."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)


class EdgeDecoder(torch.nn.Module):
    """Two-layer MLP that scores an edge from the embeddings of its endpoints."""

    def __init__(self, hidden_channels):
        super().__init__()
        # Input is the concatenation of a source and a target embedding.
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        """Return one score per column of ``edge_label_index``.

        ``z_dict`` must provide the ``'source'`` and ``'target'`` embeddings;
        the first row of ``edge_label_index`` indexes the former, the second
        row the latter.
        """
        src_idx, dst_idx = edge_label_index
        src_emb = z_dict['source'][src_idx]
        dst_emb = z_dict['target'][dst_idx]
        pair_emb = torch.cat([src_emb, dst_emb], dim=-1)

        hidden = torch.relu(self.lin1(pair_emb))
        scores = self.lin2(hidden)
        # Flatten (n_edges, 1) -> (n_edges,)
        return scores.view(-1)


class Model(torch.nn.Module):
    """Edge-scoring model: heterogeneous GraphSAGE encoder plus MLP edge decoder.

    Parameters
    ----------
    hidden_channels : int
        Width of the encoder embeddings and of the decoder's hidden layer.
    metadata : tuple, optional
        Graph metadata (as returned by ``HeteroData.metadata()``) used to convert
        the encoder with ``to_hetero``. Defaults to ``train_data.metadata()`` so
        existing ``Model(hidden_channels=...)`` calls keep working, while the class
        can also be built for another graph without relying on the global.
    """

    def __init__(self, hidden_channels, metadata=None):
        super().__init__()
        if metadata is None:
            # Backward-compatible default: the notebook-level training graph.
            metadata = train_data.metadata()
        homogeneous_encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(homogeneous_encoder, metadata, aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        """Encode all nodes, then score the edges listed in ``edge_label_index``."""
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = Model(hidden_channels=32)
model = model.to(device)

print(model)

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
    (conv2): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=64, out_features=32, bias=True)
    (lin2): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [10]:
import torch.nn.functional as F

# Adam over all model parameters with a fixed learning rate of 0.005.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

def train():
    """Run one full-batch optimisation step on ``train_data``.

    The model scores every ``('source', 'weight', 'target')`` edge of the training
    graph and is fitted to the edge labels with an MSE loss.

    Returns
    -------
    float
        The training loss computed before the parameter update.
    """
    model.train()
    # Same convention as test(): put the graph on the model's device here rather
    # than relying on the caller having done it beforehand. Expected to be a cheap
    # no-op once the graph already lives on `device`.
    data = train_data.to(device)
    optimizer.zero_grad()
    edge_store = data['source', 'target']
    pred = model(data.x_dict, data.edge_index_dict, edge_store.edge_index)
    target = edge_store.edge_label
    loss = F.mse_loss(pred, target)
    loss.backward()
    optimizer.step()
    return float(loss)

def test(data):
    """Return the RMSE between clamped edge scores and the edge labels of ``data``.

    The graph is moved to ``device``, the model is switched to eval mode, and the
    predictions are clamped to [0, 1] before the error is computed. No gradients
    are tracked.
    """
    with torch.no_grad():
        graph = data.to(device)
        model.eval()
        edge_store = graph['source', 'target']
        scores = model(graph.x_dict, graph.edge_index_dict, edge_store.edge_index)
        scores = scores.clamp(min=0, max=1)
        labels = edge_store.edge_label.float()
        error = F.mse_loss(scores, labels).sqrt()
        return float(error)

In [11]:
# Number of full-batch training epochs.
N_EPOCHS = 3000

# The device transfer does not depend on the epoch, so do it once up front
# instead of on every iteration.
train_data = train_data.to(device)

for epoch in range(1, N_EPOCHS + 1):
    loss = train()
    # RMSE on clamped predictions, for both the training and the validation graph
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}')

Epoch: 001, Loss: 22.8230, Train: 0.7339, Val: 0.7892
Epoch: 002, Loss: 125.7657, Train: 0.7357, Val: 0.7511
Epoch: 003, Loss: 14.8815, Train: 0.6584, Val: 0.6021
Epoch: 004, Loss: 17.2849, Train: 0.5958, Val: 0.5773
Epoch: 005, Loss: 29.2295, Train: 0.5098, Val: 0.5767
Epoch: 006, Loss: 17.4189, Train: 0.2680, Val: 0.5878
Epoch: 007, Loss: 7.3545, Train: 0.2957, Val: 0.5795
Epoch: 008, Loss: 2.9411, Train: 0.5116, Val: 0.6149
Epoch: 009, Loss: 2.5343, Train: 0.6335, Val: 0.6851
Epoch: 010, Loss: 3.9344, Train: 0.7204, Val: 0.6925
Epoch: 011, Loss: 4.4152, Train: 0.7304, Val: 0.6714
Epoch: 012, Loss: 3.5024, Train: 0.6719, Val: 0.6272
Epoch: 013, Loss: 2.0472, Train: 0.5418, Val: 0.5844
Epoch: 014, Loss: 0.9358, Train: 0.3888, Val: 0.5750
Epoch: 015, Loss: 0.6085, Train: 0.2785, Val: 0.5678
Epoch: 016, Loss: 0.8417, Train: 0.2445, Val: 0.5671
Epoch: 017, Loss: 1.0611, Train: 0.2377, Val: 0.5724
Epoch: 018, Loss: 0.9697, Train: 0.2437, Val: 0.5707
Epoch: 019, Loss: 0.6648, Train: 0.2766

In [12]:
# The validation graph is reused as the test set (same object, not a copy).
test_data = val_data

# Inference only: set eval mode explicitly instead of relying on a previous
# test() call having left the model in that state.
model.eval()
with torch.no_grad():
    test_data = test_data.to(device)
    pred = model(test_data.x_dict, test_data.edge_index_dict,
                 test_data['source', 'target'].edge_index)
    print(pred.shape)
    pred = pred.clamp(min=0, max=1)
    target = test_data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

# Collect endpoints, scores and true labels into one table (one row per edge)
sour = test_data['source', 'target'].edge_index[0].cpu().numpy()
tar = test_data['source', 'target'].edge_index[1].cpu().numpy()
pred = pred.cpu().numpy()
print(pred.shape)
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})
print(res.shape)

#Binarise the score: weight is 1 if pred is greater than or equal to 0.5, else 0
res['weight'] = np.where(res['pred']>=0.5, 1., 0.)

torch.Size([2443])
Test RMSE: 0.5137
(2443,)
(2443, 4)


In [13]:
# Count the edges whose binarised prediction (`weight`) equals the true label
# (`compare`), using a vectorised comparison instead of a row-by-row loop.
cont = int((res['compare'] == res['weight']).sum())

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)

Accuracy: 0.6500204666393778
Number of correct predictions: 1588


In [14]:
# Work on an independent copy: `test_data` is the same object as `val_data`, so
# writing predicted labels into a plain alias would overwrite the validation
# ground truth and corrupt any later evaluation on `val_data`.
pred_data = test_data.clone()

pred_data['source','weight','target']['edge_label']

tensor([1., 0., 0.,  ..., 1., 0., 1.])

In [15]:
# Replace the edge labels stored in `pred_data` with the binarised predictions
# (cast to integer labels) and show the result.
edge_store = pred_data['source','weight','target']
edge_store['edge_label'] = torch.tensor(res['weight'], dtype=torch.long)
print(edge_store['edge_label'])

tensor([0, 0, 0,  ..., 0, 0, 0])


## Save model

In [16]:
# Persist only the learned weights (state_dict). The file name encodes NTRACKS and
# WINDOW; "val100" and "epochs3000" are hard-coded labels that must be kept in
# sync with the actual run by hand.
torch.save(model.state_dict(), f'model_{NTRACKS}_val100_epochs3000_{WINDOW}_all_test.pth')

## Validation

In [17]:
# Per-class accuracy: how often the binarised prediction (`weight`) matches the
# true label (`compare`), separately for connected (label 1) and non-connected
# (label 0) edges. Rows with any other label are ignored, as before.
is_connected = res['compare'] == 1.0
is_nonconnected = res['compare'] == 0.
is_correct = res['compare'] == res['weight']

ncon = int(is_connected.sum())
nncon = int(is_nonconnected.sum())
n2 = int((is_connected & is_correct).sum())
n1 = int((is_nonconnected & is_correct).sum())

# A class with no edges has no defined accuracy; report NaN instead of raising
# ZeroDivisionError.
connected_accuracy = n2/ncon if ncon else float('nan')
nonconnected_accuracy = n1/nncon if nncon else float('nan')

print(f'Accuracy in connected edges:     {n2}/{ncon} = {connected_accuracy}')
print(f'Accuracy in non connected edges: {n1}/{nncon} = {nonconnected_accuracy}')

Accuracy in connected edges:     49/900 = 0.05444444444444444
Accuracy in non connected edges: 1539/1543 = 0.9974076474400518
