In [1]:
#Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import Tensor
import os

os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.3.0+cu121


In [2]:
#Get the current working directory
cwd = os.getcwd()
file_path = cwd + '/points_150.csv'

In [3]:
df=pd.read_csv(file_path)

#Round the values of the dataset to 4 decimal places
df = df.round(4)

#Add a column to use as index from 0 to the length of the dataset
df['n_label'] = range(0, len(df))

#delete the column p_label
df = df.drop('p_label', axis=1)

df.head()

Unnamed: 0,x,y,z,N_side,N_layer,t_label,phi,eta,q,pt,d0,z0,n_label
0,7.8856,-5.8159,-29.5959,10,1,T0,-0.8395,-2.1563,-1,54.6582,0.0198,0.1129,0
1,14.315,-12.9382,-70.4852,10,2,T0,-0.8395,-2.1563,-1,54.6582,0.0198,0.1129,1
2,20.7589,-20.0473,-111.3745,10,3,T0,-0.8395,-2.1563,-1,54.6582,0.0198,0.1129,2
3,27.2174,-27.1433,-152.2638,10,4,T0,-0.8395,-2.1563,-1,54.6582,0.0198,0.1129,3
4,33.6903,-34.226,-193.1531,10,5,T0,-0.8395,-2.1563,-1,54.6582,0.0198,0.1129,4


In [4]:
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

In [8]:
#Empty hetero graph 
data=HeteroData()

## Add nodes

In [9]:
#node names
nodes_s=df['n_label'].values
nodes_t=df['n_label'].values

In [10]:
#Add nodes to the graph
data['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
data['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)

In [11]:
#Add node attributes, in this case the position of the points
data['source'].x = Tensor(df[['x', 'y', 'z']].values)
data['target'].x = Tensor(df[['x', 'y', 'z']].values)

In [12]:
data

HeteroData(
  source={
    node_id=[1500],
    x=[1500, 3],
  },
  target={
    node_id=[1500],
    x=[1500, 3],
  }
)

## Add edges

In [13]:
edge_path = cwd + '/grap_150.csv'

# Importing the dataset
df_edge = pd.read_csv(edge_path)

In [14]:
edge_index = torch.tensor([df_edge['Source'], df_edge['Target']], dtype=torch.long)

In [15]:
data['source', 'weight', 'target'].edge_index = edge_index

In [16]:
data

HeteroData(
  source={
    node_id=[1500],
    x=[1500, 3],
  },
  target={
    node_id=[1500],
    x=[1500, 3],
  },
  (source, weight, target)={ edge_index=[2, 3538] }
)

In [17]:
#edge attributes
weight_val = torch.from_numpy(df_edge['weight'].values).to(torch.float)

In [18]:
data['source', 'weight', 'target'].edge_label=weight_val

In [19]:
data

HeteroData(
  source={
    node_id=[1500],
    x=[1500, 3],
  },
  target={
    node_id=[1500],
    x=[1500, 3],
  },
  (source, weight, target)={
    edge_index=[2, 3538],
    edge_label=[3538],
  }
)

In [20]:
#check if the data is valid
data.validate(raise_on_error=True)

True

In [21]:
import torch_geometric.transforms as T
data = T.ToUndirected()(data)

In [22]:
del data['target', 'rev_weight', 'source'].edge_label

In [23]:
print(data.validate(raise_on_error=True))
data

True


HeteroData(
  source={
    node_id=[1500],
    x=[1500, 3],
  },
  target={
    node_id=[1500],
    x=[1500, 3],
  },
  (source, weight, target)={
    edge_index=[2, 3538],
    edge_label=[3538],
  },
  (target, rev_weight, source)={ edge_index=[2, 3538] }
)

In [24]:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('source', 'weight', 'target')],
    rev_edge_types=[('target', 'rev_weight', 'source')],
)(data)

In [25]:
train_data

HeteroData(
  source={
    node_id=[1500],
    x=[1500, 3],
  },
  target={
    node_id=[1500],
    x=[1500, 3],
  },
  (source, weight, target)={
    edge_index=[2, 2832],
    edge_label=[2832],
    edge_label_index=[2, 2832],
  },
  (target, rev_weight, source)={ edge_index=[2, 2832] }
)

## Model

In [26]:
from torch_geometric.nn import SAGEConv, to_hetero

class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['source'][row], z_dict['target'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Model(hidden_channels=32).to(device)

print(model)

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
    (conv2): ModuleDict(
      (source__weight__target): SAGEConv((-1, -1), 32, aggr=mean)
      (target__rev_weight__source): SAGEConv((-1, -1), 32, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=64, out_features=32, bias=True)
    (lin2): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [27]:
import torch.nn.functional as F

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['source', 'target'].edge_label_index)
    target = train_data['source', 'target'].edge_label
    loss = F.mse_loss(pred, target)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test(data):
    data = data.to(device)
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['source', 'target'].edge_label_index)
    pred = pred.clamp(min=0, max=1)
    target = data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [28]:
for epoch in range(1, 301):
    train_data = train_data.to(device)
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}')

Epoch: 001, Loss: 40.8858, Train: 0.3504, Val: 0.3470
Epoch: 002, Loss: 2009.2610, Train: 0.3513, Val: 0.3473
Epoch: 003, Loss: 130.8028, Train: 0.6048, Val: 0.5859
Epoch: 004, Loss: 23.6549, Train: 0.6895, Val: 0.6863
Epoch: 005, Loss: 57.1019, Train: 0.7477, Val: 0.7165
Epoch: 006, Loss: 35.2679, Train: 0.7408, Val: 0.7037
Epoch: 007, Loss: 7.1158, Train: 0.6505, Val: 0.6468
Epoch: 008, Loss: 1.1593, Train: 0.5736, Val: 0.5902
Epoch: 009, Loss: 0.9359, Train: 0.5450, Val: 0.5651
Epoch: 010, Loss: 1.3640, Train: 0.5247, Val: 0.5469
Epoch: 011, Loss: 1.6383, Train: 0.5062, Val: 0.5310
Epoch: 012, Loss: 1.6828, Train: 0.4883, Val: 0.5170
Epoch: 013, Loss: 1.4607, Train: 0.4736, Val: 0.5049
Epoch: 014, Loss: 1.0764, Train: 0.4675, Val: 0.5020
Epoch: 015, Loss: 0.6886, Train: 0.4856, Val: 0.5150
Epoch: 016, Loss: 0.5365, Train: 0.5080, Val: 0.5314
Epoch: 017, Loss: 0.5937, Train: 0.5042, Val: 0.5017
Epoch: 018, Loss: 0.5047, Train: 0.5317, Val: 0.5209
Epoch: 019, Loss: 0.3476, Train: 0.56

In [29]:
with torch.no_grad():
    test_data = test_data.to(device)
    pred = model(test_data.x_dict, test_data.edge_index_dict,
                 test_data['source', 'target'].edge_label_index)
    pred = pred.clamp(min=0, max=1)
    target = test_data['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = test_data['source', 'target'].edge_label_index[0].cpu().numpy()
tar = test_data['source', 'target'].edge_label_index[1].cpu().numpy()
pred = pred.cpu().numpy()
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})

Test RMSE: 0.3099


In [30]:
#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>=0.5, 1, 0.5)

In [31]:
res.head()

Unnamed: 0,source,target,pred,compare,weight
0,196,197,0.785356,1.0,1.0
1,1257,1258,0.474498,1.0,0.5
2,473,114,0.672265,0.5,1.0
3,258,259,0.920634,1.0,1.0
4,917,508,0.61854,0.5,1.0


In [32]:
#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)

Accuracy: 0.546742209631728
Number of correct predictions: 193


## Try with different graph

In [33]:
#read a new csv file
file_test = cwd + '/points_10.csv'
df_test=pd.read_csv(file_test, sep=',')

#Round the values of the dataset to 4 decimal places
df_test = df_test.round(4)

#Add a column to use as index from 0 to the length of the dataset
df_test['n_label'] = range(0, len(df_test))

#delete the column p_label
df_test = df_test.drop('p_label', axis=1)

In [34]:
data_10=HeteroData()

nodes_s=df_test['n_label'].values
nodes_t=df_test['n_label'].values

data_10['source'].node_id = torch.tensor(nodes_s, dtype=torch.long)
data_10['target'].node_id = torch.tensor(nodes_t, dtype=torch.long)

data_10['source'].x = Tensor(df_test[['x', 'y', 'z']].values)
data_10['target'].x = Tensor(df_test[['x', 'y', 'z']].values)

In [35]:
edge_test_path = cwd + '/grap_10.csv'

# Importing the dataset
df_test_edge = pd.read_csv(edge_test_path)

edge_index_test = torch.tensor([df_test_edge['Source'], df_test_edge['Target']], dtype=torch.long)

data_10['source', 'weight', 'target'].edge_index = edge_index_test

weight_test = torch.from_numpy(df_test_edge['weight'].values).to(torch.float)

data_10['source', 'weight', 'target'].edge_label=weight_test

In [36]:
data_10= T.ToUndirected()(data_10)
del data_10['target', 'rev_weight', 'source'].edge_label

In [37]:
print(data_10.validate(raise_on_error=True))
data_10

True


HeteroData(
  source={
    node_id=[100],
    x=[100, 3],
  },
  target={
    node_id=[100],
    x=[100, 3],
  },
  (source, weight, target)={
    edge_index=[2, 107],
    edge_label=[107],
  },
  (target, rev_weight, source)={ edge_index=[2, 107] }
)

In [39]:
with torch.no_grad():
    data_10 = data_10.to(device)
    pred = model(data_10.x_dict, data_10.edge_index_dict,
                 data_10['source', 'target'].edge_index)
    pred = pred.clamp(min=0, max=1)
    target = data_10['source', 'target'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    print(f'Test RMSE: {rmse:.4f}')

sour = data_10['source', 'target'].edge_index[0].cpu().numpy()
tar = data_10['source', 'target'].edge_index[1].cpu().numpy()
pred = pred.cpu().numpy()
target = target.cpu().numpy()

res=pd.DataFrame({'source': sour, 'target': tar, 'pred': pred, 'compare': target})

Test RMSE: 0.3585


In [40]:
#Add a new column if pred is greater or equal than 0.5 then 1 else 0.5
res['weight'] = np.where(res['pred']>0.5, 1, 0.5)

#compare column rating_1 with target and if they are equal add up
cont=0
for i in res.itertuples():
    if i.compare == i.weight:
        cont+=1

#Calculate the accuracy
accuracy = cont/len(res)
print('Accuracy:', accuracy)
print('Number of correct predictions:', cont)

Accuracy: 0.6728971962616822
Number of correct predictions: 72
