In [1]:
# Install required packages.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/rusty1s/pytorch_geometric.git

In [2]:
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [3]:
train_dataset = torch.load('train.pt')
test_dataset = torch.load('test.pt')

In [4]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(data)
    print()

Step 1:
DataBatch(x=[1140, 11], edge_index=[2, 2364], edge_attr=[2364, 4], y=[64], pos=[1140, 3], name=[64], batch=[1140], ptr=[65])

Step 2:
DataBatch(x=[1128, 11], edge_index=[2, 2344], edge_attr=[2344, 4], y=[64], pos=[1128, 3], name=[64], batch=[1128], ptr=[65])

Step 3:
DataBatch(x=[1160, 11], edge_index=[2, 2384], edge_attr=[2384, 4], y=[64], pos=[1160, 3], name=[64], batch=[1160], ptr=[65])

Step 4:
DataBatch(x=[1148, 11], edge_index=[2, 2358], edge_attr=[2358, 4], y=[64], pos=[1148, 3], name=[64], batch=[1148], ptr=[65])

Step 5:
DataBatch(x=[1144, 11], edge_index=[2, 2352], edge_attr=[2352, 4], y=[64], pos=[1144, 3], name=[64], batch=[1144], ptr=[65])

Step 6:
DataBatch(x=[1148, 11], edge_index=[2, 2382], edge_attr=[2382, 4], y=[64], pos=[1148, 3], name=[64], batch=[1148], ptr=[65])

Step 7:
DataBatch(x=[1174, 11], edge_index=[2, 2430], edge_attr=[2430, 4], y=[64], pos=[1174, 3], name=[64], batch=[1174], ptr=[65])

Step 8:
DataBatch(x=[1209, 11], edge_index=[2, 2494], edge_att

In [5]:
from torch.nn import Linear, Flatten
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GraphConv, GATv2Conv, MFConv, GENConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GraphConv(num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GENConv(hidden_channels, hidden_channels)
        self.conv4 = MFConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, 1)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        x = x.relu()
        x = self.conv4(x, edge_index)

        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        x = torch.reshape(x, [x.shape[0]])
        
        return x

model = GCN(num_node_features=11, hidden_channels=64)
print(model)

GCN(
  (conv1): GraphConv(11, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GENConv(64, 64, aggr=softmax)
  (conv4): MFConv(64, 64)
  (lin): Linear(in_features=64, out_features=1, bias=True)
)


In [6]:
model = GCN(num_node_features=11, hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
         out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
         loss = torch.sqrt(criterion(out, data.y))  # Compute the loss.
         loss.backward()  # Derive gradients.
         optimizer.step()  # Update parameters based on gradients.
         optimizer.zero_grad()  # Clear gradients.

def test(loader):
     model.eval()
     name = []
     prediction = []

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         name.append(data['name'])
         pred = model(data.x, data.edge_index, data.batch)
         prediction.append(pred) 

     return name, prediction # Derive ratio of correct predictions.


for epoch in range(1, 100):
    train()
    train_acc = test(train_loader)
    name, pred = test(test_loader)
    print(f'Epoch: {epoch:03d}')

Epoch: 001
Epoch: 002
Epoch: 003
Epoch: 004
Epoch: 005
Epoch: 006
Epoch: 007
Epoch: 008
Epoch: 009
Epoch: 010
Epoch: 011
Epoch: 012
Epoch: 013
Epoch: 014
Epoch: 015
Epoch: 016
Epoch: 017
Epoch: 018
Epoch: 019
Epoch: 020
Epoch: 021
Epoch: 022
Epoch: 023
Epoch: 024
Epoch: 025
Epoch: 026
Epoch: 027
Epoch: 028
Epoch: 029
Epoch: 030
Epoch: 031
Epoch: 032
Epoch: 033
Epoch: 034
Epoch: 035
Epoch: 036
Epoch: 037
Epoch: 038
Epoch: 039
Epoch: 040
Epoch: 041
Epoch: 042
Epoch: 043
Epoch: 044
Epoch: 045
Epoch: 046
Epoch: 047
Epoch: 048
Epoch: 049
Epoch: 050
Epoch: 051
Epoch: 052
Epoch: 053
Epoch: 054
Epoch: 055
Epoch: 056
Epoch: 057
Epoch: 058
Epoch: 059
Epoch: 060
Epoch: 061
Epoch: 062
Epoch: 063
Epoch: 064
Epoch: 065
Epoch: 066
Epoch: 067
Epoch: 068
Epoch: 069
Epoch: 070
Epoch: 071
Epoch: 072
Epoch: 073
Epoch: 074
Epoch: 075
Epoch: 076
Epoch: 077
Epoch: 078
Epoch: 079
Epoch: 080
Epoch: 081
Epoch: 082
Epoch: 083
Epoch: 084
Epoch: 085
Epoch: 086
Epoch: 087
Epoch: 088
Epoch: 089
Epoch: 090
Epoch: 091

In [7]:
pred_flat = torch.cat(pred) 

In [8]:
pred_flat_np = pred_flat.detach().numpy()

In [9]:
flat_name = [item for sublist in name for item in sublist]

In [10]:
import pandas as pd

df = pd.DataFrame({'Idx': flat_name, 'labels':pred_flat_np })
df.to_csv('preditions.csv', sep=',', index=False)


In [11]:
df

Unnamed: 0,Idx,labels
0,gdb_59377,1.903001
1,gdb_14632,4.507661
2,gdb_35326,1.485895
3,gdb_11448,2.314631
4,gdb_35889,4.141517
...,...,...
1995,gdb_99875,3.111997
1996,gdb_49868,2.712770
1997,gdb_21387,3.624872
1998,gdb_89826,2.707370
