In [None]:
"""Basic script for loading the PROTEINS dataset and running training scrip with it.
To run it install pytorch for your OS and config from https://pytorch.org/get-started/locally/
and install PyG https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html"""

In [1]:
"""Imports"""
import torch
import torch.nn.functional as F
from torch_geometric.nn import global_mean_pool
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import GCNConv
from torch.nn import BCELoss

In [2]:
"""Load the data, wrap it around DataLoader (utility that acts as batch generator).
Train/val split is done arbitrarely. This dataset does not contain edge features. Only 
Node features, edges and graph label."""
# Split / batching hyperparameters, named so they can be tuned in one place.
SEED = 0          # fixed so the shuffle (and therefore the split) is reproducible
N_TRAIN = 1000    # graphs used for training; everything after that is validation
BATCH_SIZE = 32

torch.manual_seed(SEED)
dataset_raw = TUDataset(root='', name='PROTEINS', use_node_attr=True, use_edge_attr=True)
# Shuffle before slicing: a purely positional split inherits whatever order the
# dataset is stored in, which can leave the validation tail dominated by one class.
# NOTE(review): PROTEINS appears to be stored grouped by label - confirm.
dataset_shuffled = dataset_raw.shuffle()
train_ds = DataLoader(dataset_shuffled[:N_TRAIN], batch_size=BATCH_SIZE, shuffle=True)
val_ds = DataLoader(dataset_shuffled[N_TRAIN:], batch_size=BATCH_SIZE)

In [3]:
class GCN(torch.nn.Module):
    """Graph-level binary classifier built from two GCNConv layers.

    Node features go through two graph convolutions (with a ReLU in between),
    are mean-pooled per graph, and the pooled vector is mapped by a linear
    layer and a sigmoid to one probability per graph. Graphs of varying size
    can be batched together because pooling is driven by the batch assignment
    vector.

    Args:
        n_node_features: size of each input node feature vector.
        hidden_dim: width of both convolution outputs.
    """

    def __init__(self, n_node_features, hidden_dim):
        super().__init__()
        self.conv1 = GCNConv(n_node_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.linear1 = torch.nn.Linear(hidden_dim, 1)

    def forward(self, data):
        """Return one probability per graph in ``data``.

        Args:
            data: object exposing ``x`` (node features), ``edge_index`` and
                ``batch`` (the graph each node belongs to).

        Returns:
            1-D tensor with one sigmoid output per graph.
        """
        # ``data.batch`` is the node-to-graph assignment vector, not a batch size.
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = global_mean_pool(x, batch)
        x = self.linear1(x)
        # Drop only the trailing feature dimension. A bare squeeze() would also
        # remove the batch dimension when the batch holds a single graph, giving
        # a 0-d output that no longer lines up with the label tensor.
        return torch.sigmoid(x).squeeze(-1)

In [None]:
"""Initializing the network and optimizer. For debugging purposes the network runs on CPU."""
# Hyperparameters for the network and its optimizer.
HIDDEN_DIM = 16
LEARNING_RATE = 0.001

# Everything stays on the CPU, which keeps debugging simple.
device = torch.device('cpu')
model = GCN(dataset_raw.num_node_features, hidden_dim=HIDDEN_DIM)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
"""Train the model on the training data. Parameters are updated per batch.
WARNING! For batch size of 1 user probably will need to torch.squeeze data.y"""
N_EPOCHS = 1000
# BCELoss holds no per-batch state, so a single instance serves the whole run.
criterion = BCELoss()

model.train()

for epoch in range(N_EPOCHS):
    # Plain Python float: summing loss tensors directly would keep every
    # batch's autograd history referenced until the end of the epoch.
    epoch_loss = 0.0
    for data in train_ds:
        # Keep the batch on the same device as the model.
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        # Give the target the output's exact shape, so a batch containing a
        # single graph works without manually squeezing data.y.
        target = data.y.to(torch.float32).reshape(out.shape)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch}, loss: {epoch_loss}")