# Test the graph neural network (GNNStack) on a small dataset (PROTEINS)

In [1]:
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import global_mean_pool
import numpy as np

In [2]:
import sys, os
from pathlib import Path

# os.path.abspath("") resolves to the current working directory; its parent is
# appended to the import search path. Presumably this is what lets the
# project-local `model` module below be imported when the notebook is run from
# its own sub-folder -- verify against the repository layout.
sys.path.append(str(Path(os.path.abspath("")).parent))

from model import GNNStack

## Dataset

In [3]:
# Seed torch's global RNG before shuffling: the permutation drawn by
# `data.shuffle()` decides which graphs end up in the train/test split, so
# without a fixed seed the results below cannot be reproduced on a re-run.
SEED = 0
torch.manual_seed(SEED)

# Load the PROTEINS graph-classification dataset (cached under /tmp/PROTEINS).
data = TUDataset(root='/tmp/PROTEINS', name='PROTEINS')
# Randomly permute the graphs so that a later slice yields a random split.
data = data.shuffle()

# Display node/edge count statistics for the dataset.
data.get_summary()

TUDataset (#graphs=1113):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     39.1 |    145.6 |
| std        |     45.8 |    169.3 |
| min        |      4   |     10   |
| quantile25 |     15   |     56   |
| median     |     26   |     98   |
| quantile75 |     45   |    174   |
| max        |    620   |   2098   |
+------------+----------+----------+

In [28]:
from collections import Counter

# Class balance: tally the graph-level label of every graph in the dataset.
Counter(graph.y.item() for graph in data)

Counter({1: 450, 0: 663})

In [30]:
# Fraction of graphs labelled 1, computed from the dataset itself instead of
# from hand-copied counts so the figure cannot go stale if the data changes.
sum(data[i].y.item() == 1 for i in range(len(data))) / len(data)

0.40431266846361186

In [4]:
# 80/20 train/test split by slicing; `data` was shuffled when it was loaded,
# so the slice boundaries give a random partition.
split = int(0.8 * len(data))
train_split, test_split = data[:split], data[split:]

# Note: despite their names, `train_data` / `test_data` are DataLoaders that
# yield mini-batches of 32 graphs.
train_data = DataLoader(train_split, batch_size=32, shuffle=True)
# Accuracy does not depend on batch order, so the evaluation loader is left
# unshuffled to keep evaluation deterministic.
test_data = DataLoader(test_split, batch_size=32, shuffle=False)

## Helpers

In [5]:
def call_model(batch):
    """Run the global ``model`` on one mini-batch of graphs.

    Pipeline: node features -> integer ids (argmax over axis 1) -> node
    embedding -> graph convolutions -> per-graph mean pooling -> ``post_mp``.

    Args:
        batch: a mini-batch exposing ``x`` (node features), ``edge_index`` and
            ``batch`` (the node-to-graph assignment used for pooling).
            Assumes ``x`` is a 2-D per-node matrix (e.g. one-hot categories)
            -- TODO confirm against the dataset.

    Returns:
        The output of ``model.post_mp`` on the pooled graph representations.
        Callers in this notebook treat it as raw logits.
    """
    # Use torch's own argmax rather than np.argmax: it keeps the computation
    # in torch on whatever device the batch lives on, instead of relying on
    # NumPy's handling of torch tensors.
    node_ids = batch.x.argmax(dim=1)
    node_states = model.node_embedding(node_ids)

    # model.gcn returns a pair; only its second element is used here.
    _, node_states = model.gcn(node_states, batch.edge_index)
    # Average the node states of each graph into one vector per graph.
    graph_states = global_mean_pool(node_states, batch.batch)

    return model.post_mp(graph_states)

def train():
    """Run one optimisation pass over ``train_data``.

    Uses the notebook-level ``model``, ``criterion`` and ``optimizer``;
    returns nothing.
    """
    model.train()

    for batch in train_data:
        logits = call_model(batch)
        # Labels are cast to float before being handed to the criterion.
        targets = batch.y.float()

        batch_loss = criterion(logits, targets)
        batch_loss.backward()   # back-propagate
        optimizer.step()        # apply the parameter update
        optimizer.zero_grad()   # reset gradients for the next batch


def test(loader):
    """Return the accuracy of the global ``model`` over all batches in ``loader``.

    Args:
        loader: iterable of mini-batches; each batch must expose integer
            labels ``y`` and be accepted by ``call_model``.

    Returns:
        Number of correct predictions divided by the number of predictions.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    model.eval()

    correct = 0
    total_samples = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every batch (less memory, faster).
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = call_model(data)
            # Logit -> probability -> hard 0/1 decision at threshold 0.5.
            pred = torch.sigmoid(out).round().long()

            correct += data.y.eq(pred).sum().item()
            total_samples += len(pred)

    return correct / total_samples  # Derive ratio of correct predictions.

## Setup and run

In [31]:
# Instantiate the project-local GNN. `no_embeddings=3` presumably matches the
# number of node categories that call_model turns into ids -- verify against
# GNNStack's definition.
model = GNNStack(
    hidden_dim=64,
    no_embeddings=3,
    num_convolutional_layers=4,
    no_dense_layers=1,
    direction='single',
    dropout_rate=0.3,
)

In [32]:
# Adam over every model parameter, with weight decay as regularisation.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
# Binary cross-entropy computed directly on logits, averaged over the batch.
criterion = torch.nn.BCEWithLogitsLoss(reduction="mean")

In [33]:
# Alternate one training pass with an accuracy check on both splits.
# Note: range(1, 20) yields epochs 1..19, i.e. 19 epochs in total.
for epoch in range(1, 20):
    train()
    train_acc, test_acc = test(train_data), test(test_data)
    print(f"Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

Epoch: 001, Train Acc: 0.6989, Test Acc: 0.7220
Epoch: 002, Train Acc: 0.6528, Test Acc: 0.7265
Epoch: 003, Train Acc: 0.6764, Test Acc: 0.6592
Epoch: 004, Train Acc: 0.6933, Test Acc: 0.7444
Epoch: 005, Train Acc: 0.7157, Test Acc: 0.7534
Epoch: 006, Train Acc: 0.6989, Test Acc: 0.7534
Epoch: 007, Train Acc: 0.7247, Test Acc: 0.7489
Epoch: 008, Train Acc: 0.7157, Test Acc: 0.7713
Epoch: 009, Train Acc: 0.7022, Test Acc: 0.7623
Epoch: 010, Train Acc: 0.7169, Test Acc: 0.7489
Epoch: 011, Train Acc: 0.7202, Test Acc: 0.7534
Epoch: 012, Train Acc: 0.7292, Test Acc: 0.7265
Epoch: 013, Train Acc: 0.7225, Test Acc: 0.7309
Epoch: 014, Train Acc: 0.7281, Test Acc: 0.7309
Epoch: 015, Train Acc: 0.7157, Test Acc: 0.7489
Epoch: 016, Train Acc: 0.7180, Test Acc: 0.6951
Epoch: 017, Train Acc: 0.7180, Test Acc: 0.7354
Epoch: 018, Train Acc: 0.7157, Test Acc: 0.7578
Epoch: 019, Train Acc: 0.7247, Test Acc: 0.7623
