In [1]:
# This data handling code is adapted from the PyTorch geometric collection of google colab notebooks, a fantastic resource for getting started with GNNs. https://pytorch-geometric.readthedocs.io/en/latest/notes/colabs.html
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.transforms import Constant
# import the graph classifier you built in the last step
from GCN_03 import GraphClassifier, GraphClassifierWelling

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# - - - DATA PREPARATIONS - - -
# Load the REDDIT-BINARY graph-classification benchmark into data/TUDataset.
dataset = TUDataset(
    root='data/TUDataset',
    name='REDDIT-BINARY',
    pre_transform=Constant() # the Reddit dataset has no node features of its own. This "Constant" pre-transform gives each node the value '1'.
    # If all goes according to plan, the GCN should be able to derive good graph representations from the connectivity of the graphs alone.
)

# Dataset-level summary.
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.
print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# `contains_isolated_nodes()` / `contains_self_loops()` are deprecated aliases;
# `has_isolated_nodes()` / `has_self_loops()` are the current names for the same checks.
print(f'Contains isolated nodes: {data.has_isolated_nodes()}')
print(f'Contains self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Dataset: REDDIT-BINARY(2000):
Number of graphs: 2000
Number of features: 1
Number of classes: 2

Data(edge_index=[2, 480], y=[1], x=[218, 1], num_nodes=218)
Number of nodes: 218
Number of edges: 480
Average node degree: 2.20
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True




In [3]:
# Seed the global RNG first so the shuffle below yields the same split every run.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# First 1000 shuffled graphs are used for training, the remainder for testing.
train_dataset, test_dataset = dataset[:1000], dataset[1000:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

# Mini-batches of 64 graphs; only the training batches are reshuffled each epoch.
train_loader, test_loader = (
    DataLoader(train_dataset, batch_size=64, shuffle=True),
    DataLoader(test_dataset, batch_size=64, shuffle=False),
)

Number of training graphs: 1000
Number of test graphs: 1000




In [9]:
def train(model, optimizer, loader=None, loss_fn=None):
    """Run one training epoch, updating ``model`` in place.

    Args:
        model: Module invoked as ``model(x, edge_index, batch)``; it is put into
            training mode before the epoch starts.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``. Defaults to the notebook-level ``train_loader``.
        loss_fn: Callable ``loss_fn(out, y)`` returning a scalar tensor.
            Defaults to the notebook-level ``criterion``.

    Returns:
        None.
    """
    # Fall back to the notebook globals only when the caller passed nothing,
    # so existing ``train(model, optimizer)`` calls keep working unchanged.
    if loader is None:
        loader = train_loader
    if loss_fn is None:
        loss_fn = criterion

    model.train()

    for data in loader:  # Iterate in batches over the training dataset.
        # Clear gradients *before* the forward/backward pass: anything left on the
        # parameters (e.g. from a run interrupted between backward() and the
        # clean-up) would otherwise be added into this batch's update.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = loss_fn(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
    
def test(model, loader):
    model.eval()

    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

Model Welling

In [6]:
# Finally, we've got the train loader and the test loader! Time to start doing the actual training!
# "A data scientist's job is 90% data, 10% science"
# - - - TRAINING - - -

# Read the input/output sizes from the dataset instead of hard-coding them, so the
# model stays consistent with the data (for REDDIT-BINARY + Constant() these are 1 and 2).
model_welling = GraphClassifierWelling(
    hidden_channels=64,
    num_node_features=dataset.num_features,
    num_classes=dataset.num_classes,
)
optimizer_welling = torch.optim.Adam(model_welling.parameters(), lr=0.01)
# Shared loss function; `train` reads it as the notebook-level `criterion`.
criterion = torch.nn.CrossEntropyLoss()

In [10]:
# Train the Welling model for 200 epochs, reporting accuracy on every 10th epoch.
for epoch in range(1, 201):
    train(model_welling, optimizer_welling)
    if epoch % 10:
        continue  # Skip evaluation except on multiples of 10.
    train_acc, test_acc = test(model_welling, train_loader), test(model_welling, test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 010, Train Acc: 0.5060, Test Acc: 0.4940
Epoch: 020, Train Acc: 0.5060, Test Acc: 0.4940
Epoch: 030, Train Acc: 0.5060, Test Acc: 0.4940
Epoch: 040, Train Acc: 0.5060, Test Acc: 0.4940
Epoch: 050, Train Acc: 0.5060, Test Acc: 0.4940


KeyboardInterrupt: 

Model New

In [11]:
# Read the input/output sizes from the dataset instead of hard-coding them, so the
# model stays consistent with the data (for REDDIT-BINARY + Constant() these are 1 and 2).
model_new = GraphClassifier(
    hidden_channels=64,
    num_node_features=dataset.num_features,
    num_classes=dataset.num_classes,
)
optimizer_new = torch.optim.Adam(model_new.parameters(), lr=0.01)

In [12]:
# Train the new model for 200 epochs, reporting accuracy on every 10th epoch.
for epoch in range(1, 201):
    train(model_new, optimizer_new)
    if epoch % 10:
        continue  # Skip evaluation except on multiples of 10.
    train_acc, test_acc = test(model_new, train_loader), test(model_new, test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 010, Train Acc: 0.7010, Test Acc: 0.7200
Epoch: 020, Train Acc: 0.6990, Test Acc: 0.7200
Epoch: 030, Train Acc: 0.6150, Test Acc: 0.6070
Epoch: 040, Train Acc: 0.7100, Test Acc: 0.7020
Epoch: 050, Train Acc: 0.6770, Test Acc: 0.6870
Epoch: 060, Train Acc: 0.7000, Test Acc: 0.6970
Epoch: 070, Train Acc: 0.6960, Test Acc: 0.6960
Epoch: 080, Train Acc: 0.6940, Test Acc: 0.7080
Epoch: 090, Train Acc: 0.5990, Test Acc: 0.5880
Epoch: 100, Train Acc: 0.6010, Test Acc: 0.6050
Epoch: 110, Train Acc: 0.6410, Test Acc: 0.6400
Epoch: 120, Train Acc: 0.6870, Test Acc: 0.6940
Epoch: 130, Train Acc: 0.6870, Test Acc: 0.6980
Epoch: 140, Train Acc: 0.6260, Test Acc: 0.6250
Epoch: 150, Train Acc: 0.6900, Test Acc: 0.7000


KeyboardInterrupt: 

How do you explain the differences in train and test accuracy between the two models ("Model Welling" and "Model New")?