# ベンチマークデータセットのGCN

In [1]:
import networkx as nx
import torch
import numpy as np

import sys
sys.path.append("../codes")

In [20]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from torch_geometric.data import Data, DataLoader
from torch_geometric.datasets import TUDataset


class GCN(torch.nn.Module):
    """Graph-level classifier: three GCNConv layers, mean-pool readout, linear head.

    Args:
        hidden_channels: Output width of every ``GCNConv`` layer and input
            width of the final linear classifier.
        num_classes: Number of output logits produced per graph.
        num_node_features: Size of the per-node input feature vector fed to
            the first convolution. When ``None`` (the default) the value is
            read from the module-level ``dataset`` object, which preserves
            the original behaviour but requires ``dataset`` to exist at
            construction time.
    """

    def __init__(self, hidden_channels, num_classes, num_node_features=None):
        super().__init__()
        # Seed the global torch RNG right before the layers are created.
        torch.manual_seed(12345)
        if num_node_features is None:
            # Backward-compatible fallback: earlier callers relied on the
            # global ``dataset`` being defined before the model was built.
            num_node_features = dataset.num_node_features
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class logits per graph in the mini-batch.

        Args:
            x: Node feature input passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.
            batch: Node-to-graph assignment passed to ``global_mean_pool``.

        Returns:
            The output of the final linear layer applied to the pooled
            (per-graph) embeddings.
        """
        # 1. Obtain node embeddings (no ReLU after the last convolution).
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier; dropout is only active in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x

In [21]:
def train():
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Relies on the module-level ``model``, ``train_loader``, ``criterion`` and
    ``optimizer`` objects. Returns ``None``.
    """
    model.train()

    for batch in train_loader:  # One mini-batch of graphs per iteration.
        # Forward pass on the batched graphs.
        logits = model(batch.x, batch.edge_index, batch.batch)
        # Compute the loss against the labels and back-propagate it.
        criterion(logits, batch.y).backward()
        # Apply the update, then reset gradients for the next batch.
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    """Return the accuracy of the module-level ``model`` on ``loader``.

    Args:
        loader: Iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch`` and ``y``; it must also provide a ``dataset``
            attribute whose length is the total number of samples.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every forward pass.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Index of the largest output per graph.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

In [22]:
# Load the TUDataset "DD" graphs and split them 70/30 into train and test sets.
dataset = TUDataset(root='data/TUDataset', name='DD')

# Seed the torch RNG immediately before shuffling the dataset.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# The first 70% of the shuffled graphs are used for training, the rest for testing.
train_index = int(0.7 * len(dataset))
train_dataset, test_dataset = dataset[:train_index], dataset[train_index:]

# Only the training loader reshuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [23]:
# Build the model, loss and optimizer, then train while logging accuracy.
model = GCN(hidden_channels=64, num_classes=dataset.num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Per-epoch accuracy history on the training and test splits.
train_accs, test_accs = [], []

for epoch in range(1, 10):  # Epochs 1 through 9.
    train()

    # Measure accuracy on both splits after each epoch.
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.5934, Test Acc: 0.5706
Epoch: 002, Train Acc: 0.5934, Test Acc: 0.5706
Epoch: 003, Train Acc: 0.5934, Test Acc: 0.5706
Epoch: 004, Train Acc: 0.6808, Test Acc: 0.6864
Epoch: 005, Train Acc: 0.6553, Test Acc: 0.6441
Epoch: 006, Train Acc: 0.6796, Test Acc: 0.6893
Epoch: 007, Train Acc: 0.7269, Test Acc: 0.7373
Epoch: 008, Train Acc: 0.7148, Test Acc: 0.7203
Epoch: 009, Train Acc: 0.7391, Test Acc: 0.7345


In [None]:
# Move the model to the CPU and write its state dict to "<ave_dir>/model.pth".
# NOTE(review): `ave_dir` is not defined in any visible cell -- possibly a typo
# for `save_dir`; confirm and define it before running this cell.
cpu_state = model.to("cpu").state_dict()
torch.save(cpu_state, ave_dir + "/model.pth")