In [None]:

# !pip install torch torchvision torchaudio
# !pip install torch-geometric


In [1]:

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool


In [3]:

dataset = TUDataset(root='data/MUTAG', name='MUTAG')
print(f"Number of graphs: {len(dataset)}")
print(f"Number of classes: {dataset.num_classes}")
print(f"Number of node features: {dataset.num_node_features}")
print(dataset[0])


Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip


Number of graphs: 188
Number of classes: 2
Number of node features: 7
Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])


Processing...
Done!


In [5]:

torch.manual_seed(42)
dataset = dataset.shuffle()
train_dataset = dataset[:150]
test_dataset = dataset[150:]
train_loader = DataLoader(train_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [7]:

class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 64)
        self.conv2 = GCNConv(64, 64)
        self.fc = torch.nn.Linear(64, dataset.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = global_mean_pool(x, batch)  # Pooling over all nodes in each graph
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


In [9]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [11]:

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

for epoch in range(1, 101):
    loss = train()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 10, Loss: 0.5208
Epoch 20, Loss: 0.4907
Epoch 30, Loss: 0.4845
Epoch 40, Loss: 0.4828
Epoch 50, Loss: 0.4779
Epoch 60, Loss: 0.4756
Epoch 70, Loss: 0.4740
Epoch 80, Loss: 0.4707
Epoch 90, Loss: 0.4678
Epoch 100, Loss: 0.4648


In [12]:

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

train_acc = test(train_loader)
test_acc = test(test_loader)
print(f"Train Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")


Train Accuracy: 0.7533
Test Accuracy: 0.6842
