In [2]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-+.html
Collecting torch-scatter
  Downloading torch_scatter-2.0.9.tar.gz (21 kB)
Building wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... [?25l[?25hdone
  Created wheel for torch-scatter: filename=torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl size=3577503 sha256=bba965877ee29edd39f51c75e05b3179cb2b1768fa8c045b8541021dbca2a0d8
  Stored in directory: /root/.cache/pip/wheels/dd/57/a3/42ea193b77378ce634eb9454c9bc1e3163f3b482a35cdee4d1
Successfully built torch-scatter
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-+.html
Collecting torch-sparse
  Downloading torch_sparse-0.6.13.tar.gz (48 kB)
[K     |████████

In [4]:
from math import ceil

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import dense_diff_pool, DenseSAGEConv
from torch_geometric.utils import to_dense_batch, to_dense_adj
from tqdm import tqdm


class GNN(torch.nn.Module):
    def __init__(self, num_layer, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.layers = torch.nn.ModuleList()
        for i in range(num_layer):
            if i == 0:
                self.layers.append(DenseSAGEConv(in_dim, hidden_dim))
            elif i == num_layer - 1:
                self.layers.append(DenseSAGEConv(hidden_dim, out_dim))
            else:
                self.layers.append(DenseSAGEConv(hidden_dim, hidden_dim))

            if i != num_layer - 1:
                self.layers.append(torch.nn.BatchNorm1d(hidden_dim))
            else:
                self.layers.append(torch.nn.BatchNorm1d(out_dim))

    def forward(self, x, a, mask=None):
        for layer in self.layers:
            if isinstance(layer, DenseSAGEConv):
                x = layer(x, a, mask)
            else:
                batch_size, num_nodes, num_channels = x.size()
                x = x.view(-1, num_channels)
                x = layer(x)
                x = x.view(batch_size, num_nodes, num_channels)
                x = F.relu(x)
        return x


class DiffPool(torch.nn.Module):
    def __init__(self, hidden_dim, num_cluster):
        super().__init__()
        self.embedding = GNN(3, hidden_dim, hidden_dim, hidden_dim)
        self.assignment = GNN(3, hidden_dim, hidden_dim, num_cluster)

    def forward(self, x, a, mask=None):
        z = self.embedding(x, a, mask)
        s = self.assignment(x, a, mask)
        s = F.softmax(s, dim=1)
        return dense_diff_pool(z, a, s, mask)


class Net(torch.nn.Module):
    def __init__(self, dataset, max_node, hidden_dim):
        super().__init__()
        self.max_node = max_node
        pool1_cluster = ceil(.25 * self.max_node)
        pool2_cluster = ceil(.25 * pool1_cluster)
        self.gnn_before_p1 = GNN(2, dataset.num_features, hidden_dim, hidden_dim)
        self.gnn_after_p1 = GNN(3, hidden_dim, hidden_dim, hidden_dim)
        self.gnn_after_p2 = GNN(3, hidden_dim, hidden_dim, hidden_dim)

        self.p1 = DiffPool(hidden_dim, pool1_cluster)
        self.p2 = DiffPool(hidden_dim, pool2_cluster)
        self.mlp = torch.nn.Sequential(torch.nn.Linear(hidden_dim, hidden_dim // 2),
                                       torch.nn.BatchNorm1d(hidden_dim // 2),
                                       nn.ReLU(),
                                       nn.Linear(hidden_dim // 2, dataset.num_classes))

    def forward(self, x, a, mask=None):
        x = self.gnn_before_p1(x, a, mask)
        x, a, l1, e1 = self.p1(x, a, mask)
        x = self.gnn_after_p1(x, a)
        x, a, l2, e2 = self.p2(x, a)
        x = self.gnn_after_p2(x, a)
        x = x.mean(dim=1)
        return self.mlp(x), l1 + l2 + e1 + e2


@torch.no_grad()
def test(model, loader, device):
    
    model.eval()
    correct = 0

    for data in loader:
        data = data.to(device)
        x, mask = to_dense_batch(data.x, data.batch)
        adj = to_dense_adj(data.edge_index, data.batch)
        pred = model(x, adj, mask)[0].max(dim=1)[1]
        correct += pred.eq(data.y.view(-1)).sum().item()
    return correct / len(loader.dataset)


experiment_name = 'DiffPool'
input_path = '/kaggle/working/data'
output_path = '/kaggle/working/'
results = dict()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


dataset = TUDataset(input_path, 'DD')
train_cnt = int(.8 * len(dataset))
val_cnt = int(.1 * len(dataset))
test_cnt = len(dataset) - train_cnt - val_cnt
train_data, val_data, test_data = torch.utils.data.random_split(dataset, [train_cnt, val_cnt, test_cnt],
                                                                torch.random.manual_seed(0))
max_node = 0
for data in dataset:
    if data.num_nodes > max_node:
        max_node = data.num_nodes

for wd in [1e-3,1e-4,1e-5,1e-6,1e-2,1e-1]:        
    hidden_dim=64
    model = Net(dataset, max_node, hidden_dim).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,weight_decay=wd)
    train_set = DataLoader(train_data, shuffle=True, batch_size=16)
    val_set = DataLoader(val_data)
    test_set = DataLoader(test_data)
    epoch = 30
    best_val_acc = 0
    best_test_acc = 0
    epoch_to_break = 0

    for i in range(epoch):
        train_sum_acc = 0
        train_cnt = 0
        train_loss = 0
        model.train()
        for data in train_set:
            data = data.to(device)
            optimizer.zero_grad()
            x, mask = to_dense_batch(data.x, data.batch)
            adj = to_dense_adj(data.edge_index, data.batch)
            result, lpe_loss = model(x, adj, mask)
            loss = criterion(result, data.y) + lpe_loss
            train_loss += loss
            loss.backward()
            optimizer.step()
            pred = result.max(dim=1)[1]
            acc = torch.eq(pred, data.y).sum().item()
            train_sum_acc += acc
            train_cnt += len(data.y)

        val_acc = test(model, val_set, device)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_test_acc = test(model, test_set, device)
            epoch_to_break = 0
            print(
            f'Epoch{i}: TrainAcc: {train_sum_acc / train_cnt:.6f} TrainLoss:{train_loss / train_cnt:.6f} ValAcc:{val_acc:.6f}'
            f' BestTestAcc: {best_test_acc:.6f}')
        else:
            epoch_to_break += 1

        if epoch_to_break >= 10:
            break
    results[wd]=best_test_acc

print(results)

Epoch0: TrainAcc: 0.705945 TrainLoss:0.595837 ValAcc:0.692308 BestTestAcc: 0.764706
Epoch1: TrainAcc: 0.751592 TrainLoss:0.592268 ValAcc:0.709402 BestTestAcc: 0.781513
Epoch5: TrainAcc: 0.771762 TrainLoss:0.588028 ValAcc:0.717949 BestTestAcc: 0.747899
Epoch6: TrainAcc: 0.780255 TrainLoss:0.591335 ValAcc:0.726496 BestTestAcc: 0.773109
Epoch10: TrainAcc: 0.788747 TrainLoss:0.589115 ValAcc:0.743590 BestTestAcc: 0.731092
Epoch0: TrainAcc: 0.681529 TrainLoss:0.598839 ValAcc:0.632479 BestTestAcc: 0.722689
Epoch1: TrainAcc: 0.757962 TrainLoss:0.594708 ValAcc:0.692308 BestTestAcc: 0.806723
Epoch4: TrainAcc: 0.750531 TrainLoss:0.595495 ValAcc:0.709402 BestTestAcc: 0.781513
Epoch7: TrainAcc: 0.784501 TrainLoss:0.591681 ValAcc:0.726496 BestTestAcc: 0.798319
Epoch11: TrainAcc: 0.797240 TrainLoss:0.588432 ValAcc:0.760684 BestTestAcc: 0.773109
Epoch12: TrainAcc: 0.799363 TrainLoss:0.587725 ValAcc:0.777778 BestTestAcc: 0.798319
Epoch0: TrainAcc: 0.722930 TrainLoss:0.595904 ValAcc:0.683761 BestTestAcc