In [1]:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric
!pip install -q git+https://github.com/snap-stanford/deepsnap.git

  Building wheel for deepsnap (setup.py) ... [?25l[?25hdone


In [2]:
import copy
import torch
import torch.nn as nn
import networkx as nx
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim

from deepsnap.graph import Graph
from deepsnap.batch import Batch
from deepsnap.dataset import GraphDataset
from torch_geometric.datasets import Planetoid
from torch.utils.data import DataLoader
from torch_geometric.nn import SAGEConv

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Read NetworkX graph of restaurants
G = nx.read_gpickle("./drive/MyDrive/Colab Notebooks/restaurants.gpickle")

In [5]:
G.number_of_nodes()

29963

In [6]:
G.number_of_edges()

491464

In [7]:
class GNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, args):
        super(GNN, self).__init__()
        self.num_layers = args["num_layers"]

        self.convs = nn.ModuleList()
        self.convs.append(SAGEConv(input_size, hidden_size))
        for l in range(self.num_layers - 1):
            self.convs.append(SAGEConv(hidden_size, hidden_size))
        self.post_mp = nn.Linear(hidden_size, output_size)

    def forward(self, data):
        x, edge_index, batch = data.node_feature, data.edge_index, data.batch

        for i in range(len(self.convs) - 1):
            x = self.convs[i](x, edge_index)
            x = F.leaky_relu(x)
        x = self.convs[-1](x, edge_index)
        x = F.log_softmax(x, dim=1)
        return x

    def loss(self, pred, label):
        return F.nll_loss(pred, label)

In [10]:
def train(train_loader, val_loader, test_loader, args, num_node_features, num_classes,
          device="cpu"):
    model = GNN(num_node_features, args['hidden_size'], num_classes, args).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=args['lr'], weight_decay=5e-4)

    for epoch in range(args['epochs']):
        total_loss = 0
        model.train()
        for batch in train_loader:
            batch.to(device)
            optimizer.zero_grad()
            pred = model(batch)
            label = batch.node_label
            loss = model.loss(pred[batch.node_label_index], label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        train_acc = round(test(train_loader, model, device), 4)
        val_acc = round(test(val_loader, model, device), 4)
        test_acc = round(test(test_loader, model, device), 4)
        print(f"Epoch {epoch + 1}: Train: {train_acc}, Validation: {val_acc}. Test: {test_acc}, Loss: {round(total_loss, 4)}")

def test(loader, model, device='cuda'):
    model.eval()
    for batch in loader:
        batch.to(device)
        logits = model(batch)
        pred = logits[batch.node_label_index].max(1)[1]
        acc = pred.eq(batch.node_label).sum().item()
        total = batch.node_label_index.shape[0]
        acc /= total
    return acc

In [11]:
args = {
    "device" : 'cuda' if torch.cuda.is_available() else 'cpu',
    "hidden_size" : 128,
    "epochs" : 100,
    "lr" : 0.01,
    "num_layers": 5,
}

H = Graph(G)
dataset = GraphDataset(graphs=[H], task='node')

dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.8, 0.1, 0.1])
print(dataset_train, dataset_val, dataset_test)

train_loader = DataLoader(dataset_train, collate_fn=Batch.collate(), batch_size=1)
val_loader = DataLoader(dataset_val, collate_fn=Batch.collate(), batch_size=1)
test_loader = DataLoader(dataset_test, collate_fn=Batch.collate(), batch_size=1)

num_node_features = dataset_train.num_node_features
num_classes = dataset_train.num_node_labels

train(train_loader, val_loader,test_loader, args, num_node_features, num_classes, args["device"])

GraphDataset(1) GraphDataset(1) GraphDataset(1)
GNN(
  (convs): ModuleList(
    (0): SAGEConv(2, 128)
    (1): SAGEConv(128, 128)
    (2): SAGEConv(128, 128)
    (3): SAGEConv(128, 128)
    (4): SAGEConv(128, 128)
  )
  (post_mp): Linear(in_features=128, out_features=3, bias=True)
)
Epoch 1: Train: 0.5005, Validation: 0.4967. Test: 0.5012, Loss: 4.8515
Epoch 2: Train: 0.2578, Validation: 0.2627. Test: 0.2626, Loss: 3.1253
Epoch 3: Train: 0.2501, Validation: 0.25. Test: 0.2472, Loss: 5.0447
Epoch 4: Train: 0.2501, Validation: 0.25. Test: 0.2472, Loss: 3.4888
Epoch 5: Train: 0.2607, Validation: 0.2623. Test: 0.2623, Loss: 2.4435
Epoch 6: Train: 0.5005, Validation: 0.4967. Test: 0.5012, Loss: 3.1811
Epoch 7: Train: 0.5005, Validation: 0.4967. Test: 0.5012, Loss: 4.8266
Epoch 8: Train: 0.4981, Validation: 0.4977. Test: 0.5018, Loss: 2.4932
Epoch 9: Train: 0.2501, Validation: 0.25. Test: 0.2472, Loss: 1.2026
Epoch 10: Train: 0.2501, Validation: 0.25. Test: 0.2472, Loss: 1.5634
Epoch 11: Tra