# Node classification example for PEG (Cora)

# Download dataset and load the data

In [1]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
import torch

# Load the Cora Planetoid dataset (stored under data/Planetoid) with the
# NormalizeFeatures transform applied.
dataset = Planetoid(
    root='data/Planetoid',
    name='Cora',
    transform=NormalizeFeatures(),
)

# Dataset-level summary.
for line in (
    '',
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
):
    print(line)

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
for line in (
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Number of training nodes: {data.train_mask.sum()}',
    f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}',
):
    print(line)


Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05


In [2]:
from torch_geometric.utils import to_networkx
# Prefer the second GPU (index 1) when the host has more than one, fall back
# to the first GPU on single-GPU hosts (where 'cuda:1' would be an invalid
# device), and use the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = f'cuda:{1 if torch.cuda.device_count() > 1 else 0}'
else:
    device = 'cpu'

# Preprocessing: calculate positional encodings

In [3]:
import networkx as nx
import numpy as np
import scipy.sparse as sp
import dgl
def laplacian_positional_encoding(g, pos_enc_dim):
    """
        Graph positional encoding via Laplacian eigenvectors.

        Builds L = I - N A N, where A is the adjacency matrix of ``g`` and N
        is the diagonal matrix of in-degrees (clipped below at 1) raised to
        the power -0.5. The eigenvectors are ordered by increasing
        eigenvalue, the first one is skipped, and the next ``pos_enc_dim``
        are returned.

        Args:
            g: graph exposing ``adjacency_matrix_scipy``, ``in_degrees`` and
                ``number_of_nodes`` (presumably a ``dgl.DGLGraph`` -- confirm
                against the caller).
            pos_enc_dim: number of eigenvectors to keep per node.

        Returns:
            A float ``torch.Tensor`` holding the selected eigenvector columns
            (real part only).
    """
    # Import the submodule explicitly: ``import scipy.sparse as sp`` alone
    # does not guarantee that ``sp.linalg`` has been loaded.
    from scipy.sparse.linalg import eigs

    # Laplacian
    # NOTE(review): the notebook output shows a DGL deprecation warning that
    # points at DGLGraph.adjacency_matrix(transpose, scipy_fmt="csr") as the
    # replacement for this call -- migrate once the DGL version is pinned.
    A = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    N = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    L = sp.eye(g.number_of_nodes()) - N * A * N

    # Eigenvectors with scipy. The loose tolerance was chosen by the original
    # author "for 40 PEs".
    EigVal, EigVec = eigs(L, k=pos_enc_dim+1, which='SR', tol=1e-2)
    EigVec = EigVec[:, EigVal.argsort()] # increasing order

    # ``eigs`` returns complex arrays. Take the real part explicitly instead
    # of relying on the complex -> float cast, which discards the imaginary
    # part implicitly and emits a warning.
    real_vecs = EigVec[:, 1:pos_enc_dim+1].real.copy()
    out = torch.from_numpy(real_vecs).float()

    return out

Using backend: pytorch


In [4]:
# Number of Laplacian eigenvectors kept per node.
PE_dim = 128

# Conversion chain: PyG Data -> networkx graph -> dense adjacency array
# -> scipy COO matrix -> DGL graph.
G = to_networkx(data)
adj = nx.to_numpy_array(G)
sp_adj = sp.coo_matrix(adj)
g = dgl.from_scipy(sp_adj)

# Positional encodings, stored as a NumPy array for the later cells.
pos_enc = laplacian_positional_encoding(g, pos_enc_dim=PE_dim)
embeddings = np.array(pos_enc)


	DGLGraph.adjacency_matrix(transpose, scipy_fmt="csr").



# Training: Set up model and train

In [5]:
import torch.nn.functional as F
from peg_conv import PEGConv
from torch import nn

In [6]:
class Net(torch.nn.Module):
    """Two stacked ``PEGConv`` layers followed by a log-softmax.

    Args:
        in_feats_dim: input channel count of the first layer.
        hidden_dim: output channel count of both layers.
    """

    def __init__(self, in_feats_dim, hidden_dim):
        super().__init__()

        self.in_feats_dim, self.hidden_dim = in_feats_dim, hidden_dim

        # Layer widths: in_feats_dim -> hidden_dim -> hidden_dim.
        self.conv1 = PEGConv(in_channels=in_feats_dim, out_channels=hidden_dim)
        self.conv2 = PEGConv(in_channels=hidden_dim, out_channels=hidden_dim)

    def forward(self, x, pos_encoding, edge_index):
        """Apply both layers with the same ``pos_encoding`` and ``edge_index``.

        Returns the log-softmax, taken over dim 1, of the second layer's output.
        """
        hidden = self.conv1(x, pos_encoding, edge_index)
        scores = self.conv2(hidden, pos_encoding, edge_index)
        return F.log_softmax(scores, dim=1)


In [7]:
# Model, optimizer and loss objects.
model = Net(in_feats_dim = dataset.num_features, hidden_dim = 128)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# NOTE(review): `criterion` is not used by the visible train(), which calls
# F.nll_loss on the model's log-softmax output; kept in case another cell
# depends on it.
criterion = torch.nn.CrossEntropyLoss()

# Move everything to the selected device. `.to(device)` works for both CPU and
# CUDA targets, whereas the previous `x.cuda(device)` raised when `device` was
# 'cpu'.
model, data = model.to(device), data.to(device)
pos = torch.tensor(embeddings).to(device)
x = data.x.to(device)

In [8]:
def train():
    """Run one optimisation step using the loss on the training-mask nodes.

    Uses the module-level ``model``, ``optimizer``, ``x``, ``pos`` and ``data``.
    Returns nothing.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(x, pos, data.edge_index)
    train_nodes = data.train_mask
    F.nll_loss(log_probs[train_nodes], data.y[train_nodes]).backward()

    optimizer.step()

@torch.no_grad()
def test():
    """Evaluate the model and return accuracies as ``[train, val, test]``.

    A single forward pass is shared across the three masks. Each accuracy is
    the fraction of masked nodes whose highest-scoring class equals the label.
    """
    model.eval()
    logits = model(x, pos, data.edge_index)

    def masked_accuracy(mask):
        # Index of the maximum along dim 1 is the predicted class.
        _, pred = logits[mask].max(dim=1)
        n_correct = (pred == data.y[mask]).sum().item()
        return n_correct / mask.sum().item()

    return [
        masked_accuracy(mask)
        for _, mask in data("train_mask", "val_mask", "test_mask")
    ]

In [9]:
# Per-epoch log line template. The "Val" column shows the best validation
# accuracy so far and "Test" the test accuracy recorded at that epoch.
log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'

best_val_acc = 0
test_acc = 0
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    # Model selection on validation accuracy: update only on strict improvement.
    if val_acc > best_val_acc:
        best_val_acc, test_acc = val_acc, tmp_test_acc
    print(log.format(epoch, train_acc, best_val_acc, test_acc))

Epoch: 001, Train: 0.3143, Val: 0.2180, Test: 0.1980
Epoch: 002, Train: 0.4857, Val: 0.2860, Test: 0.2730
Epoch: 003, Train: 0.5429, Val: 0.3040, Test: 0.3010
Epoch: 004, Train: 0.5429, Val: 0.3040, Test: 0.3010
Epoch: 005, Train: 0.4857, Val: 0.3040, Test: 0.3010
Epoch: 006, Train: 0.4500, Val: 0.3040, Test: 0.3010
Epoch: 007, Train: 0.4214, Val: 0.3040, Test: 0.3010
Epoch: 008, Train: 0.3857, Val: 0.3040, Test: 0.3010
Epoch: 009, Train: 0.3500, Val: 0.3040, Test: 0.3010
Epoch: 010, Train: 0.3429, Val: 0.3040, Test: 0.3010
Epoch: 011, Train: 0.3286, Val: 0.3040, Test: 0.3010
Epoch: 012, Train: 0.3214, Val: 0.3040, Test: 0.3010
Epoch: 013, Train: 0.3214, Val: 0.3040, Test: 0.3010
Epoch: 014, Train: 0.3571, Val: 0.3040, Test: 0.3010
Epoch: 015, Train: 0.5000, Val: 0.3040, Test: 0.3010
Epoch: 016, Train: 0.6286, Val: 0.3720, Test: 0.3770
Epoch: 017, Train: 0.5786, Val: 0.3720, Test: 0.3770
Epoch: 018, Train: 0.4500, Val: 0.3720, Test: 0.3770
Epoch: 019, Train: 0.4643, Val: 0.3720, Test: 

Epoch: 163, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 164, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 165, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 166, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 167, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 168, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 169, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 170, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 171, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 172, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 173, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 174, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 175, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 176, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 177, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 178, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 179, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 180, Train: 1.0000, Val: 0.8000, Test: 0.8180
Epoch: 181, Train: 1.0000, Val: 0.8000, Test: 