<a href="https://colab.research.google.com/github/Lua-Nova/Modern-GAP-GNN/blob/main/ModernGAP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
if torch.cuda.is_available():
  #NVIDIA GPU version
  %pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f f'https://data.pyg.org/whl/torch-1.12.0+{cutorch.version.cuda.replace('.','')}.html'
else:
  #CPU version
  %pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cpu.html
%pip install opacus

device = "cuda" if torch.cuda.is_available() else "cpu"



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.0+cpu.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcpu/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (286 kB)
[K     |████████████████████████████████| 286 kB 2.7 MB/s 
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcpu/torch_sparse-0.6.15-cp37-cp37m-linux_x86_64.whl (641 kB)
[K     |████████████████████████████████| 641 kB 9.6 MB/s 
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcpu/torch_cluster-1.6.0-cp37-cp37m-linux_x86_64.whl (311 kB)
[K     |████████████████████████████████| 311 kB 10.5 MB/s 
[?25hCollecting torch-spline-conv
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcpu/torch_spline_conv-1.2.1-cp37-cp37m-linux_x86_64.whl (121 kB)
[K     |████████████████████████████████| 121 kB 11.6 MB/s 
[?25hCol

In [2]:
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch_geometric.nn import Sequential, GCNConv
import opacus as op

## Encoder Module

In [3]:
torch.manual_seed(11)
# create classes for layers that are used a lot to avoid repeating code

class MLP(nn.Module):
  # e.g. dimensions = [50,40,30,20]
    def __init__(self, dimensions):
        super().__init__()
        self.flatten = nn.Flatten()
        layers = []
        for i in range(len(dimensions)-1):
          layers.append(nn.Linear(dimensions[i], dimensions[i+1]))
          layers.append(nn.ReLU(inplace=True))

        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

## PMA

In [65]:
class PMA(nn.Module):
    # A - adjacency matrix     TODO: this should not be given to the module itself, it should access it in training (or from the graph dataset)
    # num_hops - the number of hops covered by this GNN
    def __init__(self, A, num_hops, sigma):
        super().__init__()
        # TODO: Figure out if you should tranpose this
        # self.A_transpose = torch.transpose(A, 0,1)
        self.A_transpose = A
        self.num_hops = num_hops
        self.sigma = sigma
    
    def forward(self, x):
        # out = [torch.nn.functional.normalize(x, dim=1)]
        # for k in range(self.num_hops):
        #     aggr = torch.mm(self.A_transpose, out[-1])
        #     noised = aggr + torch.normal(torch.zeros(aggr.size()), std=self.sigma)
        #     normalized = torch.nn.functional.normalize(noised, dim=1)
        #     out.append(normalized)
        # return torch.stack(out)
        return torch.nn.functional.normalize(x, dim=1)

In [5]:
# TEMP CODE
smoothing = 0.2
A = torch.tensor([[1.,smoothing,smoothing],
                  [smoothing,1.,smoothing],
                  [smoothing,smoothing,1.]])
x = torch.tensor([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]])
pma = PMA(A, 10, 1)
tensor = pma(x)
tensor = tensor.cpu().numpy()

# plt.figure(figsize=(16,7))
# plt.imshow(tensor)
# plt.show()
        # [encoder, pma, element_wise_mlp, combine, mlp]


In [6]:
10*torch.ones((2, 3)) + torch.normal(torch.zeros((2, 3)), std=1)

tensor([[10.2378, 11.2296,  9.8472],
        [10.4600, 10.7089, 12.2670]])

## Classification Module
NOTE: 

MLP base: The first MLP in the cassification module. 

MLP head: The last MLP and takes the combined output of all MLP base.

In [7]:
class Classification(nn.Module):
    # num_hops - the number of hops covered by this GNN
    # encoder_dimensions - the MLP dimensions of each base MLP
    # head_dimensions - the dimensions of the head MLP
    def __init__(self, num_hops, encoder_dimensions, head_dimensions):
        super().__init__()
        self.base_mlps = nn.ModuleList()
        for i in range(num_hops+1):
          self.base_mlps.append(MLP(encoder_dimensions))
        self.head_mlp = MLP(head_dimensions) # TODO: should this be softmax? I think we add a softmax for classification tasks. We can test if it works better
    
    def forward(self, cache):
        # forward through bases
        out = []
        for i in range(len(self.base_mlps)):
          encoding = self.base_mlps[i](cache[i,:,:])
          out.append(encoding) # add corresponding encoding
        # combine (use concatenation)
        combined_x = torch.cat(out, dim=1)
        # forward through head
        return self.head_mlp(combined_x)

In [8]:
class GAP(nn.Module):
  # encoder - pretrained encoder module
  # pma - PMA module
  # classification - classification module
  def __init__(self, encoder, pma, classification): # TODO: decide whether we should recieve the models as parameters
    super().__init__()
    self.encoder = encoder
    self.encoder.requires_grad=False
    self.pma = pma
    self.classification = classification

  def forward(self, x):
    # initial node encoding
    x_encoded = self.encoder(x)
    # aggregation module
    cache = self.pma(x_encoded) 
    # classification
    return self.classification(cache) 


##Hyperparameters

In [50]:
node_level = True

# Edge level DP
epsilon, delta, alpha = 1000, 0.1, 1
# specify specific epsilon_1, epsilon_5 for node-level and then just do a subtraction, and calculate sigma from remaining epsilon
K_hop = 0
# sigma = 1 / np.max(np.roots([K_hop/2, np.sqrt(2*K_hop*np.log(1/delta)), -epsilon]))
sigma = 0
# Node level DP
if (node_level):
  pass
  # How do we calculate this?
data = "reddit"

print("sigma:", sigma)

sigma: 0


## Data

In [11]:
# returns filtered edge index, first removes edges that have removed src or dst nodes, then shifts indices of remained src/dst nodes
def filter_edge_index(edge_index, filter):
    vertex_remap = torch.zeros(filter.size(), dtype=torch.int)
    new_id = 0
    for i in range(filter.size(dim=0)):
      if filter[i]:
        vertex_remap[i] = new_id
        new_id += 1
    edge_index = edge_index.to(torch.long)
    edge_filter = torch.logical_and(filter[edge_index[0]], filter[edge_index[1]])
    return torch.from_numpy(np.array([np.fromiter((vertex_remap[row] for row in edge_index[0, edge_filter]), int), 
                                      np.fromiter((vertex_remap[row] for row in edge_index[1, edge_filter]), int)]))

def prepare_dataset(X, y, edge_index, filter):
    num_classes = torch.unique(y).size(dim=0)
    # loader = DataLoader(dataset, batch_size=len(dataset), shuffle=True)
    # since we are using an adjacency matrix instead of edgelist, make that
    indexes = torch.empty((2,0), dtype = torch.int)

    edge_index = filter_edge_index(edge_index, filter)

    for i in range(edge_index.size(dim=1)):
        src, dst = edge_index[0, i], edge_index[1, i]
        # since undirected
        indexes = torch.cat((indexes, torch.tensor([[src], [dst]])), 1)

    values = torch.ones(indexes[0].size(), dtype = torch.int)


    A = torch.sparse_coo_tensor(indexes, values, (X.size(dim=0), X.size(dim=0)), dtype=torch.float)

    return A, num_classes

In [12]:
from torch_geometric.datasets import Amazon
from torch_geometric.loader import DataLoader
dataset = Amazon('.', name='Computers')

y = dataset[0]['y']
X = dataset[0]['x']
index_map = torch.zeros(y.size())
included_classes = y.unique(return_counts=True)[1] >= 1000
# remap labels (i.e. if they were 0-8 and we remove 4 labels, new labels should be between 0 and 4)
label_remap = torch.zeros(included_classes.size(), dtype=torch.int)
new_id = 0
for i in range(included_classes.size(dim=0)):
  if included_classes[i]:
    label_remap[i] = new_id
    new_id += 1
filter = included_classes[y]
y = label_remap[y[filter]].to(torch.long)
X = X[filter]

# print(X)

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!


In [13]:
print(X.size())
edge_index = dataset[0]['edge_index']
A, num_classes = prepare_dataset(X, y, edge_index, filter)

torch.Size([10870, 767])


## Train/Test


In [60]:
# train
def train(X, y, model, loss_fn, optimizer): 
    # make this into dataloader using backup
    model.train()
    X, y = X.to(device), y.to(device)

    # Compute prediction error
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# test
def test(X, y, model, loss_fn):
    size = X.size(dim=0)
    model.eval()
    test_loss, correct = 0, 0
    with torch.inference_mode():
        X, y = X, y
        X, y = X.to(device), y.to(device)
        pred = model(X)
        test_loss += loss_fn(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

##Sampling from K-hop neighbourhood

## Encoder

Encoder Design


In [52]:
# encoder
dimensions = [767, 300, 60]
encoder_train = nn.Sequential(
    MLP(dimensions),
    nn.Linear(dimensions[-1], num_classes),
    nn.Softmax(dim=1)
)

Encoder Pretraining

In [53]:
encoder_model = encoder_train.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(encoder_model.parameters(), lr=1e-3)

# if node_level:
#   optimizer = op.optimizers.optimizer.DPOptimizer(
#       # TODO: Fill out these parameters '?'
#       optimizer=optimizer,
#       noise_multiplier=?,
#       max_grad_norm=?
#   )

epochs = 100
for t in range(epochs):
    # print(f"Epoch {t+1}\n-------------------------------")
    train(X, y, encoder_model, loss_fn, optimizer)
    if t % 10 == 0:
      test(X, y, encoder_model, loss_fn)
print("Done!")

encoder = encoder_model[0]

# for name, param in encoder_model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.356136 

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.269011 

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.261342 

Test Error: 
 Accuracy: 60.1%, Avg loss: 1.146479 

Test Error: 
 Accuracy: 70.7%, Avg loss: 1.078218 

Test Error: 
 Accuracy: 75.3%, Avg loss: 1.016876 

Test Error: 
 Accuracy: 77.7%, Avg loss: 0.981873 

Test Error: 
 Accuracy: 78.9%, Avg loss: 0.962439 

Test Error: 
 Accuracy: 80.0%, Avg loss: 0.950373 

Test Error: 
 Accuracy: 80.7%, Avg loss: 0.941170 

Done!


## Full Model Training

Train full model

In [66]:
encoder.requires_grad=False
model = nn.Sequential(encoder, 
                      PMA(A, K_hop, sigma), 
                      nn.Linear(60, num_classes), 
                      nn.Softmax(dim=1))
# model = nn.Sequential(encoder,
#                       PMA(A, K_hop, sigma),
#                       Classification(K_hop, [60, 20], [(K_hop+1)*20, num_classes]))
# model = GAP(encoder, 
#             PMA(A, K_hop, sigma), 
#             Classification(K_hop, [60, 60], [(K_hop+1)*60, num_classes]))
# model = nn.Sequential(encoder, 
#                       PMA(A, K_hop, sigma))
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

epochs = 100
for t in range(epochs):
    # print(f"Epoch {t+1}\n-------------------------------")
    train(X, y, model, loss_fn, optimizer)
    if t % 10 == 0:
      test(X, y, model, loss_fn)
print("Done!")

# print(nn.functional.normalize(encoder(X), dim=1))
# print(model(X))

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.383258 

Test Error: 
 Accuracy: 20.1%, Avg loss: 1.372434 

Test Error: 
 Accuracy: 63.7%, Avg loss: 1.360602 

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.348021 

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.335101 

Test Error: 
 Accuracy: 47.5%, Avg loss: 1.322352 



KeyboardInterrupt: ignored

In [None]:
# Sigma calculated above in node-level and edge-level DP case
gap = GAP(encoder, PMA(A, K_hop, sigma), Classification(K_hop, [60, 30, 20], [(K_hop+1)*20, 60, 30, num_classes]))
gap_model = gap.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(gap_model.parameters(), lr=1e-1)

# if node_level:
#   optimizer = op.optimizers.optimizer.DPOptimizer(
#       # TODO: Fill out these parameters '?'
#       optimizer=optimizer,
#       noise_multiplier=?,
#       max_grad_norm=?,
#       loss_reduction='sum'
#   ) 

epochs = 500
for t in range(epochs):
    # print(f"Epoch {t+1}\n-------------------------------")
    train(X, y, gap_model, loss_fn, optimizer)
    if t % 10 == 0:
      test(X, y, gap_model, loss_fn)
print("Done!")

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 
 Accuracy: 19.7%, Avg loss: 1.386295 

Test Error: 

## Backup

In [None]:
# # train
# def train(dataloader, model, loss_fn, optimizer, print_every = 100):
#     size = len(dataloader.dataset)
#     model.train()
#     for batch, (X, y) in enumerate(dataloader):
#         X, y = X.to(device), y.to(device)

#         # Compute prediction error
#         pred = model(X)
#         loss = loss_fn(pred, y)

#         # Backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         if batch % print_every == 0:
#             loss, current = loss.item(), batch * len(X)
#             print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# # test
# def test(dataloader, model, loss_fn):
#     size = len(dataloader.dataset)
#     num_batches = len(dataloader)
#     model.eval()
#     test_loss, correct = 0, 0
#     with torch.inference_mode():
#         for X, y in dataloader:
#             X, y = X.to(device), y.to(device)
#             pred = model(X)
#             test_loss += loss_fn(pred, y).item()
#             correct += (pred.argmax(1) == y).type(torch.float).sum().item()
#     test_loss /= num_batches
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")