<a href="https://colab.research.google.com/github/Lua-Nova/Modern-GAP-GNN/blob/new/ModernGAP_Experiment_Version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
if torch.cuda.is_available():
  #NVIDIA GPU version
  %pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
  %pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
  %pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
  %pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
  %pip install git+https://github.com/pyg-team/pytorch_geometric.git
else:
  #CPU version
  %pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.0.0 -f https://data.pyg.org/whl/torch-1.12.0+cpu.html
%pip uninstall pyvacy  --y
%pip install pyvacy

device = "cuda" if torch.cuda.is_available() else "cpu"



In [2]:
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch_geometric.nn import Sequential, GCNConv

## Encoder Module

In [3]:
torch.manual_seed(11)
# create classes for layers that are used a lot to avoid repeating code

class MLP(nn.Module):
  # e.g. dimensions = [50,40,30,20]
    def __init__(self, dimensions):
        super().__init__()
        self.flatten = nn.Flatten()
        layers = []
        for i in range(len(dimensions)-1):
          layers.append(nn.Linear(dimensions[i], dimensions[i+1]))
          layers.append(nn.SELU(inplace=True))

        self.linear_selu_stack = nn.Sequential(*layers)

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_selu_stack(x)
        return logits

## PMA

In [32]:
class AggregationModule(nn.Module):
  edge_index = None

  def __init__(self):
        super().__init__()

class PMA(AggregationModule):
    # A - adjacency matrix     TODO: this should not be given to the module itself, it should access it in training (or from the graph dataset)
    # num_hops - the number of hops covered by this GNN
    def __init__(self, num_hops, sigma):
        super().__init__()
        self.num_hops = num_hops
        self.sigma = sigma

    def forward(self, x):
        # TEMP SOLUTION
        if AggregationModule.edge_index is None:
          raise RuntimeError("Set AggregationModule.edge_index [TEMP SOLUTION] before running")
        edge_index = AggregationModule.edge_index
        A = get_adjacency_matrix(edge_index, x.size(dim=0))
        out = [torch.nn.functional.normalize(x, dim=1)]
        for k in range(self.num_hops):
            aggr = torch.mm(torch.transpose(A, 0, 1), out[-1].to(device))
            noised = aggr + torch.normal(torch.zeros(aggr.size()), std=self.sigma).to(device)
            normalized = torch.nn.functional.normalize(noised, dim=1)
            out.append(normalized)
        return torch.stack(out)
        # return torch.nn.functional.normalize(x, dim=1)

class PMAT(AggregationModule):
    # def __init__(self, num_hops, transform_dimensions):
    def __init__(self, num_hops, encoding_dimensions, sigma):
        super().__init__()
        self.num_hops = num_hops
        self.sigma = sigma
        self.sigmoid = nn.Sigmoid()
        # self.transforms = nn.ModuleList()
        self.attentions = nn.ModuleList()
        for i in range(num_hops):
          # self.transforms.append(nn.Linear(*transform_dimensions)) # Only 1 layer transformation
          # self.attentions.append(MLP([2*transform_dimensions[-1], 1])) # Attention mechanism takes 2 encodings and outputs 1 weight
          # TODO: Figure out if we want a transformer?
          self.attentions.append(MLP([2*encoding_dimensions, 1]))

    def forward(self, x):
        # TEMP SOLUTION
        if AggregationModule.edge_index is None:
          raise RuntimeError("Set AggregationModule.edge_index [TEMP SOLUTION] before running")
        edge_index = AggregationModule.edge_index
        out = [torch.nn.functional.normalize(x, dim=1)]
        for k in range(self.num_hops):
            # Do we need to do a transform? I reckon we can use raw encoding and aggregate according to attention (and then the Classification module)
            # can handle how the aggregations get transformed
            # h = self.transforms[k](out[-1])
            h = out[-1]
            e_values = self.attentions[k](h[edge_index.T].reshape(edge_index.size(dim=1), 2*h.size(dim=1))) # DPSGD to guarantee DP attention training
            # we have to use Sigmoid because if we use Softmax, removing an edge will change the weight of all other edges in the neighbourhood
            alpha_values = self.sigmoid(e_values)
            alpha = torch.sparse_coo_tensor(edge_index,
                                            alpha_values.reshape(edge_index.size(dim=1)),
                                            (x.size(dim=0), x.size(dim=0)),
                                            dtype=torch.float).transpose(0, 1)

            aggr = torch.sparse.mm(alpha, h)
            # Might need to not use "transforms" and instead do raw aggregations like the original PMA
            # aggr = torch.mm(self.A, out[-1]) 
            # noised = aggr # TODO: add noise # Gaussian mechanism to guarantee DP for neighbourhood aggregation
            noised = aggr + torch.normal(torch.zeros(aggr.size()), std=self.sigma).to(device)
            normalized = torch.nn.functional.normalize(noised, dim=1)
            out.append(normalized)
        return torch.stack(out)


## Classification Module
NOTE: 

MLP base: The first MLP in the cassification module. 

MLP head: The last MLP and takes the combined output of all MLP base.

In [5]:
class Classification(nn.Module):
    # num_hops - the number of hops covered by this GNN
    # encoder_dimensions - the MLP dimensions of each base MLP
    # head_dimensions - the dimensions of the head MLP
    def __init__(self, num_hops, encoder_dimensions, head_dimensions):
        super().__init__()
        self.base_mlps = nn.ModuleList()
        self.num_hops = num_hops
        if encoder_dimensions:
          for i in range(num_hops+1):
              self.base_mlps.append(MLP(encoder_dimensions))
        self.head_mlp = MLP(head_dimensions) # TODO: should this be softmax? I think we add a softmax for classification tasks. We can test if it works better
    
    def forward(self, cache):
        # forward through bases
        out = []
        for i in range(self.num_hops+1):
          if self.base_mlps:
            encoding = self.base_mlps[i](cache[i,:,:])
            out.append(encoding) # add corresponding encoding
          else:
            out.append(cache[i, :, :])
        # combine (use concatenation)
        combined_x = torch.cat(out, dim=1)
        # forward through head
        return self.head_mlp(combined_x)

In [6]:
class GAP(nn.Module):
  # encoder - pretrained encoder module
  # pma - PMA module
  # classification - classification module
  def __init__(self, encoder, pma, classification): # TODO: decide whether we should recieve the models as parameters
    super().__init__()
    self.encoder = encoder
    self.encoder.requires_grad=False
    self.pma = pma
    self.classification = classification

  def forward(self, x):
    # initial node encoding
    x_encoded = self.encoder(x)
    # aggregation module
    cache = self.pma(x_encoded) 
    # classification
    return self.classification(cache) 


##Hyperparameters

In [46]:
node_level = True

# Edge level DP
agg_epsilon, agg_delta, epsilon_1, epsilon_5, pmat_epsilon, pmat_delta = 4, 0.5, 4, 4, 4, 0.5
K_hop = 1
agg_sigma = 1 / np.max(np.roots([K_hop/2, np.sqrt(2*K_hop*np.log(1/agg_delta)), -agg_epsilon]))
# Node level DP
if (node_level):
  pass
  # How do we calculate this?
data = "reddit"
batch_size = 32

print(f"Epsilon: {agg_epsilon:>0.2f}, Delta: {agg_delta:>0.2f}, Sigma: {agg_sigma:>0.2f}")

Epsilon: 4.00, Delta: 0.50, Sigma: 0.53


## Preprocess

In [33]:
from torch_geometric.data import Data

# this method partitions based on nodes (so edges between splits are not used)
def train_test_split(dataset, test_ratio):
    X, y, edge_index= dataset.x, dataset.y, dataset.edge_index
    shuffle_ordering = torch.randperm(X.size(dim=0))

    edge_mapping = torch.zeros(X.size(dim=0), dtype=torch.long)
    edge_mapping[shuffle_ordering] = torch.arange(X.size(dim=0))

    X = X[shuffle_ordering]
    y = y[shuffle_ordering]
    edge_index = edge_mapping[edge_index]

    mask = torch.zeros(X.size(dim=0), dtype=torch.bool)
    train_slice = int((1-test_ratio)*X.size(dim=0))
    mask[:train_slice] = True

    X_train = X[mask]
    X_test = X[~mask]

    y_train = y[mask]
    y_test = y[~mask]

    edge_index_train = edge_index[:, torch.logical_and(*mask[edge_index])]
    edge_index_test = edge_index[:, torch.logical_and(*~mask[edge_index])] - train_slice

    return Data(x=X_train, y=y_train, edge_index=edge_index_train), \
           Data(x=X_test, y=y_test, edge_index=edge_index_test)


# returns filtered edge index, first removes edges that have removed src or dst nodes, then shifts indices of remained src/dst nodes
def filter_edge_index(edge_index, filter):

    node_indices = torch.arange(filter.size(dim=0))[filter]
    edge_mapping = torch.zeros(filter.size(dim=0), dtype=torch.long)
    edge_mapping[node_indices] = torch.arange(node_indices.size(dim=0))


    edge_index = edge_index.to(torch.long)
    edge_filter = torch.logical_and(*filter[edge_index])
    return edge_mapping[edge_index[:, edge_filter]]

def add_edge_to_low_degree_nodes(dataset, low_degree_threshold):
    X, y, edge_index = dataset.x, dataset.y, dataset.edge_index

    # get low degree nodes
    A = get_adjacency_matrix(edge_index, X.size(dim=0))
    sums = torch.sparse.sum(A, dim=1).to_dense()
    mask = sums < low_degree_threshold
    
    # get edge_index mask for neighbours of low degree nodes
    filter = mask[edge_index[0, :]]
    low_degree_edges_index = edge_index[:, filter]
    low_degree_A = get_adjacency_matrix(low_degree_edges_index, X.size(dim=0))

    # get 1-hop neighbours and add to A 
    # NOTE: without sampling (just adds all 1-hop neighbours)
    one_hop_low_degree_A = torch.sparse.mm(low_degree_A, A)
    new_edge_index = torch.add(one_hop_low_degree_A, A).coalesce().indices()
    return Data(x=X, y=y, edge_index=new_edge_index)
    

def prepare_dataset(dataset, threshold):
    X, y, edge_index = dataset.x, dataset.y, dataset.edge_index

    # remove labels with less examples than threshold
    index_map = torch.zeros(y.size())
    included_classes = y.unique(return_counts=True)[1] >= threshold
    filter = included_classes[y]
    # remap labels (i.e. if they were 0-8 and we remove 4 labels, new labels should be between 0 and 4)
    label_mapping = torch.zeros(included_classes.size(dim=0), dtype=torch.long)
    label_mapping[included_classes] = torch.arange(torch.count_nonzero(included_classes))

    y = label_mapping[y[filter]].to(torch.long)
    X = X[filter]

    # remove edges that had their nodes removed
    edge_index = filter_edge_index(edge_index, filter)

    return Data(x=X, y=y, edge_index=edge_index)

# make sparse adjacency matrix, A
def get_adjacency_matrix(edge_index, num_nodes):
    values = torch.ones(edge_index.size(dim=1), dtype = torch.int).to(device)
    A = torch.sparse_coo_tensor(edge_index, values, (num_nodes, num_nodes), dtype=torch.float)
    return A

def standardization(train_dataset, test_dataset):
    X = train_dataset.x
    means = X.mean(dim=0, keepdim=True)
    stds = X.std(dim=0, keepdim=True)
    X_train = (X - means) / stds
    X_test = (test_dataset.x - means) / stds
    return Data(x=X_train, y=train_dataset.y, edge_index=train_dataset.edge_index), Data(x=X_test, y=test_dataset.y, edge_index=test_dataset.edge_index)

def add_self_edges(dataset):
    X = dataset.x
    self_edges = torch.stack((torch.arange(X.size(dim=0)), torch.arange(X.size(dim=0))))
    edge_index = torch.cat((dataset.edge_index, self_edges), dim=1)
    return Data(x=X, y=dataset.y, edge_index=edge_index)

## Data

In [9]:
from torch_geometric.datasets import Reddit
dataset = Reddit('.')[0]
# prepare dataset by removing classes that have less than 1000 examples
dataset = prepare_dataset(dataset, 10000)
dataset = add_self_edges(dataset)
# get num classes
num_classes = torch.unique(dataset.y).size(dim=0)

# train/test split
train_dataset, test_dataset = train_test_split(dataset, 0.2)
print(train_dataset.edge_index.size(dim=1))
# train_dataset = add_edge_to_low_degree_nodes(train_dataset, 10)
print(train_dataset.edge_index.size(dim=1))
# test_dataset = add_edge_to_low_degree_nodes(test_dataset, 10)
train_dataset, test_dataset = standardization(train_dataset, test_dataset)

Downloading https://data.dgl.ai/dataset/reddit.zip
Extracting ./raw/reddit.zip
Processing...
Done!


29495772
29495772


In [10]:
from torch_geometric.loader import NeighborLoader

X_train, y_train, edge_index_train = train_dataset.x, train_dataset.y, train_dataset.edge_index
X_test, y_test, edge_index_test = test_dataset.x, test_dataset.y, test_dataset.edge_index

# using large number like 10,000 so that all neighbours are sampled 
# I don't like how it samples, so I'm just gonna sample everything
train_loader = NeighborLoader(train_dataset, num_neighbors=[X_train.size(dim=0)]*K_hop, 
                              batch_size=batch_size, shuffle=True)
test_loader = NeighborLoader(test_dataset, num_neighbors=[X_test.size(dim=0)]*K_hop, 
                             batch_size=batch_size, shuffle=True)

## Train/Test


In [19]:
# train
def train(batch, model, loss_fn, optimizer):
  model.train()
  X, y = batch.x.to(device), batch.y.to(device)
  AggregationModule.edge_index = batch.edge_index.to(device)
  # compute prediction error
  pred = model(X)
  loss = loss_fn(pred[:batch.batch_size], y[:batch.batch_size])
  # backpropagation
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

# test batch
def batch_test(batch, split, model, loss_fn, wordy=False):
    size = batch.batch_size
    model.eval()
    test_loss, correct = 0, 0
    with torch.inference_mode():
        X, y = batch.x.to(device), batch.y.to(device)
        AggregationModule.edge_index = batch.edge_index.to(device)
        pred = model(X)
        test_loss = loss_fn(pred[:batch.batch_size], y[:batch.batch_size]).item()
        correct = (pred[:batch.batch_size].argmax(1) == y[:batch.batch_size]).type(torch.float).sum().item() / size
    if wordy:
      print(f"{split.title()} Error: \n Accuracy: {(100*correct):>0.1f}%, Loss: {test_loss:>8f}")
    return test_loss, correct

# test
def test(loader, split, model, loss_fn):
    size = len(loader)
    model.eval()
    test_loss, correct = 0, 0
    for batch in loader:
        batch_loss, batch_correct = batch_test(batch, split, model, loss_fn)
        test_loss += batch_loss
        correct += batch_correct
    correct /= size
    test_loss /= size
    print(f"{split.title()} Error: \n Avg Accuracy: {(100*correct):>0.1f}%, Avg Loss: {test_loss:>8f}")

## Encoder

Encoder Design


In [20]:
# encoder
dimensions = [602, 300, 60]
encoder_model = nn.Sequential(
    MLP(dimensions),
    nn.Linear(dimensions[-1], num_classes),
    nn.Softmax(dim=1)
)

Encoder Pretraining

In [22]:
encoder_model = encoder_model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(encoder_model.parameters(), lr=1e-3)

# if node_level:
#   optimizer = op.optimizers.optimizer.DPOptimizer(
#       # TODO: Fill out these parameters '?'
#       optimizer=optimizer,
#       noise_multiplier=?,
#       max_grad_norm=?
#   )

for t in range(200):
    batch = next(iter(train_loader))
    train(batch, encoder_model, loss_fn, optimizer)
    if (t + 1) % 20 == 0:
      batch_test(next(iter(train_loader)), "TRAIN", encoder_model, loss_fn, True)
test(test_loader, "TEST", encoder_model, loss_fn)
print("Done!")

encoder = encoder_model[0]
encoder.requires_grad=False

# for name, param in encoder_model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)

Train Error: 
 Accuracy: 75.0%, Loss: 1.535050
Train Error: 
 Accuracy: 81.2%, Loss: 1.484142
Train Error: 
 Accuracy: 75.0%, Loss: 1.534483
Train Error: 
 Accuracy: 65.6%, Loss: 1.594603
Train Error: 
 Accuracy: 62.5%, Loss: 1.659892
Train Error: 
 Accuracy: 81.2%, Loss: 1.513257
Train Error: 
 Accuracy: 81.2%, Loss: 1.456602
Train Error: 
 Accuracy: 81.2%, Loss: 1.452872
Train Error: 
 Accuracy: 68.8%, Loss: 1.612048
Train Error: 
 Accuracy: 68.8%, Loss: 1.578732
Train Error: 
 Accuracy: 81.2%, Loss: 1.491779
Train Error: 
 Accuracy: 68.8%, Loss: 1.548485
Train Error: 
 Accuracy: 71.9%, Loss: 1.558250
Train Error: 
 Accuracy: 71.9%, Loss: 1.577147
Train Error: 
 Accuracy: 62.5%, Loss: 1.618584
Train Error: 
 Accuracy: 68.8%, Loss: 1.595579
Train Error: 
 Accuracy: 90.6%, Loss: 1.381599
Train Error: 
 Accuracy: 75.0%, Loss: 1.525755
Train Error: 
 Accuracy: 65.6%, Loss: 1.594417
Train Error: 
 Accuracy: 84.4%, Loss: 1.439731
Test Error: 
 Avg Accuracy: 75.4%, Avg Loss: 1.522556
Done!


## PMAT Training

In [23]:
from pyvacy import optim, analysis
# PMAT
pmat_model = nn.Sequential(
    encoder,
    PMAT(K_hop, 60, agg_sigma),
    Classification(K_hop, [], [(K_hop+1)*60, num_classes])
)
pmat_model = pmat_model.to(device)
loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(encoder_model.parameters(), lr=1e-3)
# TODO: Not allowed dyanmic batch_size for DPSGD, our batches are edge-wise
# so they should have fixed batch_size!
optimizer = optim.DPAdam(
    l2_norm_clip=1.0,
    noise_multiplier=1.0,
    batch_size=batch_size,
    params=pmat_model.parameters(),
    lr=0.5e-1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.5)

In [24]:
edge_epochs = 0
for t in range(10000):
    batch = next(iter(train_loader))
    train(batch, pmat_model, loss_fn, optimizer)
    edge_epochs += batch_size / X_train.size(dim=0)
    epsilon = analysis.moments_accountant(X_train.size(dim=0), batch_size, 
                                          1.0, edge_epochs, pmat_delta)
    if (t + 1) % 500 == 0:
      print("Epoch:", edge_epochs)
      batch_test(next(iter(train_loader)), "TRAIN", pmat_model, loss_fn, True)
      print("Optimizer Achieves ({:>0.1f}, {})-DP".format(epsilon, pmat_delta))
      print("LR:", scheduler.get_last_lr()[0])
    scheduler.step()
    if epsilon >= pmat_epsilon:
      break
test(test_loader, "TEST", pmat_model, loss_fn)
print("Done!")

pmat = pmat_model[1]
pmat.requires_grad = False

Epoch: 0.17095478246004095
Train Error: 
 Accuracy: 68.8%, Loss: 1.039748
Optimizer Achieves (0.0, 0.5)-DP
LR: 0.05
Epoch: 0.34190956492008306
Train Error: 
 Accuracy: 78.1%, Loss: 0.845472
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.05
Epoch: 0.5128643473801252
Train Error: 
 Accuracy: 78.1%, Loss: 0.744260
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.025
Epoch: 0.6838191298401674
Train Error: 
 Accuracy: 68.8%, Loss: 0.814368
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.025
Epoch: 0.8547739123002095
Train Error: 
 Accuracy: 75.0%, Loss: 0.702001
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.0125
Epoch: 1.0257286947602433
Train Error: 
 Accuracy: 87.5%, Loss: 0.645555
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.0125
Epoch: 1.19668347722023
Train Error: 
 Accuracy: 84.4%, Loss: 0.622416
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.00625
Epoch: 1.3676382596802166
Train Error: 
 Accuracy: 90.6%, Loss: 0.399685
Optimizer Achieves (0.1, 0.5)-DP
LR: 0.00625
Epoch: 1.5385930421402032
Train Error: 
 Accuracy: 87.5%, Loss: 0.44

## Full Model Training

TODO:
  - try other dataset
  - compare with their implementation
  - try train/test split across edges
  - batch normalization
  - add identity matrix to see performance

Train full model

In [50]:
model = GAP(encoder, 
            PMA(K_hop, agg_sigma), 
            Classification(K_hop, [60, 20], [(K_hop+1)*20, num_classes]))
# model = GAP(encoder, 
#             pmat, 
#             Classification(K_hop, [60, 20], [(K_hop+1)*20, num_classes]))
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.6)

In [51]:
for t in range(10000):
    batch = next(iter(train_loader))
    train(batch, model, loss_fn, optimizer)
    if (t + 1) % 1000 == 0:
      batch_test(next(iter(train_loader)), "TRAIN", model, loss_fn, True)
    scheduler.step()
test(test_loader, "TEST", model, loss_fn)
print("Done!")

Train Error: 
 Accuracy: 93.8%, Loss: 0.390964
Train Error: 
 Accuracy: 93.8%, Loss: 0.130649
Train Error: 
 Accuracy: 96.9%, Loss: 0.260985
Train Error: 
 Accuracy: 100.0%, Loss: 0.064982
Train Error: 
 Accuracy: 100.0%, Loss: 0.000013
Train Error: 
 Accuracy: 100.0%, Loss: 0.010245
Train Error: 
 Accuracy: 96.9%, Loss: 0.064983
Train Error: 
 Accuracy: 100.0%, Loss: 0.000011
Train Error: 
 Accuracy: 100.0%, Loss: 0.040663
Train Error: 
 Accuracy: 96.9%, Loss: 0.064983
Test Error: 
 Avg Accuracy: 95.6%, Avg Loss: 0.202178
Done!


## Backup

In [None]:
# # TEMP CODE
# edge_index = torch.tensor([[0, 2, 0, 0, 2, 3, 1, 4, 1, 2, 4],
#                            [1, 0, 3, 4, 1, 1, 4, 4, 1, 3, 3]], dtype=torch.long)
# x = torch.tensor([[0, 1, 0], [1, 2, 2], [2, 3, 1], [3, 2, 4], [4, 2, 3]], dtype=torch.float)
# y = torch.tensor([0, 1, 1, 2, 2], dtype=torch.long)
# pma = PMAT(2, 3, 0)
# AggregationModule.edge_index = edge_index
# stack = pma(x)
# print(stack)

# # # plt.figure(figsize=(16,7))
# # # plt.imshow(tensor)
# # # plt.show()
# #         # [encoder, pma, element_wise_mlp, combine, mlp]


In [None]:
# from torch_geometric.datasets import Amazon

# dataset = Amazon('.', name='Computers')[0]
# # prepare dataset by removing classes that have less than 1000 examples
# dataset = prepare_dataset(dataset, 1000)
# dataset = add_self_edges(dataset)
# # get num classes
# num_classes = torch.unique(dataset.y).size(dim=0)

# # train/test split
# train_dataset, test_dataset = train_test_split(dataset, 0.2)
# print(train_dataset.edge_index.size(dim=1))
# train_dataset = add_edge_to_low_degree_nodes(train_dataset, 10)
# print(train_dataset.edge_index.size(dim=1))
# test_dataset = add_edge_to_low_degree_nodes(test_dataset, 10)
# train_dataset, test_dataset = standardization(train_dataset, test_dataset)

In [None]:
# # train
# def train(dataloader, model, loss_fn, optimizer, print_every = 100):
#     size = len(dataloader.dataset)
#     model.train()
#     for batch, (X, y) in enumerate(dataloader):
#         X, y = X.to(device), y.to(device)

#         # Compute prediction error
#         pred = model(X)
#         loss = loss_fn(pred, y)

#         # Backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         if batch % print_every == 0:
#             loss, current = loss.item(), batch * len(X)
#             print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# # test
# def test(dataloader, model, loss_fn):
#     size = len(dataloader.dataset)
#     num_batches = len(dataloader)
#     model.eval()
#     test_loss, correct = 0, 0
#     with torch.inference_mode():
#         for X, y in dataloader:
#             X, y = X.to(device), y.to(device)
#             pred = model(X)
#             test_loss += loss_fn(pred, y).item()
#             correct += (pred.argmax(1) == y).type(torch.float).sum().item()
#     test_loss /= num_batches
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [14]:
# from torch_geometric.loader import NeighborLoader
# from torch_geometric.data import Data
# edge_index = torch.tensor([[0, 2, 0, 0, 2, 3, 1, 4, 1, 2, 4],
#                            [1, 0, 3, 4, 1, 1, 4, 4, 1, 3, 3]], dtype=torch.long)
# x = torch.tensor([[0, 1, 0], [1, 2, 2], [2, 3, 1], [3, 2, 4], [4, 2, 3]], dtype=torch.float)
# y = torch.tensor([0, 1, 1, 2, 2], dtype=torch.long)
# data = Data(x=x, y=y, edge_index=edge_index)
# loader = NeighborLoader(data, [10], batch_size=2, shuffle=True)
# batch = next(iter(loader))
# print(batch.x)
# print(batch.edge_index)
# print(batch.x[:batch.batch_size, ])
# data = add_edge_to_low_degree_nodes(data, 3)
# print(data.x)
# data = standardization(data)
# print(data.x)
# print(data.x.std(dim=0))
# print(data.edge_index)
# print(get_adjacency_matrix(data.edge_index, x.size(dim=0)).to_dense())
# train, test = train_test_split(data, 0.2)
# print(train.x)
# print(train.edge_index)
# print(test.x)
# print(test.edge_index)

tensor([[4., 2., 3.],
        [3., 2., 4.],
        [0., 1., 0.],
        [1., 2., 2.],
        [2., 3., 1.]])
tensor([[2, 3, 0, 2, 4, 0],
        [0, 0, 0, 1, 1, 1]])
tensor([[4., 2., 3.],
        [3., 2., 4.]])


In [None]:
# # Sigma calculated above in node-level and edge-level DP case
# gap = GAP(encoder, PMA(A, K_hop, sigma), Classification(K_hop, [60, 30, 20], [(K_hop+1)*20, 60, 30, num_classes]))
# gap_model = gap.to(device)
# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(gap_model.parameters(), lr=1e-1)

# # if node_level:
# #   optimizer = op.optimizers.optimizer.DPOptimizer(
# #       # TODO: Fill out these parameters '?'
# #       optimizer=optimizer,
# #       noise_multiplier=?,
# #       max_grad_norm=?,
# #       loss_reduction='sum'
# #   ) 

# epochs = 500
# for t in range(epochs):
#     # print(f"Epoch {t+1}\n-------------------------------")
#     train(X, y, gap_model, loss_fn, optimizer)
#     if t % 10 == 0:
#       test(X, y, gap_model, loss_fn)
# print("Done!")