In [None]:
import os

# Move from the notebook's directory up to the project root so that the
# `src.*` imports and the relative `./data/...` paths used below resolve.
# The guard keeps this cell idempotent: once the working directory already
# contains `src/`, re-running the cell must not climb another level.
if not os.path.isdir('src'):
    os.chdir('..')

In [None]:
from src.utils import load_data, define_additional_args, compute_hypergeometric
from src.minibatch import Minibatch
from src.trainer import Trainer
from src.evaluator import Evaluator
import torch
import numpy as np
import pdb
from datetime import date, datetime
import time

In [None]:
# data_path = './data/ppi'
# num_subgraphs = 200
# num_par_samplers = 10
# use_cuda = True
# sampler_args = {
#     'method': 'rw',
#     'num_root': 200,
#     'depth': 1
# }
# num_iterations = 5000
# model_args = {
#     'arch': 'GraphSAGE',
#     'hidden_channels': 512,
#     'dropout': 0.1,
#     'num_layers': 1
# }
# training_args = {
#     'method': 'normal',
#     'loss': 'sigmoid',
#     'lr': 0.01,
#     'clip_norm': 5
# }
# save_model_dir = f'/nfs/students/ayle/NodeDP/models/{str(datetime.now())}'
# eval_every = 10

In [None]:
# --- Active experiment configuration -------------------------------------
# Dataset location and run-level knobs.
data_path = './data/ogbn-arxiv_undirected'
use_cuda = True
num_subgraphs = 200       # total subgraphs; split across samplers further down
num_par_samplers = 10
num_iterations = 1000     # number of training steps in the loop below
eval_every = 10           # validation/report interval, in iterations

# Subgraph sampler settings (forwarded to Minibatch).
sampler_args = {'method': 'drw', 'num_root': 300, 'depth': 2}

# Model settings (forwarded to Trainer and Evaluator).
model_args = {'arch': 'GCN', 'hidden_channels': 512, 'dropout': 0.1, 'num_layers': 2}

# Training settings. 'alpha' is the order printed in the "RDP: (alpha, gamma)"
# line and 'delta' the second component of the printed "DP: (eps, delta)" pair.
training_args = {'method': 'ours', 'loss': 'softmax', 'lr': 0.01, 'alpha': 2, 'delta': 1e-4}

# Timestamped output directory; not referenced by the cells shown in this notebook.
save_model_dir = f'/nfs/students/ayle/NodeDP/models/{str(datetime.now())}'

In [None]:
# data_path = './data/ogbn-arxiv_undirected'
# num_subgraphs = 200
# num_par_samplers = 10
# use_cuda = True
# sampler_args = {
#     'method': 'nodes_max',
#     'num_nodes': 1000,
#     'max_degree': 7
# }
# num_iterations = 1000
# model_args = {
#     'arch': 'GCN',
#     'hidden_channels': 512,
#     'dropout': 0.1,
#     'num_layers': 1
# }
# training_args = {
#     'method': 'node_dp_max_degree',
#     'loss': 'softmax',
#     'lr': 0.01,
    
#     'alpha': 2,
#     'delta': 1e-4
# }
# save_model_dir = f'/nfs/students/ayle/NodeDP/models/{str(datetime.now())}'
# eval_every = 10

In [None]:
# Logging callable handed to the loader, sampler and trainer below and used to
# emit the per-evaluation report; here it simply writes to stdout.
out = print

In [None]:
# Load the dataset found at `data_path`, passing `out` as the logging callable.
# adj_full / adj_train / role go to Minibatch; feats / class_arr go to the
# Trainer and Evaluator (exact types are defined in src.utils — not shown here).
adj_full, adj_train, feats, class_arr, role = load_data(data_path, out)

In [None]:
# Derive the per-sampler subgraph count from the total number of subgraphs and
# the number of parallel samplers (presumably a split of one by the other —
# TODO confirm against src.utils.define_additional_args).
num_subgraphs_per_sampler = define_additional_args(num_subgraphs, num_par_samplers, out)

In [None]:
# Batch sampler: provides `sample_one_batch(...)` for training/validation and
# exposes `node_train`, whose length is used when setting up the DP accounting.
minibatch = Minibatch(
    adj_full,
    adj_train,
    role,
    num_par_samplers,
    num_subgraphs_per_sampler,
    use_cuda,
    sampler_args,
)

In [None]:
# The trainer owns the model that is optimised in the loop below.
trainer = Trainer(
    training_args, model_args, feats, class_arr, use_cuda, minibatch, out,
)

# The evaluator holds a separate model instance; the training loop copies the
# trainer's weights into it (load_state_dict) before each validation step.
evaluator = Evaluator(
    model_args, feats, class_arr, training_args['loss'],
)

In [None]:
# Privacy-accounting setup; only the two DP training methods need it.
#   K     - number of affected nodes in one batch
#   m     - number of nodes sampled in one batch
#   sigma - value passed as `sigma=` to the DP train step and used (scaled by C)
#           in the RDP formula of the training loop
if training_args['method'] in ('ours', 'node_dp_max_degree'):
    if training_args['method'] == 'ours':
        K = sampler_args['depth'] + 1
        m = sampler_args['num_root'] * K
        sigma = K
    else:
        K = sampler_args['max_degree'] + 1
        m = sampler_args['num_nodes']
        sigma = 2 * K

    C = trainer.C        # max sensitivity
    total_gamma = 0      # accumulated RDP cost, incremented once per DP step

    # Weights indexed by i in the training loop's RDP sum (presumably the
    # probability of i affected nodes landing in a batch — see src.utils).
    gho = compute_hypergeometric(len(minibatch.node_train), K, m)

In [None]:
# Main training loop: one optimisation step per iteration, with a validation
# pass and a progress report every `eval_every` iterations.
uses_dp = training_args['method'] in ['ours', 'node_dp_max_degree']

if uses_dp:
    # RDP cost of a single DP step at order alpha. alpha, sigma, C and gho are
    # all fixed for the whole run, so the value is computed once here instead
    # of being recomputed identically on every iteration.
    rdp_alpha = training_args['alpha']
    rdp_gamma_per_step = 1 / (rdp_alpha - 1) * np.log(sum(
        p * np.exp(rdp_alpha * (rdp_alpha - 1) * 2 * (i * C) ** 2 / (sigma * C) ** 2)
        for i, p in enumerate(gho)
    ))

t1 = time.time()
for it in range(num_iterations):
    if training_args['method'] == 'normal':
        trainer.train_step(*minibatch.sample_one_batch(out))
    elif uses_dp:
        trainer.dp_train_step_fast(*minibatch.sample_one_batch(out), sigma=sigma)
        # RDP composes additively across steps.
        total_gamma += rdp_gamma_per_step

    if it % eval_every == 0:
        # Stop the clock before evaluating so only training time is reported.
        t2 = time.time()

        # Evaluate a copy of the current weights on a validation batch.
        evaluator.model.load_state_dict(trainer.model.state_dict())
        preds, labels = evaluator.eval_step(*minibatch.sample_one_batch(out, mode='val'))
        metrics = evaluator.calc_metrics(preds, labels)

        print_statement = f"Iteration {it}:"
        for metric, val in metrics.items():
            print_statement += f"\t {metric} = {val}"
        print_statement += f"\t Training Time = {t2-t1}"
        out(print_statement)

        if uses_dp:
            out("RDP: (" + str(training_args['alpha']) + "," + str(total_gamma) + ")")
            # Convert the accumulated (alpha, gamma)-RDP guarantee into (eps, delta)-DP.
            eps = total_gamma + np.log(1 / training_args['delta']) / (training_args['alpha'] - 1)
            out("DP: (" + str(eps) + "," + str(training_args['delta']) + ")")

        # Restart the clock so evaluation time is excluded from the next report.
        t1 = time.time()

In [None]:
RDP: (2,0.4951858588028699)
DP: (9.705526230779052,0.0001)
Iteration 340:	 F1 Micro = 0.3025575311645595	 F1 Macro = 0.043780989711908525	 Accuracy = 0.3025575311645595	 Training Time = 1.7059597969055176
RDP: (2,0.5101461566518992)
DP: (9.720486528628081,0.0001)