In [3]:
import torch
import numpy as np
from sklearn.metrics import pairwise

from src.data import Dataset
from src.utils import masking, compute_accuracy

from layers import GCN, Classifier
from models.GraFN import GraFN

We provide this notebook to demonstrate the reproducibility of our model.  
The **config_model** function builds our model **GraFN**,  
and the **evaluate_node_classification** function loads the trained weights for a given label rate and evaluates node classification performance.  
This evaluation protocol is the same as the one used in the experiments in our paper.

In [4]:
def config_model(data, device=0):
    """Build a GraFN model whose dimensions are derived from ``data``.

    Args:
        data: Graph object exposing a feature tensor ``x`` (its size along
            dimension 1 is used as the encoder input size) and a label
            tensor ``y``.
        device: Forwarded unchanged to ``GraFN``.

    Returns:
        A freshly constructed ``GraFN`` instance; no weights are loaded here.
    """
    hidden_dim = 128

    # Layer-size list handed to the encoder: input width, then two hidden widths.
    feature_dim = data.x.size(1)
    gcn = GCN([feature_dim, hidden_dim, hidden_dim])

    # The distinct label values determine the classifier's output width.
    label_set = data.y.unique()
    head = Classifier(hidden_dim, len(label_set))

    return GraFN(encoder=gcn, classifier=head, unique_labels=label_set, device=device)

In [5]:
def evaluate_node_classification(name, label_rate, folds=20):
    """Evaluate stored GraFN weights over several folds and print mean accuracy.

    For each fold, the masks are rebuilt with ``masking``, a fresh model is
    configured, the fold's state dict is loaded into it, and validation/test
    accuracy are computed from the classifier predictions.

    Args:
        name: Dataset name; also used to locate the weight file
            ``./weights/{name}_{label_rate}.pt``.
        label_rate: Label rate passed to ``masking`` and used in the weight
            file name.
        folds: Number of folds to evaluate. The weight file is indexed by fold,
            so it must contain at least this many entries.

    Returns:
        None. The mean validation and test accuracies are printed.
    """
    data = Dataset(root='data', dataset=name)[0]

    # The checkpoint path does not depend on the fold, so deserialize it once
    # rather than on every loop iteration.
    weight_path = f'./weights/{name}_{label_rate}.pt'
    weight_list = torch.load(weight_path)

    val_accs, test_accs = [], []
    for fold in range(folds):
        train_mask, val_mask, test_mask = masking(fold, data, label_rate)

        model = config_model(data)
        model.load_state_dict(weight_list[fold])

        model.eval()
        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            _, preds = model.cls(data)

        # First returned value (presumably train accuracy) is not reported.
        _, val_acc, test_acc = compute_accuracy(preds, data.y, train_mask, val_mask, test_mask)
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    val_acc_mean = torch.tensor(val_accs).mean().item()
    test_acc_mean = torch.tensor(test_accs).mean().item()
    print("{} label rate : {} --> valid acc : {:.2f} / test acc : {:.2f}".format(name, label_rate, val_acc_mean, test_acc_mean))
    

# Node Classification

Because of unavoidable randomness, the performance below is not exactly the same as in **Table 1**.  
However, it is close enough to demonstrate the reproducibility of our model **GraFN**. 

In [6]:
# Evaluate the stored 'computers' weights at each label rate.
for rate in (0.15, 0.2, 0.25):
    evaluate_node_classification('computers', label_rate=rate)

computers label rate : 0.15 --> valid acc : 72.38 / test acc : 71.87
computers label rate : 0.2 --> valid acc : 75.07 / test acc : 74.80
computers label rate : 0.25 --> valid acc : 78.52 / test acc : 77.53


In [7]:
# Evaluate the stored 'photo' weights at each label rate.
for rate in (0.15, 0.2, 0.25):
    evaluate_node_classification('photo', label_rate=rate)

photo label rate : 0.15 --> valid acc : 79.55 / test acc : 79.50
photo label rate : 0.2 --> valid acc : 82.46 / test acc : 81.36
photo label rate : 0.25 --> valid acc : 85.49 / test acc : 85.54


In [8]:
# Evaluate the stored 'cora' weights at each label rate.
for rate in (0.5, 1.0, 2.0):
    evaluate_node_classification('cora', label_rate=rate)

cora label rate : 0.5 --> valid acc : 66.99 / test acc : 66.18
cora label rate : 1.0 --> valid acc : 73.71 / test acc : 72.47
cora label rate : 2.0 --> valid acc : 78.47 / test acc : 77.21


In [9]:
# Evaluate the stored 'citeseer' weights at each label rate.
for rate in (0.5, 1.0, 2.0):
    evaluate_node_classification('citeseer', label_rate=rate)


citeseer label rate : 0.5 --> valid acc : 58.37 / test acc : 57.25
citeseer label rate : 1.0 --> valid acc : 66.87 / test acc : 66.42
citeseer label rate : 2.0 --> valid acc : 70.74 / test acc : 69.85


In [10]:
# Evaluate the stored 'pubmed' weights at each label rate.
for rate in (0.03, 0.06, 0.1):
    evaluate_node_classification('pubmed', label_rate=rate)

pubmed label rate : 0.03 --> valid acc : 66.78 / test acc : 66.14
pubmed label rate : 0.06 --> valid acc : 70.09 / test acc : 69.36
pubmed label rate : 0.1 --> valid acc : 76.00 / test acc : 75.55
