# Node Classification
Unfortunately, we did not have the computational resources to run the code for the OGB-MAG and OGB-Products datasets: the memory they require exceeds the 12 GB provided by Google Colab. The code is nevertheless fully functional and can be used to check the results when sufficient resources are available.

In [None]:
#@title Setup For Google Colab
# Colab-only bootstrap cell: clones the project and the reference SSGC
# repository under /content, installs the dependencies, downloads the
# document-classification archive and finally changes the working directory to
# the cloned project so the later `from utils import ...` style imports resolve.

REPO_URL = 'https://github.com/Timob12/SSGC-Reimplementation.git'

# On a re-run git refuses to clone into the existing directory (see the
# "fatal: destination path ... already exists" line in the recorded output);
# the rest of the cell still executes.
!git clone "{REPO_URL}"

# install dependencies
# NOTE(review): `ogb` is the only package installed without a version pin.
!pip install scipy~=1.6.2
!pip install nni~=2.3
!pip install ogb

import torch

# Derive the torch / CUDA tags used to select the matching prebuilt wheels.
# NOTE(review): torch.version.cuda is presumably None on a CPU-only build, in
# which case `.replace` would fail — confirm a GPU runtime is required here.
TORCH = torch.__version__.split('+')[0]
CUDA = 'cu' + torch.version.cuda.replace('.','')

!pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

MAIN_DIR = '/content/SSGC-Reimplementation'

# pull the node classification data for using the models from another repository
DATA_REPO_URL = 'https://github.com/allenhaozhu/SSGC.git'

!git clone "{DATA_REPO_URL}"

# NOTE(review): when the destination `data` directory already exists, cp nests
# the copy as data/data (the recorded output shows exactly that) — confirm this
# is the layout the data loaders expect.
%cp -rv /content/SSGC/data /content/SSGC-Reimplementation/data
%cp -rv /content/SSGC/model /content/SSGC-Reimplementation

# download the document classification data
# gdown is not installed by this cell; it is assumed to be available in the runtime.
import gdown
import os

DOC_CLASSIFICATION_DATA_URL = 'https://drive.google.com/u/0/uc?id=10kx3z3bjYFoeRjjg1_DZOAP39Jln0BCh'
DOC_CLASSIFICATION_DIR = os.path.join(MAIN_DIR, 'DocumentClassification') 
DATA_DEST = os.path.join(DOC_CLASSIFICATION_DIR, 'data.tar.gz')

gdown.download(DOC_CLASSIFICATION_DATA_URL, output=DATA_DEST, quiet=False)

# Unpack the archive next to where it was downloaded.
!tar -xf "{DATA_DEST}" -C "{DOC_CLASSIFICATION_DIR}"

# All following cells run relative to the project root.
%cd "{MAIN_DIR}"

fatal: destination path 'SSGC-Reimplementation' already exists and is not an empty directory.
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
fatal: destination path 'SSGC' already exists and is not an empty directory.
'/content/SSGC/data' -> '/content/SSGC-Reimplementation/data/data'
'/content/SSGC/data/ind.citeseer.allx' -> '/content/SSGC-Reimplementation/data/data/ind.citeseer.allx'
'/content/SSGC/data/ind.citeseer.ally' -> '/content/SSGC-Reimplementation/data/data/ind.citeseer.ally'
'/content/SSGC/data/ind.citeseer.graph' -> '/content/SSGC-Reimplementation/data/data/ind.citeseer.graph'
'/content/SSGC/data/ind.citeseer.test.index' -> '/content/SSGC-Reimplementation/data/data/ind.citeseer.test.index'
'/content/SSGC/data/ind.citeseer.tx' -> '/content/SSGC-Reimplementation/data/data/ind.citeseer.tx'
'/content

Downloading...
From: https://drive.google.com/u/0/uc?id=10kx3z3bjYFoeRjjg1_DZOAP39Jln0BCh
To: /content/SSGC-Reimplementation/DocumentClassification/data.tar.gz
100%|██████████| 348M/348M [00:01<00:00, 221MB/s]


/content/SSGC-Reimplementation


## Small/Medium Dataset

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed
from models import get_model
from metrics import accuracy
import pickle as pkl
#from args import get_citation_args
from time import perf_counter

# Hyper-parameters shared by the Cora / Citeseer / Pubmed experiments below.
# Random seeding and command-line argument parsing are not used in this notebook.

# Feature pre-computation (forwarded to load_citation / sgc_precompute by setup()).
normalization = 'NormAdj'
degree = 16
alpha = 0.05

# Classifier settings.
hidden = 0
dropout = 0

# Optimisation settings (defaults of train_regression).
epochs = 1000
lr = 0.02
weight_decay = 5e-6

def setup(dataset, normalization, degree, alpha):
    adj, features, labels, idx_train, idx_val, idx_test = load_citation(dataset, normalization, False)

    model = get_model("SGC", features.size(1), labels.max().item()+1, 0, 0, False)

    features, precompute_time = sgc_precompute(features, adj, degree, alpha)
    
    return precompute_time, features, labels, idx_train, idx_val, idx_test, model

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=epochs, weight_decay=weight_decay,
                     lr=lr, dropout=dropout):
    """Train ``model`` with Adam and keep the weights with the best validation accuracy.

    Args:
        model: Module mapping a feature tensor to class logits.
        train_features, train_labels: Training inputs and integer class targets.
        val_features, val_labels: Validation inputs and targets used for model selection.
        epochs: Number of full-batch optimisation steps.
        weight_decay: Weight decay passed to Adam.
        lr: Learning rate passed to Adam.
        dropout: Accepted for interface compatibility; not used by this function.

    Returns:
        ``(model, best_acc_val, train_time)``. ``model`` is the object passed in,
        with the weights of the best-validation epoch restored. If validation
        accuracy never exceeds zero the model is returned as trained.
        ``train_time`` is the wall-clock duration of the loop in seconds.
    """
    import copy  # local import: needed to snapshot the best weights

    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros((1))
    # `best_model = model` only stored a reference, so the "best" model was
    # always the last-epoch model. Snapshot the weights instead.
    best_state = None
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            output = model(val_features)
            acc_val = accuracy(output, val_labels)
            if best_acc_val < acc_val:
                best_acc_val = acc_val
                best_state = copy.deepcopy(model.state_dict())

    train_time = perf_counter()-t

    # Restore outside the timed region so the reported time stays the loop time.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_acc_val, train_time

def test_regression(model, test_features, test_labels):
    model.eval()
    return accuracy(model(test_features), test_labels)


# Run the full pipeline several times per citation dataset and report averages.
datasets = ["cora", "citeseer", "pubmed"]
num_runs = 10  # independent repetitions averaged per dataset
for ds in datasets:
    acc_test = 0
    acc_val_total = 0
    total_precompute_time = 0
    total_train_time = 0
    for _ in range(num_runs):
        # setup() builds a fresh model and recomputes the features on every run.
        precompute_time, features, labels, idx_train, idx_val, idx_test, model = setup(ds, normalization, degree, alpha)

        model, acc_val, train_time = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                    epochs, weight_decay, lr, dropout)
        acc_test += test_regression(model, features[idx_test], labels[idx_test])
        # Previously only the last run's validation accuracy was reported
        # next to the averaged test accuracy; accumulate it as well.
        acc_val_total += acc_val
        total_precompute_time += precompute_time
        total_train_time += train_time
    acc_test /= num_runs
    acc_val = acc_val_total / num_runs
    total_precompute_time /= num_runs
    total_train_time /= num_runs
    print("dataset name:", ds)
    print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
    print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(total_precompute_time, total_train_time, total_precompute_time+total_train_time))

dataset name: cora
Validation Accuracy: 0.8000 Test Accuracy: 0.8110
Pre-compute time: 0.4643s, train time: 1.3363s, total: 1.8006s


  r_inv = np.power(rowsum, -1).flatten()


dataset name: citeseer
Validation Accuracy: 0.7080 Test Accuracy: 0.6987
Pre-compute time: 1.6117s, train time: 2.6039s, total: 4.2156s
dataset name: pubmed
Validation Accuracy: 0.8020 Test Accuracy: 0.7960
Pre-compute time: 1.7902s, train time: 1.2669s, total: 3.0571s


## Alpha and K Comparison on Cora, Citeseer, and Pubmed

In [None]:
import pandas as pd

# Values explored in the two sweeps; while one quantity is varied the other is
# held at its *_standard value.
datasets = ["cora", "citeseer", "pubmed"]
alpha_ = [0.0, 0.05, 0.1, 0.15]
K_ = [2, 4, 8, 16, 32, 64]
alpha_standard = 0.05
K_standard = 16


def _evaluate_once(dataset_name, degree_value, alpha_value):
    """Precompute, train and test once; return the test accuracy tensor.

    Uses the notebook-level ``normalization``, ``epochs``, ``weight_decay``,
    ``lr`` and ``dropout`` settings. Working in a function keeps the sweep from
    overwriting the global ``alpha``, ``model``, ``features`` ... of other cells.
    """
    _, feats, labs, train_ids, val_ids, test_ids, clf = setup(
        dataset_name, normalization, degree_value, alpha_value)
    clf, _, _ = train_regression(clf, feats[train_ids], labs[train_ids],
                                 feats[val_ids], labs[val_ids],
                                 epochs, weight_decay, lr, dropout)
    return test_regression(clf, feats[test_ids], labs[test_ids])


# One row per dataset; columns are keyed by the swept value (single run each).
alpha_results = []
for dataset_name in datasets:
    row = {'Dataset': dataset_name}
    for alpha_value in alpha_:
        acc = _evaluate_once(dataset_name, K_standard, alpha_value)
        print("dataset:", dataset_name, ", average accuracy:", acc, "alpha:", alpha_value)
        row[alpha_value] = acc.item()
    alpha_results.append(row)

k_results = []
for dataset_name in datasets:
    row = {'Dataset': dataset_name}
    for k_value in K_:
        acc = _evaluate_once(dataset_name, k_value, alpha_standard)
        print("dataset:", dataset_name, ", average accuracy:", acc, "K:", k_value)
        row[k_value] = acc.item()
    k_results.append(row)

dataset: cora , average accuracy: tensor(0.8030, dtype=torch.float64) alpha: 0.0
dataset: cora , average accuracy: tensor(0.8110, dtype=torch.float64) alpha: 0.05
dataset: cora , average accuracy: tensor(0.8130, dtype=torch.float64) alpha: 0.1
dataset: cora , average accuracy: tensor(0.8180, dtype=torch.float64) alpha: 0.15


  r_inv = np.power(rowsum, -1).flatten()


dataset: citeseer , average accuracy: tensor(0.6990, dtype=torch.float64) alpha: 0.0
dataset: citeseer , average accuracy: tensor(0.6990, dtype=torch.float64) alpha: 0.05
dataset: citeseer , average accuracy: tensor(0.6960, dtype=torch.float64) alpha: 0.1
dataset: citeseer , average accuracy: tensor(0.6960, dtype=torch.float64) alpha: 0.15
dataset: pubmed , average accuracy: tensor(0.7900, dtype=torch.float64) alpha: 0.0
dataset: pubmed , average accuracy: tensor(0.7960, dtype=torch.float64) alpha: 0.05
dataset: pubmed , average accuracy: tensor(0.7950, dtype=torch.float64) alpha: 0.1
dataset: pubmed , average accuracy: tensor(0.7950, dtype=torch.float64) alpha: 0.15
dataset: cora , average accuracy: tensor(0.7790, dtype=torch.float64) K: 2
dataset: cora , average accuracy: tensor(0.8040, dtype=torch.float64) K: 4
dataset: cora , average accuracy: tensor(0.8080, dtype=torch.float64) K: 8
dataset: cora , average accuracy: tensor(0.8110, dtype=torch.float64) K: 16
dataset: cora , average

In [None]:
# Turn the per-dataset alpha sweep rows into a table, write it to CSV without
# the index column, and show it as the cell output.
alpha_df = pd.DataFrame(alpha_results)
alpha_df.to_csv('alpha_results.csv', index=False)
alpha_df

Unnamed: 0,Dataset,0.0,0.05,0.1,0.15
0,cora,0.803,0.811,0.813,0.818
1,citeseer,0.699,0.699,0.696,0.696
2,pubmed,0.79,0.796,0.795,0.795


In [None]:
# Turn the per-dataset K sweep rows into a table, write it to CSV without the
# index column, and show it as the cell output.
k_df = pd.DataFrame(k_results)
k_df.to_csv('k_results.csv', index=False)
k_df

Unnamed: 0,Dataset,2,4,8,16,32,64
0,cora,0.779,0.804,0.808,0.811,0.813,0.798
1,citeseer,0.676,0.681,0.691,0.698,0.7,0.693
2,pubmed,0.772,0.782,0.79,0.797,0.784,0.764


# Large OGB-ARXIV Dataset

## Embedding

In [None]:
import argparse

import torch
import torch.nn.functional as F

from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing
from time import perf_counter



class SGConv(MessagePassing):
    r"""Parameter-free multi-hop propagation used to precompute node embeddings.

    The forward pass does not apply the linear layer; it returns

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{X} + \frac{1}{K} \sum_{k=1}^{K}
        \mathbf{\tilde{A}}^{k} \mathbf{X},

    where :math:`\mathbf{\tilde{A}}` is the adjacency returned by ``gcn_norm``
    (with self-loops inserted when ``add_self_loops`` is set).

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Output size of the (unused in ``forward``) linear
            layer ``lin``; also shown in ``repr``.
        K (int, optional): Number of propagation steps. (default: :obj:`1`)
        cached (bool, optional): If :obj:`True`, the result of the first
            ``forward`` call is stored and returned by every later call,
            regardless of the inputs. (default: :obj:`False`)
        add_self_loops (bool, optional): Forwarded to ``gcn_norm``.
            (default: :obj:`True`)
        bias (bool, optional): Whether ``lin`` has a bias. (default: :obj:`True`)
        dropout (float, optional): Stored on the instance; not used by
            ``forward``. (default: :obj:`0.05`)
        alpha (float, optional): Weight of the un-propagated input in the
            output. (default: :obj:`0.05`, the previously hard-coded value)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super(SGConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        # Kept so the module's parameters / state_dict stay as before, even
        # though forward() returns the propagated features without using it.
        self.lin = Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result (``lin`` is left untouched)."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return ``alpha * x`` plus the mean of the ``K`` propagated versions of ``x``.

        When a cached result exists it is returned directly and the arguments
        are ignored.
        """
        cache = self._cached_x
        if cache is None:
            if isinstance(edge_index, Tensor):
                edge_index, edge_weight = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)
            elif isinstance(edge_index, SparseTensor):
                edge_index = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)

            # Running sum: only the current hop is kept in memory.
            output = self.alpha * x
            for k in range(self.K):
                x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                                   size=None)
                output = output + (1. / self.K) * x
            x = output
            if self.cached:
                self._cached_x = x
        else:
            x = cache

        return x

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each neighbour feature row by its edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused propagation for sparse adjacencies: ``adj_t @ x``."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Wrapper whose forward pass is a single cached ``SGConv`` propagation.

    ``lin`` and ``dropout`` are created/stored but not applied in ``forward``.
    """

    def __init__(self, in_channels, hidden, out_channels, num_layers, dropout):
        super().__init__()
        self.dropout = dropout
        # num_layers is used as the number of propagation steps K.
        self.conv1 = SGConv(in_channels, hidden, K=num_layers, cached=True)
        self.lin = torch.nn.Linear(hidden, out_channels)

    def reset_parameters(self):
        """Reset both sub-modules."""
        for module in (self.conv1, self.lin):
            module.reset_parameters()

    def forward(self, x, edge_index):
        """Return the propagated features produced by ``conv1``."""
        propagated = self.conv1(x, edge_index)
        return propagated


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with batch norm, ReLU and dropout in between.

    The output is the log-softmax of the last convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()
        self.dropout = dropout

        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()
        # Every layer except the last is followed by a batch norm; the first
        # one maps from the input width, the others from the hidden width.
        input_widths = [in_channels] + [hidden_channels] * (num_layers - 2)
        for width in input_widths:
            self.convs.append(SAGEConv(width, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def reset_parameters(self):
        """Re-initialise all convolutions, then all batch norms."""
        for layer in list(self.convs) + list(self.bns):
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Return per-node log-probabilities."""
        for conv, bn in zip(self.convs[:-1], self.bns):
            x = F.relu(bn(conv(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        logits = self.convs[-1](x, adj_t)
        return logits.log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-batch optimisation step and return the training loss.

    The model is evaluated on all nodes (``data.x``, ``data.adj_t``); the NLL
    loss is computed on the rows selected by ``train_idx`` only.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(log_probs[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc

precompute_time = 0.0


def main():
    """Precompute the ogbn-arxiv embedding, save it, and return the propagation time.

    The embedding is written to ``embedding_arxiv.pt`` in the working
    directory. The returned time (seconds) covers only the propagation itself:
    model construction and the disk write are deliberately excluded, and on
    CUDA the device is synchronised around the timed region so asynchronous
    kernels are fully accounted for.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-arxiv',
                                     transform=T.ToSparseTensor())

    data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    data = data.to(device)

    # 16 is passed as num_layers, i.e. the number of propagation steps.
    model = SGC(data.num_features, 256, dataset.num_classes, 16, 0.5).to(device)

    if device.type == 'cuda':
        torch.cuda.synchronize()
    start = perf_counter()
    with torch.no_grad():  # pure feature propagation, no gradients needed
        features = model(data.x, data.adj_t)
    if device.type == 'cuda':
        torch.cuda.synchronize()
    elapsed = perf_counter() - start

    torch.save(features, 'embedding_arxiv.pt')

    return elapsed


precompute_time = main()

Downloading http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip


Downloaded 0.08 GB: 100%|██████████| 81/81 [00:02<00:00, 30.33it/s]


Extracting dataset/arxiv.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 6700.17it/s]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 1760.09it/s]

Saving...



Done!


## SSGC

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed
from models import get_model
from metrics import accuracy
import pickle as pkl
#from args_cora import get_citation_args
from time import perf_counter
from scipy.sparse import coo_matrix, csr_matrix
from ogb.nodeproppred import PygNodePropPredDataset
import torch_geometric.transforms as T
from normalization import fetch_normalization, row_normalize
from utils import *

# Arguments
#args = get_citation_args()

#if args.tuned:
#    if args.model == "SGC":
#        with open("{}-tuning/{}.txt".format(args.model, args.dataset), 'rb') as f:
#            args.weight_decay = pkl.load(f)['weight_decay']
#            print("using tuned weight decay: {}".format(args.weight_decay))
#    else:
#        raise NotImplemented

# setting random seeds
#set_seed(args.seed, args.cuda)

# Load ogbn-arxiv with the ToSparseTensor transform and take its single graph.
dataset = PygNodePropPredDataset(name='ogbn-arxiv', transform=T.ToSparseTensor())
data = dataset[0]

# Node index splits as provided by the dataset.
split_idx = dataset.get_idx_split()
idx_train = split_idx['train']
idx_val = split_idx['valid']
idx_test = split_idx['test']
# Flatten data.y into a 1-D LongTensor by taking the first entry of every row.
labels = torch.LongTensor([y[0] for y in data.y.tolist()])

# Embedding file written by the "Embedding" cell; that cell must have run first.
features = torch.load('embedding_arxiv.pt', map_location='cpu')

# Classifier sized from the embedding width and the largest label id + 1.
model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=100, weight_decay=1e-05,
                     lr=0.2, dropout=0):
    """Train ``model`` with Adam and keep the weights with the lowest validation loss.

    Args:
        model: Module mapping a feature tensor to class logits.
        train_features, train_labels: Training inputs and integer class targets.
        val_features, val_labels: Validation inputs and targets used for model selection.
        epochs: Number of full-batch optimisation steps.
        weight_decay: Weight decay passed to Adam.
        lr: Learning rate passed to Adam.
        dropout: Accepted for interface compatibility; not used by this function.

    Returns:
        ``(model, best_acc_val, train_time)``. ``model`` is the object passed in,
        with the weights of the lowest-validation-loss epoch restored.
        ``best_acc_val`` is the highest validation accuracy seen in any epoch
        (tracked independently of the loss-based selection). ``train_time`` is
        the wall-clock duration of the loop in seconds.
    """
    import copy  # local import: needed to snapshot the best weights

    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros((1))
    # inf instead of the former sentinel 100. so any finite loss can be selected.
    best_loss_val = float('inf')
    # `best_model = model` only stored a reference, so the "best" model was
    # always the last-epoch model. Snapshot the weights instead.
    best_state = None
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            output = model(val_features)
            acc_val = accuracy(output, val_labels)
            loss_val = F.cross_entropy(output, val_labels)
            if best_acc_val < acc_val:
                best_acc_val = acc_val
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                best_state = copy.deepcopy(model.state_dict())

    train_time = perf_counter()-t

    # Restore outside the timed region so the reported time stays the loop time.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_acc_val, train_time

def test_regression(model, test_features, test_labels):
    model.eval()
    return accuracy(model(test_features), test_labels)

# Average validation/test accuracy and training time over independent runs.
num_runs = 10
acc_test = 0
acc_val = 0
train_time = 0

# The splits do not change between runs, so slice them once.
train_x, train_y = features[idx_train], labels[idx_train]
val_x, val_y = features[idx_val], labels[idx_val]
test_x, test_y = features[idx_test], labels[idx_test]

for _ in range(num_runs):
    # Build a fresh classifier for every run. Re-using one instance made each
    # "run" continue training from the previous one, so the runs were not
    # independent repetitions.
    model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)
    model, acc_val_buf, train_time_buf = train_regression(model, train_x, train_y, val_x, val_y,
                        100, 1e-05, 0.02, 0)
    acc_test += test_regression(model, test_x, test_y)
    acc_val += acc_val_buf
    train_time += train_time_buf
acc_test /= num_runs
acc_val /= num_runs
train_time /= num_runs

# precompute_time is the value produced by the "Embedding" cell.
print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))

Validation Accuracy: 0.6746 Test Accuracy: 0.6588
Pre-compute time: 0.8215s, train time: 9.2973s, total: 10.1188s


## SSGC + MLP

In [None]:
import argparse

import torch
import torch.nn.functional as F

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger


class MLP(torch.nn.Module):
    """Feed-forward classifier: (Linear, BatchNorm, ReLU, dropout) blocks plus a final Linear.

    The output is the log-softmax of the last linear layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()
        self.dropout = dropout

        self.lins = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()
        # Every layer except the last is followed by a batch norm; the first
        # one maps from the input width, the others from the hidden width.
        input_widths = [in_channels] + [hidden_channels] * (num_layers - 2)
        for width in input_widths:
            self.lins.append(torch.nn.Linear(width, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

    def reset_parameters(self):
        """Re-initialise all linear layers, then all batch norms."""
        for layer in list(self.lins) + list(self.bns):
            layer.reset_parameters()

    def forward(self, x):
        """Return per-row log-probabilities."""
        for lin, bn in zip(self.lins[:-1], self.bns):
            x = F.relu(bn(lin(x)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        logits = self.lins[-1](x)
        return torch.log_softmax(logits, dim=-1)


def train(model, x, y_true, train_idx, optimizer):
    """Run one optimisation step on the training rows and return the loss.

    Only the rows of ``x`` selected by ``train_idx`` are fed to the model;
    ``y_true`` is squeezed along dimension 1 before the targets are selected.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(x[train_idx])
    targets = y_true.squeeze(1)[train_idx]
    loss = F.nll_loss(log_probs, targets)

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, x, y_true, split_idx, evaluator):
    model.eval()

    out = model(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Train an MLP on the precomputed ogbn-arxiv embedding for several runs.

    Loads ``embedding_arxiv.pt`` (written by the "Embedding" cell) as the only
    input features, trains a freshly reset MLP per run, prints progress every
    10 epochs and finally prints the accuracies averaged over the runs.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # The raw node features (data.x) are not used; the embedding replaces them.
    x = torch.load('embedding_arxiv.pt', map_location='cpu').to(device)

    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)

    model = MLP(x.size(-1), 256, dataset.num_classes,
                3, 0.5).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')

    num_runs = 10
    num_epochs = 100
    train_accs, valid_accs, test_accs = [], [], []
    for run in range(num_runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
        # range(1, 100) stopped after 99 epochs; include the final epoch.
        for epoch in range(1, num_epochs + 1):
            loss = train(model, x, y_true, train_idx, optimizer)
            result = test(model, x, y_true, split_idx, evaluator)

            if epoch % 10 == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {1 + run:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')

        # Accuracies of the final model of this run.
        train_acc, valid_acc, test_acc = test(model, x, y_true, split_idx, evaluator)
        train_accs.append(train_acc)
        valid_accs.append(valid_acc)
        # Was `test_accs.append(test_accs)`, which appended the list to itself.
        test_accs.append(test_acc)

    print(f'average train_acc: {sum(train_accs) / len(train_accs):.2f}')
    # Valid/test accuracies were collected but never reported.
    print(f'average valid_acc: {sum(valid_accs) / len(valid_accs):.2f}')
    print(f'average test_acc: {sum(test_accs) / len(test_accs):.2f}')

main()



Run: 01, Epoch: 10, Loss: 1.7556, Train: 40.45%, Valid: 40.53%, Test: 43.80%
Run: 01, Epoch: 20, Loss: 1.4259, Train: 58.60%, Valid: 58.05%, Test: 59.07%
Run: 01, Epoch: 30, Loss: 1.2920, Train: 64.55%, Valid: 64.40%, Test: 64.70%
Run: 01, Epoch: 40, Loss: 1.2113, Train: 67.49%, Valid: 67.85%, Test: 67.81%
Run: 01, Epoch: 50, Loss: 1.1575, Train: 69.26%, Valid: 69.05%, Test: 68.21%
Run: 01, Epoch: 60, Loss: 1.1116, Train: 70.23%, Valid: 69.73%, Test: 68.89%
Run: 01, Epoch: 70, Loss: 1.0832, Train: 70.83%, Valid: 70.38%, Test: 69.60%
Run: 01, Epoch: 80, Loss: 1.0592, Train: 71.49%, Valid: 70.90%, Test: 69.84%
Run: 01, Epoch: 90, Loss: 1.0393, Train: 71.45%, Valid: 70.76%, Test: 70.36%
Run: 02, Epoch: 10, Loss: 1.7567, Train: 43.22%, Valid: 39.08%, Test: 43.03%
Run: 02, Epoch: 20, Loss: 1.4289, Train: 59.84%, Valid: 59.20%, Test: 60.99%
Run: 02, Epoch: 30, Loss: 1.2996, Train: 65.34%, Valid: 65.85%, Test: 65.78%
Run: 02, Epoch: 40, Loss: 1.2188, Train: 67.56%, Valid: 67.62%, Test: 66.99%

# Large OGB-MAG Dataset

## Embedding

In [None]:
import argparse

import torch
import torch.nn.functional as F
from torch_geometric.data import Data
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing



class SGConv(MessagePassing):
    r"""Parameter-free multi-hop propagation used to precompute node embeddings.

    The input rows are first L2-normalised; the forward pass then returns

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{X} + \frac{1}{K} \sum_{k=1}^{K}
        \mathbf{\tilde{A}}^{k} \mathbf{X},

    where :math:`\mathbf{\tilde{A}}` is the adjacency returned by ``gcn_norm``
    (with self-loops inserted when ``add_self_loops`` is set). The layer has no
    learnable parameters.

    Args:
        in_channels (int): Size of each input sample (stored; shown in ``repr``).
        out_channels (int): Stored and shown in ``repr`` only.
        K (int, optional): Number of propagation steps. (default: :obj:`1`)
        cached (bool, optional): If :obj:`True`, the result of the first
            ``forward`` call is stored and returned by every later call,
            regardless of the inputs. (default: :obj:`False`)
        add_self_loops (bool, optional): Forwarded to ``gcn_norm``.
            (default: :obj:`True`)
        bias (bool, optional): Accepted for interface compatibility; unused.
        dropout (float, optional): Stored on the instance; not used by
            ``forward``. (default: :obj:`0.05`)
        alpha (float, optional): Weight of the (normalised) un-propagated
            input in the output. (default: :obj:`0.05`, the previously
            hard-coded value)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super(SGConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return ``alpha * x`` plus the mean of the ``K`` propagated versions of ``x``.

        ``x`` is L2-normalised along dimension 1 first. When a cached result
        exists it is returned directly and the arguments are ignored.
        """
        cache = self._cached_x
        if cache is None:
            if isinstance(edge_index, Tensor):
                edge_index, edge_weight = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)
            elif isinstance(edge_index, SparseTensor):
                edge_index = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)

            x = F.normalize(x, dim=1, p=2)
            # Running sum: only the current hop is kept in memory.
            output = self.alpha * x
            for k in range(self.K):
                x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                                   size=None)
                output = output + (1. / self.K) * x
            x = output
            if self.cached:
                self._cached_x = x
        else:
            x = cache

        return x

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each neighbour feature row by its edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused propagation for sparse adjacencies: ``adj_t @ x``."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Thin wrapper around one cached ``SGConv`` that runs ``num_layers`` hops."""

    def __init__(self, in_channels, out_channels, num_layers):
        super().__init__()
        # num_layers is forwarded as the hop count K of the propagation layer.
        self.conv1 = SGConv(in_channels, out_channels,
                            K=num_layers, cached=True)

    def reset_parameters(self):
        """Delegate to the wrapped layer's ``reset_parameters``."""
        self.conv1.reset_parameters()

    def forward(self, x, edge_index):
        """Return the output of the wrapped layer for ``x`` and ``edge_index``."""
        propagated = self.conv1(x, edge_index)
        return propagated

class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers (``normalize=False``) ending in a log-softmax."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Layer widths: input, at least one hidden width, output.
        widths = ([in_channels]
                  + [hidden_channels] * max(num_layers - 1, 1)
                  + [out_channels])
        self.convs = torch.nn.ModuleList(
            GCNConv(fan_in, fan_out, normalize=False)
            for fan_in, fan_out in zip(widths[:-1], widths[1:]))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every convolution."""
        for layer in self.convs:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply conv -> ReLU -> dropout for all but the last layer, then log-softmax."""
        *hidden_layers, output_layer = self.convs
        for layer in hidden_layers:
            x = F.relu(layer(x, adj_t))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return output_layer(x, adj_t).log_softmax(dim=-1)


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers ending in a log-softmax."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Layer widths: input, at least one hidden width, output.
        widths = ([in_channels]
                  + [hidden_channels] * max(num_layers - 1, 1)
                  + [out_channels])
        self.convs = torch.nn.ModuleList(
            SAGEConv(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-1], widths[1:]))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every convolution."""
        for layer in self.convs:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply conv -> ReLU -> dropout for all but the last layer, then log-softmax."""
        *hidden_layers, output_layer = self.convs
        for layer in hidden_layers:
            x = F.relu(layer(x, adj_t))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return output_layer(x, adj_t).log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-graph optimisation step and return the training loss as a float."""
    model.train()
    optimizer.zero_grad()

    # The model scores every node; only the training rows enter the loss.
    log_probs = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(log_probs[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']['paper']],
        'y_pred': y_pred[split_idx['train']['paper']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']['paper']],
        'y_pred': y_pred[split_idx['valid']['paper']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']['paper']],
        'y_pred': y_pred[split_idx['test']['paper']],
    })['acc']

    return train_acc, valid_acc, test_acc

precompute_time = 0.0
def main():
    """Pre-compute the ogbn-mag paper embedding and return the elapsed seconds.

    Builds the symmetric paper-cites-paper graph, runs ``SGC`` once and writes
    the result to ``embedding_mag.pt``. The returned time covers the forward
    pass and the ``torch.save`` call.
    """
    # Local import so this cell does not rely on another cell having imported it.
    from time import perf_counter

    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-mag')
    rel_data = dataset[0]

    # We are only interested in paper <-> paper relations.
    data = Data(
        x=rel_data.x_dict['paper'],
        edge_index=rel_data.edge_index_dict[('paper', 'cites', 'paper')],
        y=rel_data.y_dict['paper'])

    data = T.ToSparseTensor()(data)
    data.adj_t = data.adj_t.to_symmetric()

    model = SGC(data.num_features,
                dataset.num_classes, 16,
                ).to(device)

    # Pre-compute GCN normalization: D^-1/2 (A + I) D^-1/2.
    # NOTE(review): SGConv.forward also sends a SparseTensor through gcn_norm,
    # so the operator looks normalized twice - confirm this is intended.
    adj_t = data.adj_t.set_diag()
    deg = adj_t.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
    data.adj_t = adj_t

    data = data.to(device)
    start = perf_counter()
    features = model(data.x, data.adj_t)
    torch.save(features,'embedding_mag.pt')
    return perf_counter() - start

precompute_time = main()

## SSGC

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed
from models import get_model
from metrics import accuracy
import pickle as pkl
#from args_cora import get_citation_args
from time import perf_counter
from scipy.sparse import coo_matrix, csr_matrix
from ogb.nodeproppred import PygNodePropPredDataset
import torch_geometric.transforms as T
from normalization import fetch_normalization, row_normalize
from utils import *
from torch_geometric.data import Data

# Only the split indices, the paper labels and the saved embedding are used
# below, so no graph object or sparse adjacency is built in this cell.
dataset = PygNodePropPredDataset(name='ogbn-mag')
data = dataset[0]

split_idx = dataset.get_idx_split()
idx_train = split_idx['train']['paper']
idx_val = split_idx['valid']['paper']
idx_test = split_idx['test']['paper']

# Labels are stored one per row; take the first column as a flat int64 vector.
labels = data.y_dict['paper'][:, 0].long()

# Written by the embedding cell; loaded on CPU regardless of where it was saved.
features = torch.load('embedding_mag.pt', map_location='cpu')

# Trailing zeros: presumably nhid, dropout and cuda - verify against models.get_model.
model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=100, weight_decay=1e-05,
                     lr=0.2, dropout=0):
    """Train ``model`` with Adam and return it restored to its best validation epoch.

    Args:
        model: Classifier called as ``model(features)``; trained in place.
        train_features, train_labels: Training inputs and integer class targets.
        val_features, val_labels: Validation inputs and targets used for selection.
        epochs: Number of full-batch optimisation steps.
        weight_decay: Adam weight decay.
        lr: Adam learning rate.
        dropout: Accepted for signature compatibility; not used here.

    Returns:
        ``(model, best_acc_val, train_time)``: the model carrying the weights of
        the epoch with the lowest validation loss (the last-epoch weights if no
        epoch went below the initial threshold of 100), the highest validation
        accuracy seen, and the elapsed seconds.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros((1))
    best_loss_val = 100.
    best_state = None
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            output = model(val_features)
            acc_val = accuracy(output, val_labels)
            loss_val = F.cross_entropy(output, val_labels)
            if best_acc_val < acc_val:
                best_acc_val = acc_val
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                # Clone the tensors: keeping a reference to ``model`` would just
                # alias the live weights, which later epochs keep overwriting.
                best_state = {name: value.detach().clone()
                              for name, value in model.state_dict().items()}

    train_time = perf_counter()-t

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_acc_val, train_time

def test_regression(model, test_features, test_labels):
    model.eval()
    return accuracy(model(test_features), test_labels)

# Average validation/test accuracy and training time over independent runs.
N_RUNS = 10
acc_test = 0
acc_val = 0
train_time = 0
for _ in range(N_RUNS):
    # Build a fresh classifier for every run; reusing one instance would let
    # each "run" continue from the weights left by the previous one.
    model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)
    model, acc_val_buf, train_time_buf = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                        100, 1e-05, 0.02, 0)
    acc_test += test_regression(model, features[idx_test], labels[idx_test])
    acc_val += acc_val_buf
    train_time += train_time_buf
acc_test /= N_RUNS
acc_val /= N_RUNS
train_time /= N_RUNS

# precompute_time is the value left in the kernel by the embedding cell.
print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))

## SSGC + MLP

In [None]:
import argparse

import torch
import torch.nn.functional as F

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger


class MLP(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> dropout blocks, then log-softmax."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Layer widths: input, at least one hidden width, output.
        widths = ([in_channels]
                  + [hidden_channels] * max(num_layers - 1, 1)
                  + [out_channels])
        self.lins = torch.nn.ModuleList(
            torch.nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-1], widths[1:]))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every linear layer."""
        for layer in self.lins:
            layer.reset_parameters()

    def forward(self, x):
        """Return per-class log-probabilities for each row of ``x``."""
        *hidden_layers, output_layer = self.lins
        for layer in hidden_layers:
            x = F.relu(layer(x))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return torch.log_softmax(output_layer(x), dim=-1)


def train(model, x, y_true, train_idx, optimizer):
    """Run one optimisation step on the training rows and return the loss as a float."""
    model.train()
    optimizer.zero_grad()

    # Only the training rows are fed through the model.
    log_probs = model(x[train_idx])
    targets = y_true[train_idx].squeeze(1)
    loss = F.nll_loss(log_probs, targets)

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, x, y_true, split_idx, evaluator):
    model.eval()

    out = model(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']['paper']],
        'y_pred': y_pred[split_idx['train']['paper']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']['paper']],
        'y_pred': y_pred[split_idx['valid']['paper']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']['paper']],
        'y_pred': y_pred[split_idx['test']['paper']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Train an MLP on raw ogbn-mag paper features concatenated with the saved embedding.

    Runs 10 independent trainings of 100 epochs each and prints the loss and the
    train/valid/test accuracy after every epoch.
    """
    num_runs = 10
    num_epochs = 100

    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-mag')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Input = raw paper features followed by the pre-computed embedding columns.
    x = data.x_dict['paper']
    embedding = torch.load('embedding_mag.pt', map_location='cpu')
    x = torch.cat([x, embedding], dim=-1)
    x = x.to(device)

    y_true = data.y_dict['paper'].to(device)
    train_idx = split_idx['train']['paper'].to(device)

    model = MLP(x.size(-1), 256, dataset.num_classes,
                3, 0.0).to(device)

    evaluator = Evaluator(name='ogbn-mag')

    for run in range(num_runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        # 1-based and inclusive of num_epochs; range(1, 100) stopped one epoch short.
        for epoch in range(1, 1 + num_epochs):
            loss = train(model, x, y_true, train_idx, optimizer)
            result = test(model, x, y_true, split_idx, evaluator)

            train_acc, valid_acc, test_acc = result
            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_acc:.2f}%, '
                  f'Valid: {100 * valid_acc:.2f}%, '
                  f'Test: {100 * test_acc:.2f}%')

main()

# Large OGB-Products Dataset

## Embedding

In [None]:
import argparse

import torch
import torch.nn.functional as F

from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing



class SGConv(MessagePassing):
    r"""SSGC-style feature propagation built on the PyG ``SGConv`` skeleton.

    Unlike the SGC operator this class is named after, :meth:`forward` applies
    no learned weight matrix. It returns

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{X}
        + \frac{1}{K} \sum_{k=1}^{K} \mathbf{S}^{k} \mathbf{X},

    where :math:`\mathbf{S}` is the adjacency returned by ``gcn_norm`` (called
    with ``improved=False`` and this layer's ``add_self_loops`` flag).

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Output size of the ``lin`` layer; also shown in
            ``repr``. ``lin`` is constructed but never applied by
            :meth:`forward`.
        K (int, optional): Number of propagation steps that are averaged.
            (default: :obj:`1`)
        cached (bool, optional): If set to :obj:`True`, the layer stores the
            result of its first :meth:`forward` call and returns it on every
            later call, ignoring the new inputs.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        add_self_loops (bool, optional): If set to :obj:`False`, will not add
            self-loops to the input graph. (default: :obj:`True`)
        bias (bool, optional): Bias flag of the unused ``lin`` layer.
            (default: :obj:`True`)
        dropout (float, optional): Stored on the instance; not used by this
            class. (default: :obj:`0.05`)
        alpha (float, optional): Weight of the raw input features in the
            output. (default: :obj:`0.05`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super(SGConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        # Kept so existing attribute access and state dicts stay valid.
        self.lin = Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result so the next forward recomputes it."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return ``alpha * x`` plus the ``1/K``-weighted sum of ``K`` propagation steps.

        When a cached result exists it is returned as is.
        """
        cache = self._cached_x
        if cache is None:
            if isinstance(edge_index, Tensor):
                edge_index, edge_weight = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)
            elif isinstance(edge_index, SparseTensor):
                edge_index = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)

            output = self.alpha * x
            for k in range(self.K):
                # Each pass advances x by one more hop; every hop contributes 1/K.
                x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                                   size=None)
                output = output + (1. / self.K) * x
            x = output
            if self.cached:
                self._cached_x = x
        else:
            x = cache

        return x

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each row of ``x_j`` by the matching entry of ``edge_weight``."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Multiply the sparse ``adj_t`` with ``x`` in one step, reducing with ``self.aggr``."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Wrapper around one cached ``SGConv``; ``forward`` returns its output unchanged.

    ``lin`` and ``dropout`` are created and stored but not used by ``forward``.
    """

    def __init__(self, in_channels, hidden, out_channels, num_layers, dropout):
        super().__init__()
        # num_layers is forwarded as the hop count K of the propagation layer.
        self.conv1 = SGConv(in_channels, hidden,
                            K=num_layers, cached=True)
        self.lin = torch.nn.Linear(hidden, out_channels)
        self.dropout = dropout

    def reset_parameters(self):
        """Reset the propagation layer and the linear head."""
        for module in (self.conv1, self.lin):
            module.reset_parameters()

    def forward(self, x, edge_index):
        """Return the output of the wrapped propagation layer."""
        propagated = self.conv1(x, edge_index)
        return propagated


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with batch norm between them, ending in log-softmax."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Layer widths: input, at least one hidden width, output.
        widths = ([in_channels]
                  + [hidden_channels] * max(num_layers - 1, 1)
                  + [out_channels])
        self.convs = torch.nn.ModuleList(
            SAGEConv(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-1], widths[1:]))
        # One batch norm per hidden output, i.e. for every conv except the last.
        self.bns = torch.nn.ModuleList(
            torch.nn.BatchNorm1d(width) for width in widths[1:-1])

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every convolution and every batch-norm layer."""
        for module in list(self.convs) + list(self.bns):
            module.reset_parameters()

    def forward(self, x, adj_t):
        """Apply conv -> batch norm -> ReLU -> dropout per hidden layer, then log-softmax."""
        for layer, norm in zip(self.convs[:-1], self.bns):
            x = F.relu(norm(layer(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.convs[-1](x, adj_t).log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-graph optimisation step and return the training loss as a float."""
    model.train()
    optimizer.zero_grad()

    # The model scores every node; only the training rows enter the loss.
    log_probs = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(log_probs[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc

precompute_time = 0.0
def main():
    """Pre-compute the ogbn-products embedding and return the elapsed seconds.

    Loads the graph as a symmetric sparse adjacency, runs ``SGC`` once and
    writes the result to ``embedding_products.pt``. The returned time covers
    model construction, the forward pass and the ``torch.save`` call.
    """
    # This cell's import block does not bring perf_counter into scope, so
    # import it here instead of relying on a previously executed cell.
    from time import perf_counter

    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-products',
                                     transform=T.ToSparseTensor())

    data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    data = data.to(device)

    start = perf_counter()
    model = SGC(data.num_features, 256, dataset.num_classes, 16, 0.5).to(device)

    features = model(data.x, data.adj_t)
    torch.save(features,'embedding_products.pt')
    return perf_counter() - start

precompute_time = main()

Processing...


Loading necessary files...
This might take a while.


## SSGC

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed
from models import get_model
from metrics import accuracy
import pickle as pkl
#from args_cora import get_citation_args
from time import perf_counter
from scipy.sparse import coo_matrix, csr_matrix
from ogb.nodeproppred import PygNodePropPredDataset
import torch_geometric.transforms as T
from normalization import fetch_normalization, row_normalize
from utils import *

# Only the split indices, the labels and the saved embedding are used below,
# so the dataset is loaded without building a sparse adjacency.
dataset = PygNodePropPredDataset(name='ogbn-products')
data = dataset[0]

split_idx = dataset.get_idx_split()
idx_train = split_idx['train']
idx_val = split_idx['valid']
idx_test = split_idx['test']

# Labels are stored one per row; take the first column as a flat int64 vector.
labels = data.y[:, 0].long()

# Written by the embedding cell; loaded on CPU regardless of where it was saved.
features = torch.load('embedding_products.pt', map_location='cpu')

# Trailing zeros: presumably nhid, dropout and cuda - verify against models.get_model.
model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=100, weight_decay=1e-05,
                     lr=0.2, dropout=0):
    """Train ``model`` with Adam and return it restored to its best validation epoch.

    Args:
        model: Classifier called as ``model(features)``; trained in place.
        train_features, train_labels: Training inputs and integer class targets.
        val_features, val_labels: Validation inputs and targets used for selection.
        epochs: Number of full-batch optimisation steps.
        weight_decay: Adam weight decay.
        lr: Adam learning rate.
        dropout: Accepted for signature compatibility; not used here.

    Returns:
        ``(model, best_acc_val, train_time)``: the model carrying the weights of
        the epoch with the lowest validation loss (the last-epoch weights if no
        epoch went below the initial threshold of 100), the highest validation
        accuracy seen, and the elapsed seconds.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros((1))
    best_loss_val = 100.
    best_state = None
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            output = model(val_features)
            acc_val = accuracy(output, val_labels)
            loss_val = F.cross_entropy(output, val_labels)
            if best_acc_val < acc_val:
                best_acc_val = acc_val
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                # Clone the tensors: keeping a reference to ``model`` would just
                # alias the live weights, which later epochs keep overwriting.
                best_state = {name: value.detach().clone()
                              for name, value in model.state_dict().items()}

    train_time = perf_counter()-t

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_acc_val, train_time

def test_regression(model, test_features, test_labels):
    model.eval()
    return accuracy(model(test_features), test_labels)
# Average validation/test accuracy and training time over independent runs.
N_RUNS = 10
acc_test = 0
acc_val = 0
train_time = 0
for _ in range(N_RUNS):
    # Build a fresh classifier for every run; reusing one instance would let
    # each "run" continue from the weights left by the previous one.
    model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)
    model, acc_val_buf, train_time_buf = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                        1000, 1e-05, 0.02, 0)
    acc_test += test_regression(model, features[idx_test], labels[idx_test])
    acc_val += acc_val_buf
    train_time += train_time_buf
acc_test /= N_RUNS
acc_val /= N_RUNS
train_time /= N_RUNS

# precompute_time is the value left in the kernel by the embedding cell.
print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))

## SSGC + MLP

In [None]:
import argparse

import torch
import torch.nn.functional as F

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger


class MLP(torch.nn.Module):
    """Fully connected classifier with batch norm after every hidden linear layer."""

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Layer widths: input, at least one hidden width, output.
        widths = ([in_channels]
                  + [hidden_channels] * max(num_layers - 1, 1)
                  + [out_channels])
        self.lins = torch.nn.ModuleList(
            torch.nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-1], widths[1:]))
        # One batch norm per hidden output, i.e. for every linear layer except the last.
        self.bns = torch.nn.ModuleList(
            torch.nn.BatchNorm1d(width) for width in widths[1:-1])

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every linear and every batch-norm layer."""
        for module in list(self.lins) + list(self.bns):
            module.reset_parameters()

    def forward(self, x):
        """Return per-class log-probabilities for each row of ``x``."""
        for layer, norm in zip(self.lins[:-1], self.bns):
            x = F.relu(norm(layer(x)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return torch.log_softmax(self.lins[-1](x), dim=-1)


def train(model, x, y_true, train_idx, optimizer):
    """Run one optimisation step on the training rows and return the loss as a float."""
    model.train()
    optimizer.zero_grad()

    # Only the training rows are fed through the model.
    log_probs = model(x[train_idx])
    targets = y_true.squeeze(1)[train_idx]
    loss = F.nll_loss(log_probs, targets)

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, x, y_true, split_idx, evaluator):
    model.eval()

    out = model(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Train an MLP on the saved ogbn-products embedding.

    Runs 10 independent trainings of 500 epochs each and prints the loss and the
    train/valid/test accuracy after every epoch.
    """
    num_runs = 10
    num_epochs = 500

    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-products')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # The embedding alone is the model input here. To also feed the raw node
    # features, concatenate them: torch.cat([data.x, embedding], dim=-1).
    embedding = torch.load('embedding_products.pt', map_location='cpu')
    x = embedding.to(device)

    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)

    model = MLP(x.size(-1), 256, dataset.num_classes,
                3, 0.5).to(device)

    # Use the evaluator of the dataset being scored (was 'ogbn-arxiv').
    evaluator = Evaluator(name='ogbn-products')

    for run in range(num_runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        for epoch in range(1, 1 + num_epochs):
            loss = train(model, x, y_true, train_idx, optimizer)
            result = test(model, x, y_true, split_idx, evaluator)

            train_acc, valid_acc, test_acc = result
            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_acc:.2f}%, '
                  f'Valid: {100 * valid_acc:.2f}%, '
                  f'Test: {100 * test_acc:.2f}%')

main()

This will download 1.38GB. Will you proceed? (y/N)
y
Downloading http://snap.stanford.edu/ogb/data/nodeproppred/products.zip


Downloaded 1.38 GB: 100%|██████████| 1414/1414 [02:03<00:00, 11.44it/s]


Extracting dataset/products.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 293.18it/s]


Saving...


Done!


FileNotFoundError: ignored

# Large Reddit Dataset / Community Prediction

## Embedding

In [None]:
import argparse
from time import perf_counter

import torch
import torch.nn.functional as F

from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.datasets import Reddit



class SGConv(MessagePassing):
    r"""SSGC-style feature propagation built on the PyG ``SGConv`` skeleton.

    Unlike the SGC operator this class is named after, :meth:`forward` applies
    no learned weight matrix. It returns

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{X}
        + \frac{1}{K} \sum_{k=1}^{K} \mathbf{S}^{k} \mathbf{X},

    where :math:`\mathbf{S}` is the adjacency returned by ``gcn_norm`` (called
    with ``improved=False`` and this layer's ``add_self_loops`` flag).

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Output size of the ``lin`` layer; also shown in
            ``repr``. ``lin`` is constructed but never applied by
            :meth:`forward`.
        K (int, optional): Number of propagation steps that are averaged.
            (default: :obj:`1`)
        cached (bool, optional): If set to :obj:`True`, the layer stores the
            result of its first :meth:`forward` call and returns it on every
            later call, ignoring the new inputs.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        add_self_loops (bool, optional): If set to :obj:`False`, will not add
            self-loops to the input graph. (default: :obj:`True`)
        bias (bool, optional): Bias flag of the unused ``lin`` layer.
            (default: :obj:`True`)
        dropout (float, optional): Stored on the instance; not used by this
            class. (default: :obj:`0.05`)
        alpha (float, optional): Weight of the raw input features in the
            output. (default: :obj:`0.05`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super(SGConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        # Kept so existing attribute access and state dicts stay valid.
        self.lin = Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result so the next forward recomputes it."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return ``alpha * x`` plus the ``1/K``-weighted sum of ``K`` propagation steps.

        When a cached result exists it is returned as is.
        """
        cache = self._cached_x
        if cache is None:
            if isinstance(edge_index, Tensor):
                edge_index, edge_weight = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)
            elif isinstance(edge_index, SparseTensor):
                edge_index = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)

            output = self.alpha * x
            for k in range(self.K):
                # Each pass advances x by one more hop; every hop contributes 1/K.
                x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                                   size=None)
                output = output + (1. / self.K) * x
            x = output
            if self.cached:
                self._cached_x = x
        else:
            x = cache

        return x

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each row of ``x_j`` by the matching entry of ``edge_weight``."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Multiply the sparse ``adj_t`` with ``x`` in one step, reducing with ``self.aggr``."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Wrapper that runs a single cached ``SGConv`` over the graph.

    ``forward`` returns the output of ``conv1`` directly; ``lin`` and
    ``dropout`` are stored on the module but are not applied in ``forward``.
    """

    def __init__(self, in_channels, hidden, out_channels, num_layers, dropout):
        super().__init__()
        # ``num_layers`` is forwarded as ``K`` of the convolution.
        self.conv1 = SGConv(in_channels, hidden, K=num_layers, cached=True)
        self.lin = torch.nn.Linear(hidden, out_channels)
        self.dropout = dropout

    def reset_parameters(self):
        """Reset the convolution and the linear layer."""
        for module in (self.conv1, self.lin):
            module.reset_parameters()

    def forward(self, x, edge_index):
        """Return ``self.conv1(x, edge_index)`` without any further processing."""
        propagated = self.conv1(x, edge_index)
        return propagated


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with batch norm, ReLU and dropout between them.

    The final convolution is followed only by ``log_softmax``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # One hidden width for the first convolution plus one per extra hidden
        # layer (``num_layers - 2`` of them, never fewer than zero).
        hidden_dims = [hidden_channels] * (max(num_layers - 2, 0) + 1)
        conv_dims = [in_channels] + hidden_dims + [out_channels]

        self.convs = torch.nn.ModuleList(
            [SAGEConv(d_in, d_out)
             for d_in, d_out in zip(conv_dims[:-1], conv_dims[1:])])
        self.bns = torch.nn.ModuleList(
            [torch.nn.BatchNorm1d(width) for width in hidden_dims])

        self.dropout = dropout

    def reset_parameters(self):
        """Reset every convolution, then every batch-norm layer."""
        for layer in list(self.convs) + list(self.bns):
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply all layers to ``x`` and return log-probabilities over the last dimension."""
        *hidden_convs, final_conv = self.convs
        for conv, bn in zip(hidden_convs, self.bns):
            x = F.relu(bn(conv(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return final_conv(x, adj_t).log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module called as ``model(data.x, data.adj_t)``; its output is
            fed to ``F.nll_loss``.
        data: Object exposing ``x``, ``adj_t`` and ``y`` (``y`` is squeezed on
            dimension 1 before indexing).
        train_idx: Index selecting the training nodes.
        optimizer: Optimiser holding the parameters of ``model``.

    Returns:
        float: The loss on the training nodes computed before the parameter update.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(predictions[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc

def main():
    """Precompute the propagated Reddit features and save them to disk.

    Loads the Reddit dataset from ``./dataset/reddit``, converts it with
    ``T.ToSparseTensor``, runs one forward pass of ``SGC`` and stores the
    result in ``embedding_reddit.pt``.

    Returns:
        float: Seconds (``perf_counter``) spent building the model, running
        the forward pass and saving the features.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    dataset = Reddit("./dataset/reddit")
    data = T.ToSparseTensor()(dataset[0])
    data = data.to(device)

    start = perf_counter()
    model = SGC(data.num_features, 256, dataset.num_classes, 16, 0.5).to(device)

    # Pure feature propagation: no gradients are needed for this pass.
    with torch.no_grad():
        features = model(data.x, data.adj_t)
    torch.save(features, 'embedding_reddit.pt')

    return perf_counter() - start


# Bind the result to the notebook-level name. An assignment inside ``main``
# only creates a local variable, so without this the global
# ``precompute_time`` read by the later timing report would stay at 0.0.
precompute_time = main()
precompute_time

29.876081878999912

## SSGC

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed
from models import get_model
from metrics import accuracy, f1
import pickle as pkl
#from args_cora import get_citation_args
from time import perf_counter
from scipy.sparse import coo_matrix, csr_matrix
from ogb.nodeproppred import PygNodePropPredDataset
import torch_geometric.transforms as T
from normalization import fetch_normalization, row_normalize
from utils import *
from torch_geometric.datasets import Reddit

# Arguments
#args = get_citation_args()

#if args.tuned:
#    if args.model == "SGC":
#        with open("{}-tuning/{}.txt".format(args.model, args.dataset), 'rb') as f:
#            args.weight_decay = pkl.load(f)['weight_decay']
#            print("using tuned weight decay: {}".format(args.weight_decay))
#    else:
#        raise NotImplemented

# setting random seeds
#set_seed(args.seed, args.cuda)

dataset = Reddit("./dataset/reddit")
data = dataset[0]

data = T.ToSparseTensor()(data)

# Node indices of every split, read straight off the masks with ``nonzero``
# instead of looping over each node in Python
# (assumes the masks are 1-D boolean/0-1 tensors — TODO confirm).
idx_train = data.train_mask.nonzero(as_tuple=False).view(-1)
idx_val = data.val_mask.nonzero(as_tuple=False).view(-1)
idx_test = data.test_mask.nonzero(as_tuple=False).view(-1)
labels = data.y

# Features written by the precompute cell; always loaded onto the CPU.
features = torch.load('embedding_reddit.pt', map_location='cpu')

model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=100, weight_decay=1e-05,
                     lr=0.2, dropout=0):
    """Train ``model`` with Adam and return a snapshot taken at the lowest validation loss.

    Args:
        model: Module mapping features to class scores; trained in place.
        train_features, train_labels: Training inputs and targets for
            ``F.cross_entropy``.
        val_features, val_labels: Validation inputs and targets.
        epochs: Number of full-batch optimisation steps.
        weight_decay: Weight decay passed to Adam.
        lr: Learning rate passed to Adam.
        dropout: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(best_model, best_acc_val, train_time)``. ``best_model`` is an
        independent copy of ``model`` taken at the epoch with the lowest
        validation loss (``model`` itself if no epoch produced a comparable
        loss, e.g. ``epochs == 0``). ``best_acc_val`` is the highest validation
        accuracy seen in any epoch, and ``train_time`` the elapsed seconds,
        including the time spent taking snapshots.
    """
    from copy import deepcopy  # local import keeps this definition self-contained

    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros((1))
    best_loss_val = float('inf')
    best_model = None
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            output = model(val_features)
            acc_val = accuracy(output, val_labels)
            loss_val = F.cross_entropy(output, val_labels)
            if best_acc_val < acc_val:
                best_acc_val = acc_val
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                # Copy the weights: keeping a plain reference would make the
                # "best" model silently follow every later optimisation step.
                best_model = deepcopy(model)

        if epoch % 10 == 0:
            print(f'epoch: {epoch + 1} finished')

    train_time = perf_counter()-t

    if best_model is None:
        # No epoch ran or every validation loss was NaN; fall back to the
        # trained model rather than handing ``None`` to the caller.
        best_model = model

    return best_model, best_acc_val, train_time

def test_regression(model, test_features, test_labels):
    model.eval()
    # return accuracy(model(test_features), test_labels)
    return f1(model(test_features), test_labels)[0]

NUM_EPOCHS = 200
NUM_RUNS = 5

acc_test = 0
acc_val = 0
train_time = 0
for run in range(NUM_RUNS):
    # Start every run from a freshly created classifier so the averaged
    # metrics come from independent trainings instead of one model that keeps
    # training across all runs.
    model = get_model("SGC", features.shape[1], labels.max().item()+1, 0, 0, 0)
    model, acc_val_buf, train_time_buf = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                        NUM_EPOCHS, 1e-05, 0.02, 0)
    acc_test += test_regression(model, features[idx_test], labels[idx_test])
    acc_val += acc_val_buf
    train_time += train_time_buf

    print(f'run: {run + 1} finished')

# Report the mean over all runs.
acc_test /= NUM_RUNS
acc_val /= NUM_RUNS
train_time /= NUM_RUNS

print("Validation Accuracy: {:.4f} Test F1: {:.4f}".format(acc_val, acc_test))
# ``precompute_time`` is the notebook-level value left behind by the precompute cell.
print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))

epoch: 1 finished
epoch: 11 finished
epoch: 21 finished
epoch: 31 finished
epoch: 41 finished
epoch: 51 finished
epoch: 61 finished
epoch: 71 finished
epoch: 81 finished
epoch: 91 finished
epoch: 101 finished
epoch: 111 finished
epoch: 121 finished
epoch: 131 finished
epoch: 141 finished
epoch: 151 finished
epoch: 161 finished
epoch: 171 finished
epoch: 181 finished
epoch: 191 finished
run: 1 finished
epoch: 1 finished
epoch: 11 finished
epoch: 21 finished
epoch: 31 finished
epoch: 41 finished
epoch: 51 finished
epoch: 61 finished
epoch: 71 finished
epoch: 81 finished
epoch: 91 finished
epoch: 101 finished
epoch: 111 finished
epoch: 121 finished
epoch: 131 finished
epoch: 141 finished
epoch: 151 finished
epoch: 161 finished
epoch: 171 finished
epoch: 181 finished
epoch: 191 finished
run: 2 finished
epoch: 1 finished
epoch: 11 finished
epoch: 21 finished
epoch: 31 finished
epoch: 41 finished
epoch: 51 finished
epoch: 61 finished
epoch: 71 finished
epoch: 81 finished
epoch: 91 finished
e

# Node Clustering

In [None]:
import scipy.io as sio
import time
import tensorflow as tf
import numpy as np
import scipy.sparse as sp
from sklearn.cluster import KMeans
from NodeClustering.metrics import clustering_metrics
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import normalize
def normalize_adj(adj, type='sym'):
    """Normalize an adjacency matrix by its row sums.

    Args:
        adj: Adjacency matrix. For ``'sym'`` it is converted with
            ``sp.coo_matrix`` first; for ``'rw'`` it is used as given.
        type: ``'sym'`` computes ``D^-1/2 · A · D^-1/2`` and returns a COO
            matrix; ``'rw'`` computes ``D^-1 · A``. ``D`` is the diagonal
            matrix of row sums.

    Returns:
        The normalized matrix. Rows whose sum is zero get a scaling factor of
        zero instead of infinity.

    Raises:
        ValueError: If ``type`` is neither ``'sym'`` nor ``'rw'`` (previously
            this silently returned ``None``).
    """
    if type not in ('sym', 'rw'):
        raise ValueError(
            "type must be 'sym' or 'rw', got {!r}".format(type))

    symmetric = type == 'sym'
    if symmetric:
        adj = sp.coo_matrix(adj)

    rowsum = np.array(adj.sum(1))
    # Zero row sums produce ``inf`` here; that is expected and patched on the
    # next line, so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        scale = np.power(rowsum, -0.5 if symmetric else -1.0).flatten()
    scale[np.isinf(scale)] = 0.
    scale_mat = sp.diags(scale)

    if symmetric:
        return scale_mat.dot(adj).dot(scale_mat).tocoo()
    return scale_mat.dot(adj)


def preprocess_adj(adj, type='sym', loop=True):
    """Optionally add self-loops to ``adj`` and return ``normalize_adj`` of the result.

    Args:
        adj: Adjacency matrix.
        type: Normalization mode forwarded to ``normalize_adj``.
        loop: When true, the identity matrix is added to ``adj`` first.
    """
    prepared = adj + sp.eye(adj.shape[0]) if loop else adj
    return normalize_adj(prepared, type=type)


def to_onehot(prelabel):
    """Return a ``(k, n)`` indicator matrix for ``n`` labels with ``k`` distinct values.

    Row ``i`` marks the samples carrying the ``i``-th smallest distinct label.
    Labels are mapped to rows through ``np.unique``, so they do not have to be
    the contiguous range ``0..k-1`` (the previous implementation indexed with
    the raw label values and raised ``IndexError`` otherwise).

    Args:
        prelabel: 1-D array-like of integer labels.

    Returns:
        numpy.ndarray: Float array of zeros and ones with shape ``(k, n)``.
    """
    prelabel = np.asarray(prelabel).reshape(-1)
    classes, row = np.unique(prelabel, return_inverse=True)
    n = prelabel.shape[0]
    label = np.zeros([len(classes), n])
    label[row.reshape(-1), np.arange(n)] = 1
    return label


def square_dist(prelabel, feature):
    """Return the intra-cluster squared-distance score of a clustering.

    For every cluster the mean feature vector and the mean squared norm of its
    members are computed; the score is the sum over clusters of
    ``2 * mean_sq_norm - 2 * ||mean||^2`` divided by the number of clusters.

    Args:
        prelabel: Cluster label of every sample.
        feature: Sample features (dense or scipy-sparse), one row per sample.

    Returns:
        The intra-cluster score as a numpy float.
    """
    if sp.issparse(feature):
        feature = feature.todense()
    feature = np.array(feature)

    onehot = to_onehot(prelabel)

    m = onehot.shape[0]
    count = onehot.sum(1).reshape(m, 1)
    count[count == 0] = 1  # avoid dividing by zero for an empty cluster

    mean = onehot.dot(feature) / count
    a2 = (onehot.dot(feature * feature) / count).sum(1)
    # Only the diagonal of this matrix is used below. The former inter-cluster
    # term was never returned and divided by ``m * (m - 1)``, which is zero for
    # a single cluster, so it has been dropped.
    pdist2 = np.array(a2 + a2.T - 2 * mean.dot(mean.T))

    return pdist2.trace() / m

def dist(prelabel, feature):
    """Return the mean pairwise Euclidean distance inside clusters, averaged over clusters.

    Every cluster with at least two members contributes
    ``sum(pairwise distances) / (n_i * (n_i - 1))``; the contributions are
    weighted by ``1 / k`` where ``k`` is the number of distinct labels.
    Clusters with fewer than two members contribute nothing.

    The distance matrix is only computed for clusters that have pairs: the
    previous version called ``euclidean_distances`` before checking the
    cluster size, and looped over ``range(k)`` instead of the labels that
    actually occur.

    Args:
        prelabel: Cluster label of every sample.
        feature: Sample features, one row per sample.

    Returns:
        The accumulated intra-cluster distance (``0`` if no cluster has pairs).
    """
    clusters = np.unique(prelabel)
    k = len(clusters)
    intra_dist = 0

    for cluster in clusters:
        members = feature[prelabel == cluster]
        n_i = members.shape[0]
        if n_i < 2:
            # No pairs inside this cluster.
            continue
        pairwise = euclidean_distances(members, members)
        intra_dist = intra_dist + 1 / k * 1 / (n_i * (n_i - 1)) * pairwise.sum()

    return intra_dist

# Node-clustering experiment: for every dataset, repeatedly propagate the
# features over the normalized adjacency, average the propagated features,
# project them with a truncated SVD, cluster with k-means and score the result.
datasets = ['cora', 'citeseer', 'pubmed', 'wiki']
for dataset in datasets:
    
    #for i in range(10): dont have the computational capacities
    # Each .mat file provides the features ('fea'), the adjacency ('W') and
    # the ground-truth labels ('gnd').
    data = sio.loadmat('./NodeClustering/{}.mat'.format(dataset))
    feature = data['fea']
    if sp.issparse(feature):
        feature = feature.todense()

    adj = data['W']
    gnd = data['gnd']
    # Transpose, subtract one and keep the first row to obtain a flat label
    # vector (presumably the labels are stored as a 1-based column — verify).
    gnd = gnd.T
    gnd = gnd - 1
    gnd = gnd[0, :]
    k = len(np.unique(gnd))  # number of clusters requested from k-means
    adj = sp.coo_matrix(adj)
    intra_list = []
    # Sentinel for "power 0" so the stopping test has a predecessor at tt == 1.
    intra_list.append(10000)


    acc_list = []
    nmi_list = []
    f1_list = []
    # The std lists are filled below but never read in this cell.
    stdacc_list = []
    stdnmi_list = []
    stdf1_list = []
    max_iter = 60  # upper bound on the number of propagation steps
    rep = 10       # k-means repetitions per propagation step
    t = time.time()
    #adj_normalized = preprocess_adj(adj)
    #adj_normalized = (sp.eye(adj_normalized.shape[0]) + adj_normalized) / 2
    # Symmetric normalization (the default type) without self-loops.
    adj_normalized = preprocess_adj(adj,loop=False)
    # adj_normalized = (sp.eye(adj_normalized.shape[0]) + adj_normalized + adj_normalized.dot(adj_normalized)) / 3
    # NOTE(review): total_dist, alpha, feature_ori, oneV and den are assigned
    # but not used anywhere else in this cell.
    total_dist = []

    tt = 0
    alpha = 0.05 #0.05 for pubmed, 0.1 for citeseer
    feature_ori = feature.astype('float64')
    #emb = feature.astype('float64')
    #emb = feature.astype('float64')
    # Running sum of the propagated features; the raw features are not included.
    emb = np.zeros_like(feature).astype('float64')
    oneV = np.ones(feature.shape[0])
    den = np.zeros_like(oneV)
    while 1:
        tt = tt + 1
        power = tt  # only referenced by the commented-out report below
        intraD = np.zeros(rep)


        ac = np.zeros(rep)
        nm = np.zeros(rep)
        # NOTE(review): this rebinds the name ``f1`` that an earlier cell
        # imported from ``metrics``; re-running that cell's evaluation
        # afterwards would pick up this array instead of the function.
        f1 = np.zeros(rep)



        # One more propagation step: after this line ``feature`` has been
        # multiplied by ``adj_normalized`` ``tt`` times.
        feature = adj_normalized.dot(feature)

        # Mean of the propagated features over steps 1..tt.
        emb += feature
        emb_norm = emb/tt
        # Rank-16 truncated SVD; only ``v`` is used, ``u`` is overwritten with
        # the normalized projection of ``emb_norm`` onto ``v``.
        u, s, v = sp.linalg.svds(emb_norm, k=16, which='LM')
        u = normalize(emb_norm.dot(v.T))




        # k-means is not seeded, so repeated runs give different numbers.
        for i in range(rep):
            kmeans = KMeans(n_clusters=k).fit(u)
            predict_labels = kmeans.predict(u)
            intraD[i] = square_dist(predict_labels, u)
            #intraD[i] = dist(predict_labels, feature)
            cm = clustering_metrics(gnd, predict_labels)
            ac[i], nm[i], f1[i] = cm.evaluationClusterModelFromLabel()

        # Aggregate the ``rep`` repetitions of this propagation step.
        intramean = np.mean(intraD)
        acc_means = np.mean(ac)
        acc_stds = np.std(ac)
        nmi_means = np.mean(nm)
        nmi_stds = np.std(nm)
        f1_means = np.mean(f1)
        f1_stds = np.std(f1)

        intra_list.append(intramean)
        acc_list.append(acc_means)
        stdacc_list.append(acc_stds)
        nmi_list.append(nmi_means)
        stdnmi_list.append(nmi_stds)
        f1_list.append(f1_means)
        stdf1_list.append(f1_stds)
        #print('power: {}'.format(power),
        #      'intra_dist: {}'.format(intramean),
        #      'acc_mean: {}'.format(acc_means),
        #      'acc_std: {}'.format(acc_stds),
        #      'nmi_mean: {}'.format(nmi_means),
        #      'nmi_std: {}'.format(nmi_stds),
        #      'f1_mean: {}'.format(f1_means),
        #      'f1_std: {}'.format(f1_stds))

        # Stop as soon as the intra-cluster score gets worse than in the
        # previous step, or once more than ``max_iter`` steps have run.
        if intra_list[tt] > intra_list[tt - 1] or tt > max_iter:
            #print('bestpower: {}'.format(tt - 1))
            t = time.time() - t  # elapsed seconds; not printed
            #print(t)
            break
    # NOTE(review): the reported numbers are averages over every propagation
    # step evaluated (including the last, worse one), not the metrics of the
    # best step — confirm this is the intended protocol.
    print("dataset:", dataset, "acc:", str(sum(acc_list)/len(acc_list)), " nmi:", str(sum(nmi_list)/len(nmi_list)), "f1-score: ", str(sum(f1_list)/len(f1_list)))

dataset: cora acc: 0.6652511078286558  nmi: 0.5109362256429308 f1-score:  0.6312434441582484
dataset: citeseer acc: 0.687883064779603  nmi: 0.42606741824229544 f1-score:  0.6443867309397274
dataset: pubmed acc: 0.6967810917931353  nmi: 0.317986134801085 f1-score:  0.6907845693766443




dataset: wiki acc: 0.5187281399046105  nmi: 0.48933923797813084 f1-score:  0.4437493461822928


# Document Classification
Here, too, our resources are not sufficient to load the matrices into memory. To address this problem, the author states that he generated and linked the preprocessed files he used for the experiments. However, the link to these preprocessed files is not valid (it points to his own Google Drive path, which we cannot access). Link given: https://drive.google.com/drive/u/0/my-drive
<br>
<br>
The results that we could obtain (2/5) successfully match the results described in the paper.

In [None]:
import time
import argparse
import numpy as np
import pickle
import os
from copy import deepcopy
import torch
import torch.nn.functional as F
import torch.optim as optim
import tabulate
from functools import partial
from DocumentClassification.utils import *
from DocumentClassification.models import SGC


#torch.backends.cudnn.benchmark = True
#set_seed(args.seed, args.cuda)

def train_linear(model, feat_dict, weight_decay, binary=False, epochs=10):
    """Fit ``model`` with L-BFGS on the training split and score it on the validation split.

    The training and validation labels are read from the notebook-level
    ``label_dict`` (keys ``"train"`` and ``"val"``).

    Args:
        model: Module with a ``W`` sub-module whose ``weight`` is L2-regularized.
        feat_dict: Mapping with ``"train"`` and ``"val"`` feature tensors.
        weight_decay: Coefficient of the ``0.5 * weight_decay * ||W||^2`` penalty.
        binary: If true, use sigmoid + binary cross-entropy; otherwise
            log-softmax + negative log-likelihood.
        epochs: Number of ``optimizer.step`` calls (10, the former fixed value,
            by default).

    Returns:
        tuple: ``(validation accuracy, model, training time in seconds)``.
    """
    if not binary:
        act = partial(F.log_softmax, dim=1)
        criterion = F.nll_loss
    else:
        act = torch.sigmoid
        criterion = F.binary_cross_entropy
    optimizer = optim.LBFGS(model.parameters())
    train_features = feat_dict["train"]
    train_labels = label_dict["train"]

    def closure():
        # L-BFGS re-evaluates the objective itself, so loss and gradients are
        # recomputed from scratch on every call.
        optimizer.zero_grad()
        output = model(train_features).squeeze()
        l2_reg = 0.5*weight_decay*(model.W.weight**2).sum()
        loss = criterion(act(output), train_labels)+l2_reg
        loss.backward()
        return loss

    # ``eval_linear`` leaves the model in eval mode; make sure optimisation
    # always happens in train mode, also when a model is trained again.
    model.train()
    start = time.perf_counter()
    for _ in range(epochs):
        optimizer.step(closure)
    train_time = time.perf_counter()-start

    val_res = eval_linear(model, feat_dict["val"],
                          label_dict["val"], binary)
    return val_res['accuracy'], model, train_time

def eval_linear(model, features, label, binary=False):
    """Compute loss and accuracy of ``model`` on ``features`` / ``label``.

    Args:
        model: Module producing one score vector (or one score) per sample.
        features: Input tensor passed to ``model``.
        label: Targets; class indices, or float 0/1 values when ``binary``.
        binary: If true, use sigmoid + binary cross-entropy and threshold the
            probabilities at 0.5; otherwise use log-softmax + negative
            log-likelihood and take the highest-scoring class.

    Returns:
        dict: ``{'loss': float, 'accuracy': float}``.
    """
    model.eval()
    if binary:
        act, criterion = torch.sigmoid, F.binary_cross_entropy
    else:
        act, criterion = partial(F.log_softmax, dim=1), F.nll_loss

    with torch.no_grad():
        output = model(features).squeeze()
        activated = act(output)
        loss = criterion(activated, label)
        if binary:
            predict_class = activated.gt(0.5).float()
        else:
            _, predict_class = output.max(1)
        correct = torch.eq(predict_class, label).long().sum().item()
        acc = correct/predict_class.size(0)

    return {'loss': loss.item(), 'accuracy': acc}

In [None]:
dataset = 'R8'
# Only the "mr" corpus is handled as a binary task (float labels, one output unit).
is_binary = dataset == "mr"

sp_adj, index_dict, label_dict = load_corpus(dataset)
for split, raw_labels in label_dict.items():
    label_dict[split] = torch.Tensor(raw_labels) if is_binary else torch.LongTensor(raw_labels)

adj = sparse_to_torch_sparse(sp_adj, device="cpu")
nclass = 1 if is_binary else label_dict["train"].max().item()+1

# The features are always recomputed here; the released preprocessed files
# could not be downloaded (see the note at the top of this section).
adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5-1, index_dict)

model = SGC(nfeat=feat_dict["train"].size(1),
            nclass=nclass)

val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
test_res = eval_linear(best_model, feat_dict["test"],
                       label_dict["test"], is_binary)
train_res = eval_linear(best_model, feat_dict["train"],
                        label_dict["train"], is_binary)
# ``{:.2f}``: two decimals (the former ``{:2f}`` only set a minimum width of 2).
print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu
Total Time: 97.069985s, Train acc: 1.0000, Val acc: 0.9471, Test acc: 0.9557


In [None]:
dataset = 'R52'
# Only the "mr" corpus is handled as a binary task (float labels, one output unit).
is_binary = dataset == "mr"

sp_adj, index_dict, label_dict = load_corpus(dataset)
for split, raw_labels in label_dict.items():
    label_dict[split] = torch.Tensor(raw_labels) if is_binary else torch.LongTensor(raw_labels)

adj = sparse_to_torch_sparse(sp_adj, device="cpu")
nclass = 1 if is_binary else label_dict["train"].max().item()+1

# The features are always recomputed here; the released preprocessed files
# could not be downloaded (see the note at the top of this section).
adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5-1, index_dict)

model = SGC(nfeat=feat_dict["train"].size(1),
            nclass=nclass)

val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
test_res = eval_linear(best_model, feat_dict["test"],
                       label_dict["test"], is_binary)
train_res = eval_linear(best_model, feat_dict["train"],
                        label_dict["train"], is_binary)
# ``{:.2f}``: two decimals (the former ``{:2f}`` only set a minimum width of 2).
print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu
Total Time: 292.449571s, Train acc: 1.0000, Val acc: 0.9265, Test acc: 0.9330


In [None]:
dataset = 'ohsumed'
# Only the "mr" corpus is handled as a binary task (float labels, one output unit).
is_binary = dataset == "mr"

sp_adj, index_dict, label_dict = load_corpus(dataset)
for split, raw_labels in label_dict.items():
    label_dict[split] = torch.Tensor(raw_labels) if is_binary else torch.LongTensor(raw_labels)

adj = sparse_to_torch_sparse(sp_adj, device="cpu")
nclass = 1 if is_binary else label_dict["train"].max().item()+1

# The features are always recomputed here; the released preprocessed files
# could not be downloaded (see the note at the top of this section).
adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5-1, index_dict)

model = SGC(nfeat=feat_dict["train"].size(1),
            nclass=nclass)

val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
test_res = eval_linear(best_model, feat_dict["test"],
                       label_dict["test"], is_binary)
train_res = eval_linear(best_model, feat_dict["train"],
                        label_dict["train"], is_binary)
# ``{:.2f}``: two decimals (the former ``{:2f}`` only set a minimum width of 2).
print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu
Total Time: 232.598604s, Train acc: 1.0000, Val acc: 0.6776, Test acc: 0.6579


In [None]:
dataset = 'mr'
# Only the "mr" corpus is handled as a binary task (float labels, one output unit).
is_binary = dataset == "mr"

sp_adj, index_dict, label_dict = load_corpus(dataset)
for split, raw_labels in label_dict.items():
    label_dict[split] = torch.Tensor(raw_labels) if is_binary else torch.LongTensor(raw_labels)

adj = sparse_to_torch_sparse(sp_adj, device="cpu")
nclass = 1 if is_binary else label_dict["train"].max().item()+1

# The features are always recomputed here; the released preprocessed files
# could not be downloaded (see the note at the top of this section).
adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5-1, index_dict)

model = SGC(nfeat=feat_dict["train"].size(1),
            nclass=nclass)

val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
test_res = eval_linear(best_model, feat_dict["test"],
                       label_dict["test"], is_binary)
train_res = eval_linear(best_model, feat_dict["train"],
                        label_dict["train"], is_binary)
# ``{:.2f}``: two decimals (the former ``{:2f}`` only set a minimum width of 2).
print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu
Total Time: 84.841316s, Train acc: 1.0000, Val acc: 0.7042, Test acc: 0.6998


In [None]:
dataset = '20ng'
# Only the "mr" corpus is handled as a binary task (float labels, one output unit).
is_binary = dataset == "mr"

sp_adj, index_dict, label_dict = load_corpus(dataset)
for split, raw_labels in label_dict.items():
    label_dict[split] = torch.Tensor(raw_labels) if is_binary else torch.LongTensor(raw_labels)

adj = sparse_to_torch_sparse(sp_adj, device="cpu")
nclass = 1 if is_binary else label_dict["train"].max().item()+1

# The features are always recomputed here; the released preprocessed files
# could not be downloaded (see the note at the top of this section).
adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5-1, index_dict)

model = SGC(nfeat=feat_dict["train"].size(1),
            nclass=nclass)

val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
test_res = eval_linear(best_model, feat_dict["test"],
                       label_dict["test"], is_binary)
train_res = eval_linear(best_model, feat_dict["train"],
                        label_dict["train"], is_binary)
# ``{:.2f}``: two decimals (the former ``{:2f}`` only set a minimum width of 2).
print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu
