# Node Classification

In [2]:
import logging
import os
import argparse
import configparser

import random
import time

import numpy as np
import scipy.sparse as sp

import torch
import torch.nn as nn
import torch.optim as optim

from script import dataloader, utility, earlystopping
from model import models

import nni
from torch_geometric.data import Data
import torch_geometric.transforms as T

# Shared parser: every config.read() call in this file merges into this object.
config = configparser.ConfigParser()


def ConfigSectionMap(section):
    """Return all options of ``section`` in the shared ``config`` as a dict.

    Args:
        section: Name of a section already loaded into the module-level
            ``config`` parser.

    Returns:
        dict mapping each option name to its string value. An option whose
        value the parser cannot resolve (for example a broken interpolation)
        is reported on stdout and mapped to ``None``.

    Raises:
        configparser.NoSectionError: If ``section`` has not been loaded.
    """
    dict1 = {}
    for option in config.options(section):
        try:
            dict1[option] = config.get(section, option)
        except configparser.Error:
            # Best effort: keep the remaining options usable, but only for
            # parser errors -- anything else is a real bug and must surface.
            print('exception on %s!' % option)
            dict1[option] = None
    return dict1

def get_parameters(dataset):
    """Assemble the run configuration for one citation dataset.

    Reads ``./config/data/<dataset>.ini`` and ``./config/model/ssgc_sym.ini``
    into the shared ``config`` parser and combines their values with the
    constants fixed in this function (alpha, K, epochs, optimizer, patience).

    Args:
        dataset: One of ``"cora"``, ``"citeseer"`` or ``"pubmed"``.

    Returns:
        A 15-tuple ``(device, dataset_name, data_path, graph_path,
        learning_rate, weight_decay_rate, model_name, model_save_path,
        renorm_adj_type, alpha, K, enable_bias, epochs, opt,
        early_stopping_patience)``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
        FileNotFoundError: If either config file cannot be read.
    """
    supported_datasets = ("cora", "citeseer", "pubmed")
    if dataset not in supported_datasets:
        raise ValueError(
            f'ERROR: dataset {dataset} is undefined; expected one of {supported_datasets}.')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset_config_path = f'./config/data/{dataset}.ini'
    model_config_path = './config/model/ssgc_sym.ini'

    # config.read() skips unreadable files silently; because the parser is
    # shared, that would leave values from a previous call in place.
    if not config.read(dataset_config_path, encoding='utf-8'):
        raise FileNotFoundError(f'ERROR: cannot read {dataset_config_path}.')
    dataset_name = ConfigSectionMap('dataset')['name']
    data_path = './data/'
    graph_path = None
    model_section = ConfigSectionMap('model')
    learning_rate = float(model_section['learning_rate'])
    weight_decay_rate = float(model_section['weight_decay_rate'])
    model_save_path = model_section['model_save_path']

    # The model file is merged on top of the dataset file in the same parser.
    if not config.read(model_config_path, encoding='utf-8'):
        raise FileNotFoundError(f'ERROR: cannot read {model_config_path}.')
    model_name = ConfigSectionMap('model')['name']
    renorm_adj_type = ConfigSectionMap('gconv')['renorm_adj_type']

    alpha = 0.05
    K = 16
    enable_bias = True
    epochs = 100  # reduced because otherwise it takes too long (original 10000)
    opt = "Adam"
    early_stopping_patience = 10

    model_save_path = model_save_path + model_name + '_' + str(renorm_adj_type) + '_' + str(alpha) + \
                    '_alpha_' + str(K) + '_power' + '.pth'

    return device, dataset_name, data_path, graph_path, learning_rate, weight_decay_rate, model_name, \
        model_save_path, renorm_adj_type, alpha, K, enable_bias, epochs, opt, early_stopping_patience

_CITATION_DATASETS = ('cora', 'citeseer', 'pubmed')
_WEBKB_DATASETS = ('cornell', 'texas', 'washington', 'wisconsin')
_OGB_DATASETS = {'arxiv': 'ogbn-arxiv', 'mag': 'ogbn-mag', 'products': 'ogbn-products'}
_RENORM_ADJ_TYPES = ('sym', 'rw')


def _one_hot_labels(y):
    """Return an (n, n_class) one-hot matrix for the integer class ids in ``y``."""
    y = np.asarray(y).reshape(-1)
    n_class = np.unique(y).size
    # int8 keeps the matrix small; it is only used for its shape and argmax.
    one_hot = np.zeros((y.shape[0], n_class), dtype=np.int8)
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot


def _row_normalize_features(features):
    """Scale every row of ``features`` by the inverse of its sum (0 for empty rows)."""
    with np.errstate(divide='ignore'):
        row_sum_inv = np.power(np.sum(features, axis=1), -1)
    row_sum_inv[np.isinf(row_sum_inv)] = 0.
    return sp.csc_matrix(features * row_sum_inv[:, np.newaxis])


def _directed_adjacency(data):
    """Return the adjacency matrix of a PyG ``data`` object as a CSC matrix."""
    # Imported here so the function does not depend on a later notebook cell.
    import networkx as nx
    from torch_geometric.utils import to_networkx

    return sp.csc_matrix(nx.adjacency_matrix(to_networkx(data)))


def _load_ogb_graph(dataset_name):
    """Load an OGB node-property dataset; return ``(data, idx_train, idx_val, idx_test)``."""
    from ogb.nodeproppred import PygNodePropPredDataset

    dataset = PygNodePropPredDataset(name=_OGB_DATASETS[dataset_name])
    data = dataset[0]
    split_idx = dataset.get_idx_split()
    if dataset_name == 'mag':
        # ogbn-mag is heterogeneous: keep only the paper-cites-paper graph,
        # and pick the 'paper' entry of each per-node-type split.
        data = Data(x=data.x_dict['paper'],
                    edge_index=data.edge_index_dict[('paper', 'cites', 'paper')],
                    y=data.y_dict['paper'])
        split_idx = {name: idx['paper'] for name, idx in split_idx.items()}
    return data, split_idx['train'], split_idx['valid'], split_idx['test']


def _load_reddit_graph():
    """Load the PyG Reddit dataset; return ``(data, idx_train, idx_val, idx_test)``."""
    from torch_geometric.datasets import Reddit

    data = Reddit("./dataset/reddit")[0]
    # Reddit ships boolean masks rather than an index split.
    idx_train = data.train_mask.nonzero(as_tuple=False).view(-1)
    idx_val = data.val_mask.nonzero(as_tuple=False).view(-1)
    idx_test = data.test_mask.nonzero(as_tuple=False).view(-1)
    return data, idx_train, idx_val, idx_test


def process_data(device, dataset_name, data_path, graph_path, renorm_adj_type, alpha, K):
    """Load a dataset and precompute the propagated node features.

    Args:
        device: torch device the returned tensors are moved to.
        dataset_name: 'cora', 'citeseer', 'pubmed', 'cornell', 'texas',
            'washington', 'wisconsin', 'arxiv', 'mag', 'products' or 'reddit'.
        data_path: Directory handed to the ``dataloader`` helpers
            (citation and WebKB datasets only).
        graph_path: Unused; kept for interface compatibility.
        renorm_adj_type: 'sym' or 'rw'; selects which ``utility`` function
            renormalizes the adjacency matrix.
        alpha: Forwarded to ``utility.calc_adj_mul_feat``.
        K: Forwarded to ``utility.calc_adj_mul_feat``.

    Returns:
        ``(features, labels, idx_train, idx_val, idx_test, n_feat, n_class,
        n_vertex)`` where the first five are tensors on ``device`` and the
        counts are taken from the feature and one-hot label matrix shapes
        before propagation.

    Raises:
        ValueError: If ``dataset_name`` or ``renorm_adj_type`` is not supported.
    """
    # Validate first so a typo does not cost a multi-gigabyte dataset load.
    if renorm_adj_type not in _RENORM_ADJ_TYPES:
        raise ValueError(f'ERROR: renorm_adj_type {renorm_adj_type} is undefined.')

    if dataset_name in _CITATION_DATASETS:
        features, labels, dir_adj, idx_train, idx_val, idx_test = dataloader.load_citation_data(dataset_name, data_path)
    elif dataset_name in _WEBKB_DATASETS:
        features, dir_adj, labels, idx_train, idx_val, idx_test = dataloader.load_webkb_data(dataset_name, data_path)
    elif dataset_name in _OGB_DATASETS or dataset_name == 'reddit':
        if dataset_name == 'reddit':
            data, idx_train, idx_val, idx_test = _load_reddit_graph()
        else:
            data, idx_train, idx_val, idx_test = _load_ogb_graph(dataset_name)
        labels = _one_hot_labels(data.y.numpy())
        dir_adj = _directed_adjacency(data)
        features = _row_normalize_features(data.x.numpy())
    else:
        raise ValueError(f'ERROR: dataset {dataset_name} is undefined.')

    n_vertex, n_feat, n_labels, n_class = features.shape[0], features.shape[1], labels.shape[0], labels.shape[1]
    labels = np.argmax(labels, axis=1)

    if renorm_adj_type == 'sym':
        renorm_adj = utility.calc_sym_renorm_adj(dir_adj)
    else:
        renorm_adj = utility.calc_rw_renorm_adj(dir_adj)

    features = utility.calc_adj_mul_feat(renorm_adj, features, alpha, K)

    idx_train = torch.LongTensor(idx_train).to(device)
    idx_val = torch.LongTensor(idx_val).to(device)
    idx_test = torch.LongTensor(idx_test).to(device)

    features = torch.from_numpy(features).to(device)
    labels = torch.LongTensor(labels).to(device)

    return features, labels, idx_train, idx_val, idx_test, n_feat, n_class, n_vertex

def prepare_model(n_feat, n_class, enable_bias, early_stopping_patience, learning_rate, \
                weight_decay_rate, model_save_path, opt, device=None):
    """Build the SSGC model together with its loss, optimizer and helpers.

    Args:
        n_feat: Input feature dimension passed to ``models.SSGC``.
        n_class: Number of classes passed to ``models.SSGC``.
        enable_bias: Bias flag passed to ``models.SSGC``.
        early_stopping_patience: Patience for ``earlystopping.EarlyStopping``.
        learning_rate: Optimizer learning rate.
        weight_decay_rate: Optimizer weight decay.
        model_save_path: Checkpoint path given to the early-stopping helper.
        opt: Optimizer name, ``'Adam'`` or ``'AdamW'``.
        device: Device the model is moved to. When omitted, the module-level
            ``device`` is used if one exists (the previous implicit
            behaviour), otherwise CUDA when available, else CPU.

    Returns:
        ``(model, loss, early_stopping, optimizer, scheduler)``.

    Raises:
        ValueError: If ``opt`` names an unsupported optimizer.
    """
    optimizer_classes = {'Adam': optim.Adam, 'AdamW': optim.AdamW}
    # Reject a bad optimizer name before any model is allocated.
    if opt not in optimizer_classes:
        raise ValueError(f'ERROR: optimizer {opt} is undefined.')

    if device is None:
        device = globals().get('device')
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = models.SSGC(n_feat, n_class, enable_bias).to(device)
    loss = nn.NLLLoss()
    early_stopping = earlystopping.EarlyStopping(patience=early_stopping_patience, path=model_save_path, verbose=False)

    optimizer = optimizer_classes[opt](model.parameters(), lr=learning_rate, weight_decay=weight_decay_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)

    return model, loss, early_stopping, optimizer, scheduler

def train(epochs, model, optimizer, scheduler, early_stopping, features, labels, loss, idx_train, idx_val, dataset_name):
    """Run full-batch training with early stopping.

    Each epoch does one forward/backward pass on the training nodes and one
    optimizer step; the time for that part alone is recorded. Validation
    loss is then computed by ``val`` on the output of that same forward pass
    and handed to ``early_stopping``, which may end the loop.

    ``scheduler`` and ``dataset_name`` are accepted but not used here.

    Returns:
        Mean of the recorded per-epoch training durations, in seconds.
    """
    epoch_durations = []
    for _ in range(epochs):
        started_at = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features)
        train_output = output[idx_train]
        train_labels = labels[idx_train]
        loss_train = loss(train_output, train_labels)
        # The training accuracy is still evaluated, although nothing reads it.
        utility.calc_accuracy(train_output, train_labels)
        loss_train.backward()
        optimizer.step()
        epoch_durations.append(time.time() - started_at)

        loss_val, _ = val(model, labels, output, loss, idx_val)

        early_stopping(loss_val, model)
        if early_stopping.early_stop:
            break

    return np.mean(epoch_durations)

def val(model, labels, output, loss, idx_val):
    """Score precomputed ``output`` on the validation nodes.

    Switches ``model`` to eval mode but does not run it again: the loss and
    accuracy are computed from the ``output`` tensor passed in.

    Returns:
        ``(loss_val, acc_val)``.
    """
    model.eval()
    with torch.no_grad():
        val_output = output[idx_val]
        val_labels = labels[idx_val]
        return loss(val_output, val_labels), utility.calc_accuracy(val_output, val_labels)

def test(model, model_save_path, features, labels, loss, idx_test, model_name, dataset_name, mean_train_epoch_time_duration):
    """Reload the checkpoint at ``model_save_path`` and score the test nodes.

    ``model_name``, ``dataset_name`` and ``mean_train_epoch_time_duration``
    are accepted but not used here.

    Returns:
        Test accuracy as returned by ``utility.calc_accuracy``.
    """
    state = torch.load(model_save_path)
    model.load_state_dict(state)
    model.eval()
    with torch.no_grad():
        test_output = model(features)[idx_test]
        test_labels = labels[idx_test]
        # The test loss is still evaluated, although only accuracy is returned.
        loss(test_output, test_labels)
        return utility.calc_accuracy(test_output, test_labels)

## Small/Medium Datasets

In [3]:
# Train and evaluate on each citation dataset, repeating the whole pipeline
# ten times per dataset and printing the mean test accuracy.
datasets = ["cora", "citeseer", "pubmed"]
for dataset in datasets:
    average_acc = 0
    for _ in range(10):
        # The names unpacked here are module-level; prepare_model() reads `device` from them.
        device, dataset_name, data_path, graph_path, learning_rate, weight_decay_rate, model_name, model_save_path, renorm_adj_type, alpha, K, enable_bias, epochs, opt, early_stopping_patience = get_parameters(dataset)

        # Data is reloaded and re-propagated on every repetition.
        features, labels, idx_train, idx_val, idx_test, n_feat, n_class, n_vertex = process_data(device, dataset_name, data_path, graph_path, renorm_adj_type, alpha, K)

        model, loss, early_stopping, optimizer, scheduler = prepare_model(n_feat, n_class, enable_bias, early_stopping_patience, learning_rate, weight_decay_rate, model_save_path, opt)

        mean_train_epoch_time_duration = train(epochs, model, optimizer, scheduler, early_stopping, features, labels, loss, idx_train, idx_val, dataset_name)

        # test() reloads the checkpoint written during training before scoring.
        average_acc += test(model, model_save_path, features, labels, loss, idx_test, model_name, dataset_name, mean_train_epoch_time_duration)
    # Divisor must match the repetition count of the inner loop above.
    average_acc /= 10
    print("dataset:", dataset, ", average accuracy:", average_acc)

dataset: cora , average accuracy: tensor(0.7978, dtype=torch.float64)


  row_sum_inv = np.power(np.sum(features, axis=1), -1)


dataset: citeseer , average accuracy: tensor(0.6790, dtype=torch.float64)
dataset: pubmed , average accuracy: tensor(0.7857, dtype=torch.float64)


## Alpha and K Value Comparison on Cora, Citeseer, Pubmed

In [3]:
# Hyper-parameter sweep: vary alpha with K fixed, then vary K with alpha fixed,
# running one train/test pass per setting on each citation dataset.
datasets = ["cora", "citeseer", "pubmed"]
alpha_ = [0.0, 0.05, 0.1, 0.15]
K_ = [2, 4, 8, 16, 32, 64]
alpha_standard = 0.05
K_standard = 16
# Sweep 1: alpha varies, K stays at K_standard.
for dataset in datasets:
    K = K_standard
    for alpha in alpha_:
        # alpha and K returned by get_parameters() are discarded (`_`, `__`) so the loop values are used.
        # NOTE(review): model_save_path still encodes get_parameters()' fixed alpha/K, so every
        # setting writes to the same checkpoint file.
        device, dataset_name, data_path, graph_path, learning_rate, weight_decay_rate, model_name, model_save_path, renorm_adj_type, _, __, enable_bias, epochs, opt, early_stopping_patience = get_parameters(dataset)

        features, labels, idx_train, idx_val, idx_test, n_feat, n_class, n_vertex = process_data(device, dataset_name, data_path, graph_path, renorm_adj_type, alpha, K)

        model, loss, early_stopping, optimizer, scheduler = prepare_model(n_feat, n_class, enable_bias, early_stopping_patience, learning_rate, weight_decay_rate, model_save_path, opt)

        mean_train_epoch_time_duration = train(epochs, model, optimizer, scheduler, early_stopping, features, labels, loss, idx_train, idx_val, dataset_name)

        acc = test(model, model_save_path, features, labels, loss, idx_test, model_name, dataset_name, mean_train_epoch_time_duration)

        # NOTE(review): the label says "average accuracy" but `acc` comes from a single run.
        print("dataset:", dataset, ", average accuracy:", acc, "alpha:", alpha)
# Sweep 2: K varies, alpha stays at alpha_standard.
for dataset in datasets:
    alpha = alpha_standard
    for K in K_:
        device, dataset_name, data_path, graph_path, learning_rate, weight_decay_rate, model_name, model_save_path, renorm_adj_type, _, __, enable_bias, epochs, opt, early_stopping_patience = get_parameters(dataset)

        features, labels, idx_train, idx_val, idx_test, n_feat, n_class, n_vertex = process_data(device, dataset_name, data_path, graph_path, renorm_adj_type, alpha, K)

        model, loss, early_stopping, optimizer, scheduler = prepare_model(n_feat, n_class, enable_bias, early_stopping_patience, learning_rate, weight_decay_rate, model_save_path, opt)

        mean_train_epoch_time_duration = train(epochs, model, optimizer, scheduler, early_stopping, features, labels, loss, idx_train, idx_val, dataset_name)

        acc = test(model, model_save_path, features, labels, loss, idx_test, model_name, dataset_name, mean_train_epoch_time_duration)

        # NOTE(review): single-run accuracy, as in the alpha sweep above.
        print("dataset:", dataset, ", average accuracy:", acc, "K:", K)

dataset: cora , average accuracy: tensor(0.7930, dtype=torch.float64) alpha: 0.0
dataset: cora , average accuracy: tensor(0.7950, dtype=torch.float64) alpha: 0.05
dataset: cora , average accuracy: tensor(0.8050, dtype=torch.float64) alpha: 0.1
dataset: cora , average accuracy: tensor(0.8110, dtype=torch.float64) alpha: 0.15


  row_sum_inv = np.power(np.sum(features, axis=1), -1)


Early stopping counter: 1 out of 10
Early stopping counter: 2 out of 10
Early stopping counter: 3 out of 10
Early stopping counter: 4 out of 10
Early stopping counter: 5 out of 10
Early stopping counter: 6 out of 10
Early stopping counter: 7 out of 10
Early stopping counter: 8 out of 10
Early stopping counter: 9 out of 10
Early stopping counter: 10 out of 10
dataset: citeseer , average accuracy: tensor(0.6770, dtype=torch.float64) alpha: 0.0
Early stopping counter: 1 out of 10
Early stopping counter: 2 out of 10
Early stopping counter: 3 out of 10
Early stopping counter: 4 out of 10
Early stopping counter: 5 out of 10
Early stopping counter: 6 out of 10
Early stopping counter: 7 out of 10
Early stopping counter: 8 out of 10
Early stopping counter: 9 out of 10
Early stopping counter: 10 out of 10
dataset: citeseer , average accuracy: tensor(0.6830, dtype=torch.float64) alpha: 0.05
Early stopping counter: 1 out of 10
Early stopping counter: 1 out of 10
Early stopping counter: 2 out of 10

KeyboardInterrupt: 

## Large OGB Datasets (SSGC) / Community Prediction (Reddit)

In [6]:
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
import scipy.sparse
import networkx as nx
import torch_geometric
from script import dataloader
import numpy as np
import torch
    
# Run SSGC on the large OGB / Reddit graphs with hand-set hyper-parameters
# instead of the .ini files used for the citation datasets.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
dataset_name = ["arxiv", "mag", "products", "reddit"]
renorm_adj_type = "sym"
learning_rate = 0.2
weight_decay_rate = 0.0000005
alpha = 0.05
K = 16
enable_bias = True
epochs = 15
opt = "Adam"
early_stopping_patience = 1000
model_name = "ssgc"
data_path = './data/'
graph_path = None

for ds in dataset_name:
    model_save_path = './model/save/' + ds + '/ssgc_sym_' + str(alpha) + \
                    '_alpha_' + str(K) + '_power' + '.pth'
    # The checkpoint directory is per dataset; create it so saving cannot fail.
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
    # Single run only: these datasets are too large for us to afford 10 repetitions.
    features, labels, idx_train, idx_val, idx_test, n_feat, n_class, n_vertex = process_data(device, ds, data_path, graph_path, renorm_adj_type, alpha, K)

    model, loss, early_stopping, optimizer, scheduler = prepare_model(n_feat, n_class, enable_bias, early_stopping_patience, learning_rate, weight_decay_rate, model_save_path, opt)

    # Pass the current dataset name, not the whole list of names.
    mean_train_epoch_time_duration = train(epochs, model, optimizer, scheduler, early_stopping, features, labels, loss, idx_train, idx_val, ds)

    average_acc = test(model, model_save_path, features, labels, loss, idx_test, model_name, ds, mean_train_epoch_time_duration)
    print("dataset:", ds, "acc:", average_acc)
    # Only the first dataset is processed; remove this to run the rest.
    break

<class 'scipy.sparse.csc.csc_matrix'>
Epoch: 001 | Learning rate: 0.20000000 | Train loss: 3.976514 | Train acc: 0.034022 | Val loss: 3.780668 | Val acc: 0.024900 | Training duration: 0.316380
[2022-01-23 23:06:15] INFO (nni/MainThread) Intermediate result: 0.024900164435048156  (Index 100)
Epoch: 002 | Learning rate: 0.20000000 | Train loss: 3.785760 | Train acc: 0.205540 | Val loss: 3.375981 | Val acc: 0.181147 | Training duration: 0.405493
[2022-01-23 23:06:15] INFO (nni/MainThread) Intermediate result: 0.1811470183563207  (Index 101)
Epoch: 003 | Learning rate: 0.20000000 | Train loss: 4.677960 | Train acc: 0.270890 | Val loss: 3.901505 | Val acc: 0.260445 | Training duration: 0.379934
[2022-01-23 23:06:16] INFO (nni/MainThread) Intermediate result: 0.26044498137521394  (Index 102)
Early stopping counter: 1 out of 1000
Epoch: 004 | Learning rate: 0.20000000 | Train loss: 6.538178 | Train acc: 0.246687 | Val loss: 5.097915 | Val acc: 0.254908 | Training duration: 0.380181
[2022-01-2

Early stopping counter: 3 out of 1000
Epoch: 030 | Learning rate: 0.20000000 | Train loss: 3.542371 | Train acc: 0.378718 | Val loss: 3.126051 | Val acc: 0.375080 | Training duration: 0.377053
[2022-01-23 23:06:27] INFO (nni/MainThread) Intermediate result: 0.375079700661096  (Index 129)
Epoch: 031 | Learning rate: 0.20000000 | Train loss: 3.522965 | Train acc: 0.374726 | Val loss: 3.051761 | Val acc: 0.349072 | Training duration: 0.338187
[2022-01-23 23:06:27] INFO (nni/MainThread) Intermediate result: 0.34907211651397696  (Index 130)
Epoch: 032 | Learning rate: 0.20000000 | Train loss: 3.559358 | Train acc: 0.370746 | Val loss: 3.114008 | Val acc: 0.335213 | Training duration: 0.374455
[2022-01-23 23:06:28] INFO (nni/MainThread) Intermediate result: 0.3352125910265445  (Index 131)
Early stopping counter: 1 out of 1000
Epoch: 033 | Learning rate: 0.20000000 | Train loss: 3.368493 | Train acc: 0.383754 | Val loss: 3.157601 | Val acc: 0.348132 | Training duration: 0.332783
[2022-01-23 2

Early stopping counter: 2 out of 1000
Epoch: 060 | Learning rate: 0.20000000 | Train loss: 2.995047 | Train acc: 0.445773 | Val loss: 2.689713 | Val acc: 0.415115 | Training duration: 0.390875
[2022-01-23 23:06:39] INFO (nni/MainThread) Intermediate result: 0.41511460116111276  (Index 159)
Early stopping counter: 3 out of 1000
Epoch: 061 | Learning rate: 0.20000000 | Train loss: 2.900319 | Train acc: 0.482181 | Val loss: 2.668169 | Val acc: 0.475821 | Training duration: 0.362269
[2022-01-23 23:06:39] INFO (nni/MainThread) Intermediate result: 0.47582133628645257  (Index 160)
Early stopping counter: 4 out of 1000
Epoch: 062 | Learning rate: 0.20000000 | Train loss: 2.790653 | Train acc: 0.504778 | Val loss: 2.640281 | Val acc: 0.524481 | Training duration: 0.376529
[2022-01-23 23:06:40] INFO (nni/MainThread) Intermediate result: 0.524480687271385  (Index 161)
Early stopping counter: 5 out of 1000
Epoch: 063 | Learning rate: 0.20000000 | Train loss: 2.829446 | Train acc: 0.494496 | Val l

## Large OGB ARXIV (SSGC + MLP)

### Embedding

In [4]:
import argparse

import torch
import torch.nn.functional as F

from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing



class SGConv(MessagePassing):
    r"""Parameter-free multi-hop feature propagation.

    Adapted from the simple graph convolution operator of the `"Simplifying
    Graph Convolutional Networks" <https://arxiv.org/abs/1902.07153>`_ paper,
    but :meth:`forward` here returns an average over all hops plus a scaled
    copy of the input, and does not apply the linear layer:

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{X} + \frac{1}{K}
        \sum_{k=1}^{K} \mathbf{S}^k \mathbf{X},

    where :math:`\mathbf{S}` is the adjacency matrix after ``gcn_norm``.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Output size of the (unused in :meth:`forward`)
            linear layer ``lin``.
        K (int, optional): Number of hops :math:`K`. (default: :obj:`1`)
        cached (bool, optional): If set to :obj:`True`, the layer stores the
            result of the first :meth:`forward` call and returns it on every
            later call, ignoring the new inputs.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        add_self_loops (bool, optional): Forwarded to ``gcn_norm``.
            (default: :obj:`True`)
        bias (bool, optional): Bias flag of the linear layer ``lin``.
            (default: :obj:`True`)
        dropout (float, optional): Stored on the instance; not used by this
            class. (default: :obj:`0.05`)
        alpha (float, optional): Weight :math:`\alpha` of the input features
            in the output. (default: :obj:`0.05`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super(SGConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        # Kept so existing attribute access and state dicts stay valid.
        self.lin = Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result (``lin`` is left untouched)."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return the propagated features, or the cached result if present."""
        cache = self._cached_x
        if cache is not None:
            return cache

        if isinstance(edge_index, Tensor):
            edge_index, edge_weight = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim), False,
                self.add_self_loops, dtype=x.dtype)
        elif isinstance(edge_index, SparseTensor):
            edge_index = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim), False,
                self.add_self_loops, dtype=x.dtype)

        output = self.alpha * x
        for _ in range(self.K):
            # Each pass advances x by one hop; every hop contributes 1/K.
            x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                               size=None)
            output = output + (1. / self.K) * x

        if self.cached:
            self._cached_x = output

        return output

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each neighbour feature row by its edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused propagation step for sparse adjacency input."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Wrapper around a cached ``SGConv`` used to precompute embeddings.

    ``forward`` returns the output of ``conv1`` unchanged; the ``lin`` layer
    and the ``dropout`` value are stored but not applied.
    """

    def __init__(self, in_channels, hidden, out_channels, num_layers, dropout):
        super().__init__()
        # num_layers is used as the number of propagation hops K.
        self.conv1 = SGConv(in_channels, hidden, K=num_layers, cached=True)
        self.lin = torch.nn.Linear(hidden, out_channels)
        self.dropout = dropout

    def reset_parameters(self):
        """Reset ``conv1`` and then ``lin``."""
        for module in (self.conv1, self.lin):
            module.reset_parameters()

    def forward(self, x, edge_index):
        """Return ``conv1(x, edge_index)``."""
        propagated = self.conv1(x, edge_index)
        return propagated


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with batch norm, ReLU and dropout between them.

    The final convolution maps to ``out_channels`` and is followed by a
    log-softmax over the last dimension.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        # At least one hidden layer is always built, even for num_layers < 2.
        width_in = in_channels
        for _ in range(max(num_layers - 1, 1)):
            self.convs.append(SAGEConv(width_in, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
            width_in = hidden_channels
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Reset every convolution, then every batch-norm layer."""
        for layer in [*self.convs, *self.bns]:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Return per-node log-probabilities."""
        *hidden_convs, output_conv = self.convs
        for conv, bn in zip(hidden_convs, self.bns):
            x = F.relu(bn(conv(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return output_conv(x, adj_t).log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Do one full-batch optimization step and return the training loss.

    The model is run on ``data.x`` / ``data.adj_t``; the negative
    log-likelihood is taken over the rows selected by ``train_idx`` against
    ``data.y`` with its second dimension squeezed away.

    Returns:
        The loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(predictions[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Precompute propagated ogbn-arxiv features and save them to ``embedding.pt``.

    Loads ogbn-arxiv with a sparse-tensor adjacency, symmetrizes it, runs the
    graph through ``SGC`` once and writes the resulting tensor to disk.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-arxiv',
                                     transform=T.ToSparseTensor())
    graph = dataset[0]
    graph.adj_t = graph.adj_t.to_symmetric()
    graph = graph.to(device)

    # The training split is fetched and moved to the device but not used below.
    train_idx = dataset.get_idx_split()['train'].to(device)

    propagator = SGC(graph.num_features, 256, dataset.num_classes, 10, 0.5).to(device)

    embedding = propagator(graph.x, graph.adj_t)
    torch.save(embedding, 'embedding.pt')
main()

### Execution

In [5]:
import argparse

import torch
import torch.nn.functional as F

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger


class MLP(torch.nn.Module):
    """Multi-layer perceptron with batch norm, ReLU and dropout between layers.

    The last linear layer maps to ``out_channels`` and is followed by a
    log-softmax over the last dimension.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        self.lins = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        # At least one hidden layer is always built, even for num_layers < 2.
        fan_in = in_channels
        for _ in range(max(num_layers - 1, 1)):
            self.lins.append(torch.nn.Linear(fan_in, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
            fan_in = hidden_channels
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Reset every linear layer, then every batch-norm layer."""
        for layer in [*self.lins, *self.bns]:
            layer.reset_parameters()

    def forward(self, x):
        """Return per-row log-probabilities."""
        *hidden_lins, output_lin = self.lins
        for lin, bn in zip(hidden_lins, self.bns):
            x = F.relu(bn(lin(x)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return torch.log_softmax(output_lin(x), dim=-1)


def train(model, x, y_true, train_idx, optimizer):
    """Do one optimization step on the training rows and return the loss.

    Only ``x[train_idx]`` is fed through the model; targets are
    ``y_true`` with its second dimension squeezed away, indexed the same way.

    Returns:
        The negative log-likelihood loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(x[train_idx])
    targets = y_true.squeeze(1)[train_idx]
    loss = F.nll_loss(log_probs, targets)

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, x, y_true, split_idx, evaluator):
    model.eval()

    out = model(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Train and evaluate an MLP on ogbn-arxiv using the saved embedding.

    The node features fed to the MLP are the tensor stored in
    ``embedding.pt``; the dataset only supplies labels, splits and the class
    count. Ten runs of 500 full-batch epochs are executed and the metrics of
    every epoch are printed.
    """
    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Only the embedding is used as input. Concatenating it with the raw
    # features (torch.cat([data.x, embedding], dim=-1)) is the disabled
    # alternative.
    embedding = torch.load('embedding.pt', map_location='cpu')
    x = embedding.to(device)

    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)

    hidden_channels, num_layers, dropout = 256, 3, 0.5
    model = MLP(x.size(-1), hidden_channels, dataset.num_classes,
                num_layers, dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')

    num_runs, num_epochs = 10, 500
    for run in range(num_runs):
        # Every run starts from freshly initialised weights and optimizer.
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        for epoch in range(1, num_epochs + 1):
            loss = train(model, x, y_true, train_idx, optimizer)
            train_acc, valid_acc, test_acc = test(
                model, x, y_true, split_idx, evaluator)

            # Metrics are reported after every epoch.
            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_acc:.2f}%, '
                  f'Valid: {100 * valid_acc:.2f}%, '
                  f'Test: {100 * test_acc:.2f}%')

# Execute the ogbn-arxiv MLP experiment defined by main() in this cell.
main()

Run: 01, Epoch: 01, Loss: 3.7619, Train: 12.49%, Valid: 23.52%, Test: 21.88%
Run: 01, Epoch: 02, Loss: 2.7462, Train: 34.00%, Valid: 34.02%, Test: 30.28%
Run: 01, Epoch: 03, Loss: 2.3291, Train: 31.60%, Valid: 32.18%, Test: 29.18%
Run: 01, Epoch: 04, Loss: 2.1686, Train: 22.62%, Valid: 15.97%, Test: 13.49%
Run: 01, Epoch: 05, Loss: 2.0158, Train: 25.81%, Valid: 16.46%, Test: 13.70%
Run: 01, Epoch: 06, Loss: 1.8935, Train: 35.12%, Valid: 31.40%, Test: 31.58%
Run: 01, Epoch: 07, Loss: 1.7974, Train: 34.11%, Valid: 28.79%, Test: 29.74%
Run: 01, Epoch: 08, Loss: 1.7222, Train: 33.07%, Valid: 24.31%, Test: 24.93%
Run: 01, Epoch: 09, Loss: 1.6544, Train: 36.09%, Valid: 28.05%, Test: 29.06%
Run: 01, Epoch: 10, Loss: 1.6055, Train: 39.27%, Valid: 32.02%, Test: 34.14%
Run: 01, Epoch: 11, Loss: 1.5499, Train: 41.07%, Valid: 34.07%, Test: 36.79%
Run: 01, Epoch: 12, Loss: 1.5198, Train: 42.28%, Valid: 35.73%, Test: 38.53%
Run: 01, Epoch: 13, Loss: 1.4785, Train: 42.45%, Valid: 34.98%, Test: 37.92%

KeyboardInterrupt: 

# Large OGB MAG (SSGC + MLP)

### Embedding

In [1]:
import argparse

import torch
import torch.nn.functional as F
from torch_geometric.data import Data
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from typing import Optional
from torch_geometric.typing import Adj, OptTensor

from torch import Tensor
from torch.nn import Linear
from torch_sparse import SparseTensor, matmul
#from logger import Logger
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch.nn import Linear
from torch_geometric.nn.conv import MessagePassing



class SGConv(MessagePassing):
    r"""Parameter-free feature propagation layer derived from the simple
    graph convolutional operator of the `"Simplifying Graph Convolutional
    Networks" <https://arxiv.org/abs/1902.07153>`_ paper.

    Unlike the original operator, no linear projection is applied. The layer
    L2-normalises every input row and returns a weighted residual plus the
    average of the first :math:`K` propagation steps:

    .. math::
        \mathbf{X}^{\prime} = \alpha \mathbf{\tilde{X}} + \frac{1}{K}
        \sum_{k=1}^{K} \mathbf{T}^{k} \mathbf{\tilde{X}},

    where :math:`\mathbf{\tilde{X}}` is the row-normalised input and
    :math:`\mathbf{T}` is the adjacency matrix returned by
    :func:`gcn_norm` (self-loops are inserted when ``add_self_loops`` is set).

    Args:
        in_channels (int): Size of each input sample. Only used by
            :meth:`__repr__`; the output has the same width as the input.
        out_channels (int): Kept for interface compatibility; only used by
            :meth:`__repr__`.
        K (int, optional): Number of hops :math:`K`. (default: :obj:`1`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the propagated features on first execution and return the cached
            tensor on every further call, ignoring the new inputs.
            This parameter should only be set to :obj:`True` in transductive
            learning scenarios. (default: :obj:`False`)
        add_self_loops (bool, optional): If set to :obj:`False`, will not add
            self-loops to the input graph. (default: :obj:`True`)
        bias (bool, optional): Accepted for interface compatibility; unused
            because the layer has no linear projection. (default: :obj:`True`)
        dropout (float, optional): Stored on the instance but not applied by
            :meth:`forward`. (default: :obj:`0.05`)
        alpha (float, optional): Weight of the normalised input features in
            the output. (default: :obj:`0.05`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = True,
                 bias: bool = True, dropout: float = 0.05,
                 alpha: float = 0.05, **kwargs):
        super().__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.dropout = dropout
        self.alpha = alpha

        self._cached_x = None

        self.reset_parameters()

    def reset_parameters(self):
        """Drop the cached propagation result (there are no weights)."""
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Return the propagated features for ``x`` on the given graph.

        When a cached result exists it is returned as-is and the arguments
        are not looked at.
        """
        if self._cached_x is not None:
            return self._cached_x

        if isinstance(edge_index, Tensor):
            edge_index, edge_weight = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim), False,
                self.add_self_loops, dtype=x.dtype)
        elif isinstance(edge_index, SparseTensor):
            edge_index = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim), False,
                self.add_self_loops, dtype=x.dtype)

        # Scale every node's feature vector to unit L2 norm before mixing.
        x = F.normalize(x, dim=1, p=2)
        output = self.alpha * x
        for _ in range(self.K):
            x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                               size=None)
            # Each hop contributes with equal weight 1/K.
            output = output + (1. / self.K) * x

        if self.cached:
            self._cached_x = output

        return output

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each neighbour's features by its edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused propagation step for sparse adjacency input."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(self.__class__.__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)


class SGC(torch.nn.Module):
    """Wrapper around a single cached :class:`SGConv` propagation layer.

    ``num_layers`` is forwarded as the number of hops ``K``. The module
    returns the output of the convolution unchanged; no classifier or
    softmax is applied.
    """

    def __init__(self, in_channels, out_channels, num_layers):
        super().__init__()
        self.conv1 = SGConv(in_channels, out_channels,
                            K=num_layers, cached=True)

    def reset_parameters(self):
        """Reset the wrapped convolution."""
        self.conv1.reset_parameters()

    def forward(self, x, edge_index):
        """Return the propagated features produced by the convolution."""
        propagated = self.conv1(x, edge_index)
        return propagated

class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers with ReLU and dropout in between.

    All convolutions are created with ``normalize=False``, so the adjacency
    passed to :meth:`forward` is used as given (presumably it is expected to
    be normalised by the caller -- confirm before use).

    Args:
        in_channels: Width of the input features.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of output classes.
        num_layers: Total number of convolutions; at least two are created.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(GCN, self).__init__()

        # GCNConv is not part of this cell's import list; importing it here
        # keeps the class usable instead of failing with a NameError.
        from torch_geometric.nn import GCNConv

        self.convs = torch.nn.ModuleList()
        self.convs.append(
            GCNConv(in_channels, hidden_channels, normalize=False))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, normalize=False))
        self.convs.append(
            GCNConv(hidden_channels, out_channels, normalize=False))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every convolution."""
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        """Return log-softmax class scores for every node."""
        for conv in self.convs[:-1]:
            x = conv(x, adj_t)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x.log_softmax(dim=-1)


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with ReLU and dropout in between.

    Args:
        in_channels: Width of the input features.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of output classes.
        num_layers: Total number of convolutions; at least two are created.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Input width of every hidden convolution, first layer included.
        hidden_inputs = [in_channels]
        hidden_inputs += [hidden_channels] * max(num_layers - 2, 0)

        self.convs = torch.nn.ModuleList(
            [SAGEConv(width_in, hidden_channels) for width_in in hidden_inputs])
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every convolution."""
        for layer in self.convs:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Return log-softmax class scores for every node."""
        *hidden_convs, output_conv = self.convs
        for conv in hidden_convs:
            x = F.relu(conv(x, adj_t))
            x = F.dropout(x, p=self.dropout, training=self.training)
        logits = output_conv(x, adj_t)
        return logits.log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-graph optimisation step and return the training loss.

    The model is evaluated on every node (``data.x`` with ``data.adj_t``);
    only the rows selected by ``train_idx`` contribute to the loss.

    Args:
        model: Module called as ``model(x, adj_t)`` returning
            log-probabilities per node.
        data: Object exposing ``x``, ``adj_t`` and ``y`` (``y`` is squeezed
            along dim 1 before indexing).
        train_idx: Indices of the training nodes.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        The negative log-likelihood loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    all_log_probs = model(data.x, data.adj_t)
    train_log_probs = all_log_probs[train_idx]
    train_targets = data.y.squeeze(1)[train_idx]

    nll = F.nll_loss(train_log_probs, train_targets)
    nll.backward()
    optimizer.step()

    return nll.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']['paper']],
        'y_pred': y_pred[split_idx['train']['paper']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']['paper']],
        'y_pred': y_pred[split_idx['valid']['paper']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']['paper']],
        'y_pred': y_pred[split_idx['test']['paper']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Compute propagated paper features for ogbn-mag and save them.

    Builds the paper-cites-paper graph, symmetrises and degree-normalises its
    adjacency, runs the parameter-free ``SGC`` model once and writes the
    resulting tensor to ``embedding.pt``. No training happens here; the
    train/evaluate loop of the original example is intentionally left out.
    """
    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-mag')
    rel_data = dataset[0]

    # Only the paper <-> paper citation relation is kept.
    paper_graph = Data(
        x=rel_data.x_dict['paper'],
        edge_index=rel_data.edge_index_dict[('paper', 'cites', 'paper')],
        y=rel_data.y_dict['paper'])

    data = T.ToSparseTensor()(paper_graph)
    data.adj_t = data.adj_t.to_symmetric()

    split_idx = dataset.get_idx_split()
    # Not used by the embedding export below.
    train_idx = split_idx['train']['paper'].to(device)

    num_hops = 16
    model = SGC(data.num_features, dataset.num_classes, num_hops).to(device)

    # Symmetric degree normalisation of the adjacency with self-loops set.
    # NOTE(review): SGConv.forward also runs gcn_norm on SparseTensor input,
    # so the adjacency appears to be normalised twice -- confirm intended.
    adj_t = data.adj_t.set_diag()
    deg = adj_t.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    # Isolated nodes have degree 0, hence an infinite inverse root.
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    data.adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)

    data = data.to(device)
    features = model(data.x, data.adj_t)
    torch.save(features, 'embedding.pt')


# Execute the ogbn-mag embedding export defined by main(); writes embedding.pt.
main()

### Execution

In [None]:
import argparse

import torch
import torch.nn.functional as F

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger


class MLP(torch.nn.Module):
    """Plain multi-layer perceptron without normalisation layers.

    Hidden layers are ``Linear -> ReLU -> dropout``; the last ``Linear``
    layer is followed by a log-softmax over the final dimension.

    Args:
        in_channels: Width of the input features.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of output classes.
        num_layers: Total number of linear layers. At least two linear
            layers are always created, even for smaller values.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Input width of each hidden layer, first layer included.
        hidden_inputs = [in_channels]
        hidden_inputs += [hidden_channels] * max(num_layers - 2, 0)

        self.lins = torch.nn.ModuleList(
            [torch.nn.Linear(width_in, hidden_channels)
             for width_in in hidden_inputs])
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every linear layer."""
        for layer in self.lins:
            layer.reset_parameters()

    def forward(self, x):
        """Return log-softmax class scores for the rows of ``x``."""
        *hidden_layers, output_layer = self.lins
        for layer in hidden_layers:
            x = F.dropout(F.relu(layer(x)), p=self.dropout,
                          training=self.training)
        return torch.log_softmax(output_layer(x), dim=-1)


def train(model, x, y_true, train_idx, optimizer):
    """Run one full-batch optimisation step on the training rows.

    Args:
        model: Module mapping feature rows to log-probabilities.
        x: Feature tensor indexed by node.
        y_true: Label tensor; the selected rows are squeezed along dim 1.
        train_idx: Indices of the training nodes.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        The negative log-likelihood loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    batch_features = x[train_idx]
    batch_targets = y_true[train_idx].squeeze(1)

    nll = F.nll_loss(model(batch_features), batch_targets)
    nll.backward()
    optimizer.step()

    return nll.item()


@torch.no_grad()
def test(model, x, y_true, split_idx, evaluator):
    model.eval()

    out = model(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']['paper']],
        'y_pred': y_pred[split_idx['train']['paper']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']['paper']],
        'y_pred': y_pred[split_idx['valid']['paper']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']['paper']],
        'y_pred': y_pred[split_idx['test']['paper']],
    })['acc']

    return train_acc, valid_acc, test_acc


def main():
    """Train and evaluate an MLP on ogbn-mag paper nodes.

    The input of the MLP is the raw paper features concatenated with the
    tensor stored in ``embedding.pt``. Ten runs of 500 full-batch epochs are
    executed and the metrics of every epoch are printed.
    """
    device = torch.device(f'cuda:{0}' if torch.cuda.is_available() else 'cpu')

    dataset = PygNodePropPredDataset(name='ogbn-mag')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Raw features and precomputed embedding side by side, feature-wise.
    raw_features = data.x_dict['paper']
    embedding = torch.load('embedding.pt', map_location='cpu')
    x = torch.cat([raw_features, embedding], dim=-1).to(device)

    y_true = data.y_dict['paper'].to(device)
    train_idx = split_idx['train']['paper'].to(device)

    hidden_channels, num_layers, dropout = 256, 3, 0.0
    model = MLP(x.size(-1), hidden_channels, dataset.num_classes,
                num_layers, dropout).to(device)

    evaluator = Evaluator(name='ogbn-mag')

    num_runs, num_epochs = 10, 500
    for run in range(num_runs):
        # Every run starts from freshly initialised weights and optimizer.
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        for epoch in range(1, num_epochs + 1):
            loss = train(model, x, y_true, train_idx, optimizer)
            train_acc, valid_acc, test_acc = test(
                model, x, y_true, split_idx, evaluator)

            # Metrics are reported after every epoch.
            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_acc:.2f}%, '
                  f'Valid: {100 * valid_acc:.2f}%, '
                  f'Test: {100 * test_acc:.2f}%')

# Execute the ogbn-mag MLP experiment defined by main(); reads embedding.pt.
main()

# Node Clustering

In [6]:
import scipy.io as sio
import time
import tensorflow as tf
import numpy as np
import scipy.sparse as sp
from sklearn.cluster import KMeans
from NodeClustering.metrics import clustering_metrics
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import normalize
def normalize_adj(adj, type='sym'):
    """Normalize an adjacency matrix by its row sums (node degrees).

    Rows whose sum is zero are left as all-zero rows instead of producing
    infinities.

    Args:
        adj: Square adjacency matrix. For ``'sym'`` anything accepted by
            ``scipy.sparse.coo_matrix``; for ``'rw'`` an object providing
            ``sum(1)`` that can be left-multiplied by a sparse diagonal
            matrix.
        type: ``'sym'`` for the symmetric form ``D^-1/2 A D^-1/2`` or
            ``'rw'`` for the random-walk form ``D^-1 A``.

    Returns:
        The normalized matrix; in COO format for ``'sym'``, and whatever the
        sparse product yields for ``'rw'``.

    Raises:
        ValueError: If ``type`` is neither ``'sym'`` nor ``'rw'``.
    """
    if type == 'sym':
        adj = sp.coo_matrix(adj)
        rowsum = np.array(adj.sum(1))
        # Zero-degree rows yield inf here; they are zeroed right below, so
        # the divide-by-zero warning carries no information.
        with np.errstate(divide='ignore'):
            d_inv_sqrt = np.power(rowsum, -0.5).flatten()
        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
        d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
        return d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt).tocoo()

    if type == 'rw':
        rowsum = np.array(adj.sum(1))
        with np.errstate(divide='ignore'):
            d_inv = np.power(rowsum, -1.0).flatten()
        d_inv[np.isinf(d_inv)] = 0.
        d_mat_inv = sp.diags(d_inv)
        return d_mat_inv.dot(adj)

    # Previously an unknown type fell through and returned None silently.
    raise ValueError(
        "type must be 'sym' or 'rw', got {!r}".format(type))


def preprocess_adj(adj, type='sym', loop=True):
    """Optionally add self-loops to ``adj`` and normalize it.

    When ``loop`` is true an identity matrix of matching size is added
    first; the result is then passed to ``normalize_adj`` with the given
    ``type`` and returned unchanged.
    """
    with_loops = adj + sp.eye(adj.shape[0]) if loop else adj
    return normalize_adj(with_loops, type=type)


def to_onehot(prelabel):
    """Return the one-hot encoding of integer labels, one row per class.

    The number of rows is ``max(prelabel) + 1`` so that a label value is
    always a valid row index, even when some intermediate label does not
    occur (for example an empty cluster). For labels that cover
    ``0..k-1`` without gaps this equals the number of distinct labels.

    Args:
        prelabel: 1-D sequence of non-negative integer labels.

    Returns:
        Float array of shape ``(num_classes, num_samples)`` whose entry
        ``[c, i]`` is 1 when sample ``i`` carries label ``c`` and 0 otherwise.
    """
    prelabel = np.asarray(prelabel)
    n_samples = prelabel.shape[0]
    if n_samples == 0:
        return np.zeros((0, 0))

    # Size by the largest label value, not by the count of distinct labels:
    # the labels themselves are used as column indices below.
    k = int(prelabel.max()) + 1
    label = np.zeros([n_samples, k])
    label[np.arange(n_samples), prelabel] = 1
    return label.T


def square_dist(prelabel, feature):
    """Return the average intra-cluster squared spread of ``feature``.

    For every cluster the quantity ``2 * (mean(|x|^2) - |mean(x)|^2)`` is
    computed from its members, and the values are averaged over all rows of
    the one-hot label matrix produced by ``to_onehot``.

    Args:
        prelabel: 1-D array of integer cluster labels, one per sample.
        feature: Sample-by-dimension feature matrix (dense or SciPy sparse).

    Returns:
        The mean intra-cluster value as a float.
    """
    if sp.issparse(feature):
        feature = feature.todense()
    feature = np.array(feature)

    onehot = to_onehot(prelabel)

    m = onehot.shape[0]
    count = onehot.sum(1).reshape(m, 1)
    # Clusters without members would otherwise divide 0 by 0.
    count[count == 0] = 1

    mean = onehot.dot(feature) / count
    a2 = (onehot.dot(feature * feature) / count).sum(1)
    # a2 is 1-D, so explicit axes are needed to build the pairwise matrix;
    # the diagonal (the only part used) is the same as before.
    pdist2 = np.array(a2[:, None] + a2[None, :] - 2 * mean.dot(mean.T))

    intra_dist = pdist2.trace()
    intra_dist /= m
    return intra_dist

def dist(prelabel, feature):
    """Return the mean pairwise Euclidean distance inside each cluster,
    averaged over clusters.

    Every distinct label in ``prelabel`` is treated as one cluster. Clusters
    with fewer than two members contribute nothing, but still count towards
    the number of clusters the result is divided by.

    Args:
        prelabel: 1-D array of cluster labels, one per sample.
        feature: Sample-by-dimension array indexable by a row selection.

    Returns:
        The accumulated intra-cluster distance (0 when no cluster has at
        least two members).
    """
    labels = np.unique(prelabel)
    k = len(labels)
    intra_dist = 0

    # Iterate over the labels that actually occur instead of range(k), so
    # labels that do not start at 0 or have gaps are handled as well.
    for label in labels:
        Data_i = feature[np.where(prelabel == label)]
        n_i = Data_i.shape[0]
        if n_i < 2:
            # No pairs to measure; checked before computing distances.
            continue

        Dis = euclidean_distances(Data_i, Data_i)
        intra_dist = intra_dist + 1 / k * 1 / (n_i * (n_i - 1)) * Dis.sum()

    return intra_dist

# Node-clustering experiment: for every dataset, features are repeatedly
# propagated over the normalized adjacency, the running average of the
# propagated features is reduced with a truncated SVD and clustered with
# k-means, and the loop stops once the intra-cluster distance starts to grow.
datasets = ['cora', 'citeseer', 'pubmed', 'wiki']
for dataset in datasets:

    # An outer loop over 10 repetitions (`for i in range(10)`) is disabled
    # for lack of computational capacity.
    data = sio.loadmat('./NodeClustering/{}.mat'.format(dataset))
    feature = data['fea']
    if sp.issparse(feature):
        feature = feature.todense()

    adj = data['W']
    gnd = data['gnd']
    # Flatten the ground-truth labels to 1-D and shift them down by one
    # (presumably the .mat labels are 1-based -- confirm against the data).
    gnd = gnd.T
    gnd = gnd - 1
    gnd = gnd[0, :]
    k = len(np.unique(gnd))  # number of clusters = number of distinct labels
    adj = sp.coo_matrix(adj)
    # Seed with a large sentinel so the first stopping comparison
    # (intra_list[1] > intra_list[0]) has something to compare against.
    intra_list = []
    intra_list.append(10000)


    # Per-power means and standard deviations over the `rep` k-means runs.
    acc_list = []
    nmi_list = []
    f1_list = []
    stdacc_list = []
    stdnmi_list = []
    stdf1_list = []
    max_iter = 60  # upper bound on the propagation power
    rep = 10       # k-means repetitions per power
    t = time.time()
    #adj_normalized = preprocess_adj(adj)
    #adj_normalized = (sp.eye(adj_normalized.shape[0]) + adj_normalized) / 2
    # Symmetric normalization without added self-loops.
    adj_normalized = preprocess_adj(adj,loop=False)
    # adj_normalized = (sp.eye(adj_normalized.shape[0]) + adj_normalized + adj_normalized.dot(adj_normalized)) / 3
    total_dist = []  # not used below

    tt = 0
    # NOTE: alpha, feature_ori, oneV and den are assigned here but never read
    # again in this loop.
    alpha = 0.05 #0.05 for pubmed, 0.1 for citeseer
    feature_ori = feature.astype('float64')
    #emb = feature.astype('float64')
    #emb = feature.astype('float64')
    # Running sum of the propagated features; starts at zero, so the raw
    # (unpropagated) features are not part of it.
    emb = np.zeros_like(feature).astype('float64')
    oneV = np.ones(feature.shape[0])
    den = np.zeros_like(oneV)
    while 1:
        tt = tt + 1
        power = tt  # only referenced by the commented-out print below
        intraD = np.zeros(rep)


        ac = np.zeros(rep)
        nm = np.zeros(rep)
        f1 = np.zeros(rep)



        # One more propagation step: `feature` now holds the tt-th power of
        # the normalized adjacency applied to the original features.
        feature = adj_normalized.dot(feature)

        # Average of the first tt propagated feature matrices.
        emb += feature
        emb_norm = emb/tt
        # Truncated SVD (16 largest singular values); the averaged features
        # are projected onto the right singular vectors and then normalized.
        u, s, v = sp.linalg.svds(emb_norm, k=16, which='LM')
        u = normalize(emb_norm.dot(v.T))




        # k-means is run `rep` times on the same representation; each run
        # records its intra-cluster distance and clustering metrics.
        for i in range(rep):
            kmeans = KMeans(n_clusters=k).fit(u)
            predict_labels = kmeans.predict(u)
            intraD[i] = square_dist(predict_labels, u)
            #intraD[i] = dist(predict_labels, feature)
            cm = clustering_metrics(gnd, predict_labels)
            ac[i], nm[i], f1[i] = cm.evaluationClusterModelFromLabel()

        intramean = np.mean(intraD)
        acc_means = np.mean(ac)
        acc_stds = np.std(ac)
        nmi_means = np.mean(nm)
        nmi_stds = np.std(nm)
        f1_means = np.mean(f1)
        f1_stds = np.std(f1)

        intra_list.append(intramean)
        acc_list.append(acc_means)
        stdacc_list.append(acc_stds)
        nmi_list.append(nmi_means)
        stdnmi_list.append(nmi_stds)
        f1_list.append(f1_means)
        stdf1_list.append(f1_stds)
        #print('power: {}'.format(power),
        #      'intra_dist: {}'.format(intramean),
        #      'acc_mean: {}'.format(acc_means),
        #      'acc_std: {}'.format(acc_stds),
        #      'nmi_mean: {}'.format(nmi_means),
        #      'nmi_std: {}'.format(nmi_stds),
        #      'f1_mean: {}'.format(f1_means),
        #      'f1_std: {}'.format(f1_stds))

        # Stop as soon as the mean intra-cluster distance grows compared with
        # the previous power, or once the power exceeds max_iter.
        if intra_list[tt] > intra_list[tt - 1] or tt > max_iter:
            #print('bestpower: {}'.format(tt - 1))
            t = time.time() - t  # elapsed wall-clock time for this dataset
            #print(t)
            break
    # The reported numbers are averages of the per-power means over every
    # power that was evaluated, including the last one that triggered the stop.
    print("dataset:", dataset, "acc:", str(sum(acc_list)/len(acc_list)), " nmi:", str(sum(nmi_list)/len(nmi_list)), "f1-score: ", str(sum(f1_list)/len(f1_list)))

dataset: cora acc: 0.6667774495322502  nmi: 0.5156671595176218 f1-score:  0.6304953643534703
dataset: citeseer acc: 0.687702722128566  nmi: 0.42589838263216256 f1-score:  0.6442045975917995
dataset: pubmed acc: 0.6967440097045325  nmi: 0.31792580915945245 f1-score:  0.6907569086430445
dataset: wiki acc: 0.5207093065916596  nmi: 0.49004917952274724 f1-score:  0.4443000509372268


# Document Classification

In [1]:
import time
import argparse
import numpy as np
import pickle
import os
from copy import deepcopy
import torch
import torch.nn.functional as F
import torch.optim as optim
import tabulate
from functools import partial
from DocumentClassification.utils import *
from DocumentClassification.models import SGC

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loading a tuned weight decay from "tuned_result/<dataset>.SGC.tuning.txt"
# and seeding via set_seed(args.seed, args.cuda) came from a command-line
# version of this code and are disabled here.

# Enable cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

def train_linear(model, feat_dict, weight_decay, binary=False, epochs=3):
    """Fit the linear classifier with L-BFGS and score it on the validation split.

    The labels are read from the module-level ``label_dict`` (keys
    ``"train"`` and ``"val"``), which must be defined before this is called.
    Features and labels are moved to the device that holds ``model.W.weight``
    so the function works on CPU-only hosts as well as on a GPU.

    Args:
        model: Classifier exposing a ``W`` layer with a ``weight`` tensor.
        feat_dict: Mapping with ``"train"`` and ``"val"`` feature tensors.
        weight_decay: Coefficient of the L2 penalty on ``model.W.weight``.
        binary: When true, use sigmoid + binary cross-entropy instead of
            log-softmax + negative log-likelihood.
        epochs: Number of L-BFGS ``step`` calls. (default: 3)

    Returns:
        ``(validation accuracy, model, training time in seconds)``.
    """
    if binary:
        act = torch.sigmoid
        criterion = F.binary_cross_entropy
    else:
        act = partial(F.log_softmax, dim=1)
        criterion = F.nll_loss

    # Keep data on the same device as the weights instead of forcing CUDA.
    model_device = model.W.weight.device
    train_features = feat_dict["train"].to(model_device)
    train_labels = label_dict["train"].to(model_device)

    optimizer = optim.LBFGS(model.parameters())

    def closure():
        # L-BFGS re-evaluates the objective several times per step.
        optimizer.zero_grad()
        output = model(train_features).squeeze()
        l2_reg = 0.5 * weight_decay * (model.W.weight ** 2).sum()
        loss = criterion(act(output), train_labels) + l2_reg
        loss.backward()
        return loss

    start = time.perf_counter()
    for _ in range(epochs):
        optimizer.step(closure)
    train_time = time.perf_counter() - start

    val_res = eval_linear(model, feat_dict["val"].to(model_device),
                          label_dict["val"].to(model_device), binary)
    return val_res['accuracy'], model, train_time

def eval_linear(model, features, label, binary=False):
    """Compute loss and accuracy of ``model`` on one set of features.

    Args:
        model: Classifier returning raw scores for ``features``.
        features: Input tensor passed to the model.
        label: Targets; class indices for the multi-class case, float 0/1
            values for the binary case.
        binary: When true, use sigmoid + binary cross-entropy and a 0.5
            decision threshold; otherwise log-softmax + negative
            log-likelihood and the highest-scoring class.

    Returns:
        Dict with the keys ``'loss'`` and ``'accuracy'``.
    """
    model.eval()

    if binary:
        act, criterion = torch.sigmoid, F.binary_cross_entropy
    else:
        act, criterion = partial(F.log_softmax, dim=1), F.nll_loss

    with torch.no_grad():
        output = model(features).squeeze()
        loss = criterion(act(output), label)

        if binary:
            predict_class = act(output).gt(0.5).float()
        else:
            predict_class = output.max(1).indices

        n_correct = torch.eq(predict_class, label).long().sum().item()
        acc = n_correct / predict_class.size(0)

    return {'loss': loss.item(), 'accuracy': acc}

# Document-classification experiment: for every corpus, precompute the
# propagated features, fit the linear SGC classifier and report accuracies.
datasets = ['20ng', 'r8', 'r52', 'ohsumed', 'mr']
for dataset in datasets:
    # "mr" is the only corpus handled as a binary task (float labels, one
    # output unit); every other corpus uses integer class labels.
    is_binary = dataset == "mr"

    # Load the corpus of the current iteration (previously '20ng' was
    # loaded on every pass regardless of the loop variable).
    sp_adj, index_dict, label_dict = load_corpus(dataset)
    for k, v in label_dict.items():
        if is_binary:
            label_dict[k] = torch.Tensor(v).to(device)
        else:
            label_dict[k] = torch.LongTensor(v).to(device)
    # Node index tensor; not passed to the sgc_precompute call below.
    features = torch.arange(sp_adj.shape[0]).to(device)

    adj = sparse_to_torch_sparse(sp_adj, device="cpu")

    if is_binary:
        nclass = 1
    else:
        nclass = label_dict["train"].max().item() + 1

    # NOTE: the dense adjacency is quadratic in the number of nodes and is
    # what exhausted memory on the largest corpus.
    adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
    feat_dict, precompute_time = sgc_precompute(adj, adj_dense, 5 - 1, index_dict)

    model = SGC(nfeat=feat_dict["train"].size(1),
                nclass=nclass)
    # Follow the selected device instead of requiring CUDA unconditionally.
    model.to(device)

    # `args` does not exist in this notebook; the binary flag is derived
    # from the dataset name like in the evaluation calls.
    val_acc, best_model, train_time = train_linear(model, feat_dict, 0, is_binary)
    test_res = eval_linear(best_model, feat_dict["test"].to(device),
                           label_dict["test"].to(device), is_binary)
    train_res = eval_linear(best_model, feat_dict["train"].to(device),
                            label_dict["train"].to(device), is_binary)
    print("Total Time: {:2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}".format(precompute_time+train_time, train_res["accuracy"], val_acc, test_res["accuracy"]))

cpu


MemoryError: Unable to allocate 21.1 GiB for an array with shape (53210, 53210) and data type float64