To run the experiments on a different dataset, change the "--dataset" argument in the code (supported values: cora, citeseer, pubmed, CS, Physics, BlogCatalog, DBLP). This code is based on the original implementation of SGC, which can be found at the GitHub repository https://github.com/Tiiiger/SGC/tree/master.

Hyper-parameter tuning for SGC searches over 12 possible configurations (2 learning rates x 6 weight-decay values), which are defined in the code.

The current implementation downloads and loads the Physics and CS datasets using DGL.

For the cora, citeseer and pubmed datasets, loading is the same as in the original implementation, which reads the "data" folder from the original implementation's repository: https://github.com/Tiiiger/SGC/tree/master.

In [None]:
import argparse
import torch

def get_citation_args():
    """Build the experiment configuration from command-line options.

    Options that the parser does not know are ignored (``parse_known_args``),
    so the function does not fail when the host process was started with
    extra arguments of its own.

    Returns:
        argparse.Namespace: the parsed options plus a derived ``cuda``
        attribute, which is true only when ``--no-cuda`` was not given and
        ``torch.cuda.is_available()`` reports a usable device.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='Disables CUDA training.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--epochs', type=int, default=1000,
                        help='Number of epochs to train.')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='Initial learning rate.')
    parser.add_argument('--weight_decay', type=float, default=5e-6,
                        help='Weight decay (L2 loss on parameters).')
    parser.add_argument('--hidden', type=int, default=0,
                        help='Number of hidden units.')
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout rate (1 - keep probability).')

    # cora, citeseer, pubmed, CS, Physics, BlogCatalog, DBLP
    parser.add_argument('--dataset', type=str, default="cora",
                        help='Dataset to use.')
    parser.add_argument('--model', type=str, default="SGC",
                        choices=["SGC"],
                        help='model to use.')
    parser.add_argument('--feature', type=str, default="mul",
                        choices=['mul', 'cat', 'adj'],
                        help='feature-type')
    parser.add_argument('--normalization', type=str, default='AugNormAdj',
                        choices=['AugNormAdj'],
                        help='Normalization method for the adjacency matrix.')
    parser.add_argument('--degree', type=int, default=2,
                        help='degree of the approximation.')
    parser.add_argument('--per', type=int, default=-1,
                        help='Number of each nodes so as to balance.')
    # The help text used to be a copy of the --feature help ("feature-type").
    parser.add_argument('--experiment', type=str, default="base-experiment",
                        help='Name of the experiment.')
    parser.add_argument('--tuned', action='store_true', help='use tuned hyperparams')

    args, _ = parser.parse_known_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    return args

In [None]:
def split_train_val_test_ids(labels, train_samples_per_class=20, val_samples_per_class=30):
    """Randomly split sample indices into train/val/test sets, class by class.

    For every distinct value in ``labels`` the indices carrying that value are
    shuffled in place with NumPy's global random state; the first
    ``train_samples_per_class`` go to training, the next
    ``val_samples_per_class`` to validation and all remaining ones to test.

    Args:
        labels: 1-D array of class labels, one per sample.
        train_samples_per_class: number of training indices taken per class.
        val_samples_per_class: number of validation indices taken per class.

    Returns:
        Three lists ``(train_ids, val_ids, test_ids)`` of indices, each grouped
        by class in the order returned by ``np.unique``.
    """
    train_ids, val_ids, test_ids = [], [], []
    val_stop = train_samples_per_class + val_samples_per_class

    for cls in np.unique(labels):
        # Positions of all samples of this class, in random order.
        members = np.where(labels == cls)[0]
        np.random.shuffle(members)

        for bucket, chunk in (
            (train_ids, members[:train_samples_per_class]),
            (val_ids, members[train_samples_per_class:val_stop]),
            (test_ids, members[val_stop:]),
        ):
            bucket.extend(chunk)

    return train_ids, val_ids, test_ids

In [None]:
!pip install dgl

Collecting dgl
  Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cublas_cu12-12.1.3.1

In [None]:
from dgl.data import DGLDataset

class Blogcatalog(DGLDataset):
    """Single-graph DGL dataset built from the local ``blogcatalog.npz`` file."""

    def __init__(self):
        super().__init__(name="Blogcatalog")

    def process(self):
        """Read the archive and populate ``self.graph`` and ``self.num_classes``."""
        print("Loading Blogcatalog graph dataset...")
        archive = np.load('blogcatalog.npz', allow_pickle=True)
        raw_labels = archive['node_label']
        raw_features = archive['node_attr']
        raw_adjacency = archive['adj_matrix']

        # .tolist() unwraps the stored object and .toarray() densifies it
        # (presumably a pickled scipy sparse matrix - confirm against the file).
        node_features = torch.tensor(raw_features.tolist().toarray()).float()
        # Labels are shifted down by one (presumably 1-based in the file).
        node_labels = torch.tensor(raw_labels).to(torch.int64) - 1

        dense_adj = raw_adjacency.tolist().toarray()
        # Add the transpose and the identity; every non-zero entry of the
        # result becomes an edge, so each node also gets a self-loop.
        dense_adj = dense_adj + np.transpose(dense_adj) + np.eye(dense_adj.shape[0])
        print(dense_adj)
        src, dst = np.nonzero(dense_adj)

        graph = dgl.graph((src, dst), num_nodes=dense_adj.shape[0])
        graph.ndata["feat"] = node_features
        graph.ndata["label"] = node_labels
        self.graph = graph
        self.num_classes = len(np.unique(node_labels))
        print("Loading done")

    def __getitem__(self, i):
        # The dataset holds exactly one graph; the index is ignored.
        return self.graph

    def __len__(self):
        return 1


class DBLP(DGLDataset):
    """Single-graph DGL dataset built from the local ``DBLP_BERT_graph_data.npz`` file."""

    def __init__(self):
        super().__init__(name="DBLP")

    def process(self):
        """Read the archive and populate ``self.graph`` and ``self.num_classes``."""
        print("Loading DBLP graph dataset...")
        archive = np.load('DBLP_BERT_graph_data.npz', allow_pickle=True)
        raw_labels = archive['labels']
        raw_features = archive['feature_matrix']
        raw_adjacency = archive['adj_mat']

        node_features = torch.tensor(raw_features).float()
        node_labels = torch.tensor(raw_labels).to(torch.int64)

        # .tolist() unwraps the stored object and .toarray() densifies it
        # (presumably a pickled scipy sparse matrix - confirm against the file).
        dense_adj = raw_adjacency.tolist().toarray()
        # Add the transpose and the identity; every non-zero entry of the
        # result becomes an edge, so each node also gets a self-loop.
        dense_adj = dense_adj + np.transpose(dense_adj) + np.eye(dense_adj.shape[0])
        print(dense_adj)
        src, dst = np.nonzero(dense_adj)

        graph = dgl.graph((src, dst), num_nodes=dense_adj.shape[0])
        graph.ndata["feat"] = node_features
        graph.ndata["label"] = node_labels
        self.graph = graph
        self.num_classes = len(np.unique(node_labels))
        print("Loading done")

    def __getitem__(self, i):
        # The dataset holds exactly one graph; the index is ignored.
        return self.graph

    def __len__(self):
        return 1

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [None]:
import numpy as np
import scipy.sparse as sp
import torch
import sys
import pickle as pkl
import networkx as nx
from time import perf_counter


import dgl
from dgl.data import CoauthorCSDataset, CoauthorPhysicsDataset
import torch.sparse as sp
import dgl.function as fn

from dgl import AddSelfLoop
import dgl.sparse as dglsp


def parse_index_file(filename):
    """Parse index file.

    The file is read as text with one integer per line. Lines that contain
    only whitespace (for example a trailing blank line) are skipped.

    Args:
        filename: path of the index file.

    Returns:
        list[int]: the integers in file order.

    Raises:
        ValueError: if a non-blank line is not a valid integer.
    """
    # The context manager closes the handle; the bare open() used before
    # leaked it until garbage collection.
    with open(filename) as handle:
        return [int(line) for line in handle if line.strip()]

def preprocess_citation(adj, features, normalization="AugNormAdj"):
    """Normalize the adjacency matrix and row-normalize the feature matrix.

    Args:
        adj: adjacency matrix, passed to the normalizer selected by
            ``normalization``.
        features: feature matrix, passed to ``row_normalize``.
        normalization: name of the adjacency normalization, resolved by
            ``fetch_normalization``. The default is ``"AugNormAdj"``; the
            previous default ``"FirstOrderGCN"`` is not a registered
            normalization, so calling without this argument always failed.

    Returns:
        tuple: ``(normalized_adj, normalized_features)``.
    """
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    features = row_normalize(features)
    return adj, features

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    Args:
        sparse_mx: a scipy sparse matrix (any format; it is converted to COO).

    Returns:
        torch.Tensor: a sparse COO tensor of dtype ``float32`` with the same
        shape and non-zero entries as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Row 0 holds the row indices, row 1 the column indices; torch expects int64.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
    # constructor, which emits a warning on current PyTorch releases.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

def _ids_to_index_tensor(node_ids, num_nodes):
    """Return the given node ids as a sorted 1-D int64 index tensor."""
    mask = np.zeros(num_nodes, dtype=bool)
    mask[node_ids] = True
    return torch.nonzero(torch.from_numpy(mask)).flatten()


def _load_dgl_dataset(dataset_str):
    """Load one of the DGL-backed datasets and build its normalized adjacency."""
    if dataset_str == 'CS':
        dataset = CoauthorCSDataset(transform=AddSelfLoop())
    elif dataset_str == 'Physics':
        dataset = CoauthorPhysicsDataset(transform=AddSelfLoop())
    elif dataset_str == 'BlogCatalog':
        dataset = Blogcatalog()
    else:
        dataset = DBLP()
    g = dataset[0]

    # Symmetric normalization: each edge (u, v) gets weight
    # deg(u)^-1/2 * deg(v)^-1/2, with zero-degree nodes mapped to 0.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)
    g.apply_edges(fn.u_mul_v('norm', 'norm', 'normalized'))

    features = g.ndata['feat']
    labels = g.ndata['label']

    # Random per-class split; converted to sorted index tensors.
    train_ids, val_ids, test_ids = split_train_val_test_ids(labels.numpy())
    idx_train = _ids_to_index_tensor(train_ids, g.num_nodes())
    idx_val = _ids_to_index_tensor(val_ids, g.num_nodes())
    idx_test = _ids_to_index_tensor(test_ids, g.num_nodes())

    # Re-use the sparsity pattern of the graph adjacency, with the
    # normalized edge weights as values.
    adj = g.adj()
    values = torch.squeeze(g.edata['normalized'])
    adj = torch.sparse_coo_tensor(adj.indices(), values, adj.shape)

    return adj, features, labels, idx_train, idx_val, idx_test


def _load_citation_files(dataset_str, normalization):
    """Load a citation dataset from the pickled ``data/ind.<name>.*`` files."""
    # Imported locally under an unambiguous name: at module level the alias
    # ``sp`` is bound to both scipy.sparse and torch.sparse, so which one is
    # active depends on the order in which the imports were executed.
    import scipy.sparse as scipy_sparse

    # File names are lower-case; normalizing once keeps the pickle paths, the
    # test-index path and the citeseer check consistent with each other.
    name = dataset_str.lower()

    objects = []
    for part in ('x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'):
        with open("data/ind.{}.{}".format(name, part), 'rb') as f:
            # NOTE: pickle executes code on load - only use trusted data files.
            objects.append(pkl.load(f, encoding='latin1'))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(name))
    test_idx_range = np.sort(test_idx_reorder)

    if name == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = scipy_sparse.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = scipy_sparse.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    # Symmetrize: keep the larger of adj[i, j] and adj[j, i] in both positions.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    adj, features = preprocess_citation(adj, features, normalization)

    # porting to pytorch
    features = torch.FloatTensor(np.array(features.todense())).float()
    labels = torch.LongTensor(labels)
    labels = torch.max(labels, dim=1)[1]  # one-hot rows -> class indices
    adj = sparse_mx_to_torch_sparse_tensor(adj).float()
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test


def load_citation(dataset_str="cora", normalization="AugNormAdj", cuda=True):
    """
    Load Citation Networks Datasets.

    ``'CS'``, ``'Physics'``, ``'BlogCatalog'`` and ``'DBLP'`` (case-sensitive)
    are loaded as DGL graphs, normalized symmetrically from the node
    in-degrees and split randomly per class; ``normalization`` is not used for
    them. Any other name is read from the pickled ``data/ind.<name>.*`` files
    (name lower-cased), normalized with ``normalization``, and split into the
    first ``len(y)`` nodes for training, the next 500 for validation and the
    test-index range for testing.

    Args:
        dataset_str: dataset name.
        normalization: adjacency normalization name for the file-based datasets.
        cuda: when true, every returned tensor is moved to the GPU. This now
            also applies to the DGL-backed datasets, which previously stayed
            on the CPU regardless of this flag.

    Returns:
        tuple: ``(adj, features, labels, idx_train, idx_val, idx_test)`` where
        ``adj`` is a sparse tensor and the others are dense tensors.
    """
    if dataset_str in ('CS', 'Physics', 'BlogCatalog', 'DBLP'):
        tensors = _load_dgl_dataset(dataset_str)
    else:
        tensors = _load_citation_files(dataset_str, normalization)

    if cuda:
        tensors = tuple(tensor.cuda() for tensor in tensors)

    adj, features, labels, idx_train, idx_val, idx_test = tensors
    return adj, features, labels, idx_train, idx_val, idx_test

def sgc_precompute(features, adj, degree):
    """Propagate ``features`` through ``adj`` ``degree`` times.

    Each step left-multiplies the current features by ``adj`` with
    ``torch.spmm``. With ``degree`` equal to zero the input is returned as is.
    """
    propagated = features
    for _ in range(degree):
        propagated = torch.spmm(adj, propagated)
    return propagated

In [None]:
import torch
import torch.nn as nn
from torch.nn import Module
import torch.nn.functional as F
import math

class SGC(nn.Module):
    """
    Logistic-regression classifier made of a single linear layer.

    The input features are expected to have been preprocessed with k-step
    graph propagation already; the module itself only maps ``nfeat`` input
    features to ``nclass`` output scores.
    """

    def __init__(self, nfeat, nclass):
        super().__init__()
        # Kept under the name ``W`` so that state-dict keys stay ``W.weight``/``W.bias``.
        self.W = nn.Linear(nfeat, nclass)

    def forward(self, x):
        """Return the raw (unnormalized) class scores for ``x``."""
        scores = self.W(x)
        return scores

def get_model(model_opt, nfeat, nclass, nhid=0, dropout=0, cuda=True):
    """Instantiate the model named by ``model_opt``.

    Only ``"SGC"`` is supported. ``nhid`` and ``dropout`` are accepted but not
    used by the SGC model.

    Args:
        model_opt: model name.
        nfeat: number of input features.
        nclass: number of output classes.
        nhid: unused.
        dropout: unused.
        cuda: when true, the model is moved to the GPU.

    Returns:
        The constructed model.

    Raises:
        NotImplementedError: if ``model_opt`` is not ``"SGC"``.
    """
    if model_opt != "SGC":
        raise NotImplementedError('model:{} is not implemented!'.format(model_opt))

    model = SGC(nfeat=nfeat, nclass=nclass)
    if cuda:
        model.cuda()
    return model

In [None]:
import numpy as np
import scipy.sparse as sp
import torch

def aug_normalized_adjacency(adj):
    """Return ``D^-1/2 (A + I) D^-1/2`` as a COO matrix.

    ``I`` is the identity and ``D`` the diagonal matrix of row sums of
    ``A + I``. Rows whose inverse square root is infinite are scaled by zero.
    """
    adj_with_loops = sp.coo_matrix(adj + sp.eye(adj.shape[0]))
    degrees = np.array(adj_with_loops.sum(1))
    inv_sqrt_degrees = np.power(degrees, -0.5).flatten()
    inv_sqrt_degrees[np.isinf(inv_sqrt_degrees)] = 0.
    scaling = sp.diags(inv_sqrt_degrees)
    row_scaled = scaling.dot(adj_with_loops)
    return row_scaled.dot(scaling).tocoo()

def fetch_normalization(type):
    """Return the adjacency-normalization function registered under ``type``.

    Args:
        type: name of the normalization; currently only ``'AugNormAdj'``.

    Returns:
        A callable taking an adjacency matrix and returning its normalized form.

    Raises:
        ValueError: if ``type`` is not a registered normalization. Previously
            an unknown name returned a zero-argument lambda, which only failed
            later with an unrelated ``TypeError`` when it was called with the
            adjacency matrix.
    """
    switcher = {
        'AugNormAdj': aug_normalized_adjacency,  # A' = (D + I)^-1/2 * ( A + I ) * (D + I)^-1/2
    }
    try:
        return switcher[type]
    except KeyError:
        raise ValueError(
            "Invalid normalization technique: {!r}. Expected one of: {}".format(
                type, ", ".join(sorted(switcher)))
        ) from None

def row_normalize(mx):
    """Row-normalize sparse matrix

    Every row is multiplied by the inverse of its sum; rows whose inverse sum
    is infinite (sum of zero) are multiplied by zero instead.
    """
    row_totals = np.array(mx.sum(1))
    inverse_totals = np.power(row_totals, -1).flatten()
    inverse_totals[np.isinf(inverse_totals)] = 0.
    return sp.diags(inverse_totals).dot(mx)

def accuracy(output, labels):
    """Return the fraction of rows of ``output`` whose arg-max equals the label.

    The result is a 0-dimensional double-precision tensor.
    """
    _, predicted = output.max(1)
    hits = predicted.type_as(labels).eq(labels).double()
    return hits.sum() / len(labels)

In [None]:
# Arguments
# Parsed once when this cell runs. ``args`` is a module-level global that the
# code below reads, e.g. as the default values of train_regression's keyword
# parameters (which are therefore fixed at the time the function is defined).


args = get_citation_args()

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=args.epochs, weight_decay=args.weight_decay,
                     lr=args.lr, dropout=args.dropout):
    """Train ``model`` full-batch with Adam and early stopping on validation loss.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates cross-entropy loss and accuracy on the validation set. Training
    stops early once the validation loss has failed to improve by more than
    ``min_delta`` for more than ``patience`` consecutive epochs.

    Args:
        model: the module to optimize (modified in place).
        train_features, train_labels: training inputs and targets.
        val_features, val_labels: validation inputs and targets.
        epochs: maximum number of epochs.
        weight_decay: Adam weight decay.
        lr: Adam learning rate; also selects the early-stopping patience.
        dropout: accepted for signature compatibility; not used.

    Returns:
        tuple: ``(model, acc_val, train_time)`` where ``acc_val`` is the
        validation accuracy of the last executed epoch (``None`` when
        ``epochs`` is 0) and ``train_time`` is the elapsed time in seconds.
    """
    t = perf_counter()
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)

    best_val_loss = float('inf')
    current_patience = 0
    # 25 epochs of patience for lr == 0.01 and 50 for lr == 0.001, as before.
    # Any other learning rate used to leave ``patience`` undefined and crash
    # at the early-stopping check; a threshold now covers every value.
    patience = 25 if lr >= 0.01 else 50
    # NOTE(review): the original code set min_delta = 0.0001 per learning rate
    # and then unconditionally overwrote it with 0.01. The effective value
    # (0.01) is kept so results do not change - confirm which was intended.
    min_delta = 0.01
    # Defined up front so the return statement is valid even when epochs == 0.
    acc_val = None

    # Training loop
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()

        with torch.no_grad():
            model.eval()
            output = model(val_features)
            loss_val = F.cross_entropy(output, val_labels)
            acc_val = accuracy(output, val_labels)

        curr_val_loss = loss_val.item()

        # Early stopping check: only an improvement larger than min_delta
        # resets the patience counter.
        if best_val_loss - curr_val_loss > min_delta:
            best_val_loss = curr_val_loss
            current_patience = 0
        else:
            current_patience += 1

        if current_patience > patience:
            print(f'Early stopping at epoch {epoch}')
            break
    train_time = perf_counter()-t

    return model, acc_val, train_time

def test_regression(model, test_features, test_labels):
    """Return the accuracy of ``model`` on the given test set.

    The model is switched to evaluation mode and the forward pass runs under
    ``torch.no_grad()``, matching the validation step in ``train_regression``,
    so no autograd graph is built for the evaluation.
    """
    model.eval()
    with torch.no_grad():
        return accuracy(model(test_features), test_labels)

In [None]:
import time
import argparse
import numpy as np
import pickle as pkl
import os
from math import log
import torch
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import torch.optim as optim


# Candidate values of the hyper-parameter search. The search results are later
# mapped back to concrete values by indexing into these two lists.
lr_list = [0.001, 0.01]
weight_decay_list = [0.001, 0.0005, 0.0001, 0.00005, 0.00001, 0.000005]

# The hyperopt search space offers exactly the candidates listed above.
space = {
    'weight_decay': hp.choice('weight_decay', tuple(weight_decay_list)),
    'lr': hp.choice('lr', tuple(lr_list)),
}

# Load the dataset once; the tuning trials and the final training run share it.
adj, features, labels, idx_train, idx_val, idx_test = load_citation(args.dataset, args.normalization, args.cuda)

def sgc_objective(space):
    """Hyperopt objective: train a fresh SGC model with one sampled configuration.

    Args:
        space: mapping with the sampled ``'weight_decay'`` and ``'lr'`` values.

    Returns:
        dict: ``{'loss': -validation_accuracy, 'status': STATUS_OK}``; the
        accuracy is negated so that minimizing the loss maximizes accuracy.
    """
    weight_decay = space['weight_decay']
    lr = space['lr']

    num_classes = labels.max().item()+1
    model = get_model(args.model, features.size(1), num_classes, args.hidden, args.dropout, args.cuda)
    features1 = sgc_precompute(features, adj, args.degree)
    model, acc_val, _ = train_regression(
        model,
        features1[idx_train], labels[idx_train],
        features1[idx_val], labels[idx_val],
        args.epochs, weight_decay, lr, args.dropout,
    )
    print(f'lr: {lr:.2e} weight decay: {weight_decay:.2e} accuracy: {acc_val:.4f}')
    return {'loss': -acc_val, 'status': STATUS_OK}


# Run the hyper-parameter search and time it.
t_start_tune = time.time()
best = fmin(sgc_objective, space=space, algo=tpe.suggest, max_evals=12)

t_end_tune = time.time()
# ``best`` holds an index per parameter, so the values are looked up in the
# candidate lists. A space after the learning rate keeps the two parts of the
# message from running together ("...e-02Best weight decay").
print("Best lr: {:.2e} ".format(lr_list[best["lr"]]) + "Best weight decay: {:.2e}".format(weight_decay_list[best["weight_decay"]]))

print("Time elapsed for Hyper-parmeter search: ", t_end_tune-t_start_tune, " seconds")

print(best)

# Persist the result so the final training step can reload it. The file is a
# pickle despite its .txt extension; the path is kept because the training
# step reads exactly this location.
os.makedirs("./{}-tuning".format(args.model), exist_ok=True)
path = '{}-tuning/{}.txt'.format(args.model, args.dataset)
with open(path, 'wb') as f:
    pkl.dump(best, f)

  objects.append(pkl.load(f, encoding='latin1'))


  0%|          | 0/12 [00:00<?, ?trial/s, best loss=?]

  return torch.sparse.FloatTensor(indices, values, shape)


Early stopping at epoch 384
lr: 1.00e-02 weight decay: 1.00e-05 accuracy: 0.7840
Early stopping at epoch 117
lr: 1.00e-02 weight decay: 1.00e-03 accuracy: 0.7780
Early stopping at epoch 405
lr: 1.00e-02 weight decay: 5.00e-06 accuracy: 0.7800
Early stopping at epoch 103
lr: 1.00e-02 weight decay: 1.00e-03 accuracy: 0.7720
Early stopping at epoch 286
lr: 1.00e-02 weight decay: 1.00e-04 accuracy: 0.7760
Early stopping at epoch 153
lr: 1.00e-02 weight decay: 5.00e-04 accuracy: 0.7760
Early stopping at epoch 411
lr: 1.00e-02 weight decay: 5.00e-06 accuracy: 0.7820
Early stopping at epoch 171
lr: 1.00e-03 weight decay: 1.00e-03 accuracy: 0.7400
Early stopping at epoch 117
lr: 1.00e-02 weight decay: 1.00e-03 accuracy: 0.7780
Early stopping at epoch 153
lr: 1.00e-02 weight decay: 5.00e-04 accuracy: 0.7760
Early stopping at epoch 392
lr: 1.00e-02 weight decay: 5.00e-06 accuracy: 0.7780
Early stopping at epoch 51
lr: 1.00e-03 weight decay: 5.00e-04 accuracy: 0.7200
100%|██████████| 12/12 [00:05

In [None]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F

import pickle as pkl
import time


# NOTE: the tuned hyper-parameters are always loaded here; the ``--tuned``
# flag is currently bypassed (see the commented-out condition).
if True:
# if args.tuned:
    if args.model == "SGC":
        with open("{}-tuning/{}.txt".format(args.model, args.dataset), 'rb') as f:
            # The tuning file is a pickle written by the tuning step; it stores
            # an index per parameter, resolved here against the candidate lists.
            params = pkl.load(f)
            args.lr = lr_list[params['lr']]
            args.weight_decay = weight_decay_list[params['weight_decay']]
            print("using tuned weight decay: {}".format(args.weight_decay))
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it fails with a TypeError instead of the intended error.
        raise NotImplementedError('model:{} is not implemented!'.format(args.model))

model = get_model(args.model, features.size(1), labels.max().item()+1, args.hidden, args.dropout, args.cuda)



if args.model == "SGC":
    start_time = time.time()
    # NOTE(review): this rebinds the global ``features`` to the propagated
    # features, so running this code a second time propagates them again.
    features = sgc_precompute(features, adj, args.degree)
    model, acc_val, train_time = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                     args.epochs, args.weight_decay, args.lr, args.dropout)
    acc_test = test_regression(model, features[idx_test], labels[idx_test])
    end_time = time.time()
    total_time_elapsed = end_time - start_time

print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
print("Time Elpased: {:.4f}s".format(total_time_elapsed))

using tuned weight decay: 1e-05
Early stopping at epoch 383
Validation Accuracy: 0.7840 Test Accuracy: 0.7620
Time Elpased: 0.5319s
