In [None]:
import numpy as np
import scipy.sparse as sp
import torch


def accuracy(output, labels):
    """Return the fraction of rows in ``output`` whose arg-max equals ``labels``.

    Args:
        output: 2-D tensor of per-class scores; the predicted class of a row
            is the index of its largest entry along dimension 1.
        labels: 1-D tensor of target class indices, one entry per row of
            ``output``.

    Returns:
        A 0-dim ``torch.float64`` tensor holding
        ``(number of correct predictions) / len(labels)``.
    """
    # Tensor.max(dim=...) returns (values, indices); only the indices matter.
    _, predicted = output.max(dim=1)
    hits = predicted.type_as(labels).eq(labels).double()
    return hits.sum() / len(labels)

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


import math

import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    The forward pass computes ``adj @ (input @ weight)`` and, when the layer
    has a bias, adds it to every row of the result.

    Args:
        in_features: number of columns of the input feature matrix.
        out_features: number of columns of the output feature matrix.
        bias: when True (default) a learnable bias of size ``out_features``
            is added to the output; when False ``self.bias`` is ``None``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # torch.empty replaces the legacy torch.FloatTensor(...) constructor.
        # The uninitialised storage is overwritten by reset_parameters().
        self.weight = Parameter(
            torch.empty(in_features, out_features, dtype=torch.float32))
        if bias:
            self.bias = Parameter(torch.empty(out_features, dtype=torch.float32))
        else:
            # Registers the name so `self.bias is None` works in forward().
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw weight (and bias) from U(-b, b) with b = 1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # In-place init under no_grad instead of mutating `.data`, so autograd
        # is bypassed explicitly rather than through the legacy attribute.
        with torch.no_grad():
            self.weight.uniform_(-bound, bound)
            if self.bias is not None:
                self.bias.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Propagate node features through one graph-convolution step.

        Args:
            input: dense feature matrix with ``in_features`` columns.
            adj: adjacency matrix multiplied on the left via ``torch.spmm``
                (a sparse tensor in this notebook).

        Returns:
            ``adj @ (input @ weight)``, plus the bias when one is present.
        """
        support = torch.mm(input, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is None:
            return output
        return output + self.bias

    def __repr__(self):
        return (f"{self.__class__.__name__} "
                f"({self.in_features} -> {self.out_features})")

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class GCN(nn.Module):
    """Two-layer graph convolutional network producing per-node log-probabilities.

    Args:
        nfeat: number of input features per node.
        nhid: width of the hidden graph-convolution layer.
        nclass: number of output classes.
        dropout: dropout probability applied to the hidden activations
            (only active while the module is in training mode).
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()

        self.dropout = dropout
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)

    def forward(self, x, adj):
        """Return ``log_softmax`` class scores (over dim 1) for every row of ``x``."""
        hidden = self.gc1(x, adj)
        hidden = F.relu(hidden)
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        logits = self.gc2(hidden, adj)
        return F.log_softmax(logits, dim=1)



In [None]:
!pip install dgl

Collecting dgl
  Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cublas_cu12-12.1.3.1

In [None]:
def split_train_val_test_ids(labels, train_samples_per_class=20, val_samples_per_class=30, seed=None):
    """Randomly split sample indices into train/val/test sets, class by class.

    For every distinct label the matching indices are shuffled; the first
    ``train_samples_per_class`` go to train, the next ``val_samples_per_class``
    go to validation and all remaining ones go to test. A class with fewer
    samples than requested simply contributes fewer (possibly zero) indices
    to the later sets.

    Args:
        labels: 1-D array-like of class labels, one per sample. It is coerced
            with ``np.asarray`` so plain lists work as well as arrays.
        train_samples_per_class: number of training indices drawn per class.
        val_samples_per_class: number of validation indices drawn per class.
        seed: optional seed for a private ``np.random.default_rng``. When
            ``None`` (default) the global ``np.random`` state is used, exactly
            as before, so ``np.random.seed(...)`` still controls the split.

    Returns:
        ``(train_ids, val_ids, test_ids)``: three lists of sample indices.
    """
    # Without the coercion `labels == label` on a list is a single bool, not a mask.
    labels = np.asarray(labels)

    if seed is None:
        shuffle = np.random.shuffle
    else:
        shuffle = np.random.default_rng(seed).shuffle

    val_end = train_samples_per_class + val_samples_per_class
    train_ids, val_ids, test_ids = [], [], []

    for label in np.unique(labels):
        # Indices of all samples carrying the current label, in random order.
        label_indices = np.where(labels == label)[0]
        shuffle(label_indices)

        train_ids.extend(label_indices[:train_samples_per_class])
        val_ids.extend(label_indices[train_samples_per_class:val_end])
        test_ids.extend(label_indices[val_end:])

    return train_ids, val_ids, test_ids

In [None]:
from dgl.data import DGLDataset

class Blogcatalog(DGLDataset):
    """Single-graph DGL dataset built from the local file ``blogcatalog.npz``.

    After construction the dataset exposes:
      * ``self.graph``: a DGL graph whose edges are the non-zero entries of
        ``A + A^T + I`` (``A`` being the stored adjacency matrix), with node
        data ``"feat"`` (float32) and ``"label"`` (int64).
      * ``self.num_classes``: the number of distinct label values.
    """

    def __init__(self):
        super().__init__(name="Blogcatalog")

    def process(self):
        """Load the ``.npz`` archive and build ``self.graph`` / ``self.num_classes``."""
        # Imported here so the class does not depend on a later cell having
        # bound `dgl`, nor on what the notebook-level alias `sp` points to.
        import dgl
        import scipy.sparse as sparse

        print("Loading Blogcatalog Graph dataset....")
        # NOTE(security): allow_pickle=True unpickles the stored objects and can
        # execute arbitrary code; only load archives from a trusted source.
        data = np.load('blogcatalog.npz', allow_pickle=True)

        # `.tolist()` unwraps the object stored in the archive entry; the features
        # are densified because they become a dense node-feature tensor.
        feat = torch.tensor(data['node_attr'].tolist().toarray()).float()

        # Shift labels down by one (presumably 1-based in the file; verify).
        labels = torch.tensor(data['node_label']).to(torch.int64) - 1

        # Symmetrise and add self-loops while staying sparse: the dense form
        # needs several N x N temporaries just to read off the non-zeros.
        adj = sparse.csr_matrix(data['adj_matrix'].tolist())
        num_nodes = adj.shape[0]
        sym = (adj + adj.T + sparse.identity(num_nodes, format='csr')).tocsr()
        sym.sum_duplicates()
        sym.sort_indices()  # row-major edge order, same as np.nonzero on the dense matrix
        src, dst = sym.nonzero()

        self.graph = dgl.graph(
            (src.astype(np.int64), dst.astype(np.int64)), num_nodes=num_nodes
        )
        self.graph.ndata["feat"] = feat
        self.graph.ndata["label"] = labels
        self.num_classes = int(torch.unique(labels).numel())
        print("Loading Done")

    def __getitem__(self, i):
        # The dataset holds one graph; the index is ignored.
        return self.graph

    def __len__(self):
        return 1

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [None]:
class DBLP(DGLDataset):
    """Single-graph DGL dataset built from the local file ``DBLP_BERT_graph_data.npz``.

    After construction the dataset exposes:
      * ``self.graph``: a DGL graph whose edges are the non-zero entries of
        ``A + A^T + I`` (``A`` being the stored adjacency matrix), with node
        data ``"feat"`` (float32) and ``"label"`` (int64).
      * ``self.num_classes``: the number of distinct label values.
    """

    def __init__(self):
        super().__init__(name="DBLP")

    def process(self):
        """Load the ``.npz`` archive and build ``self.graph`` / ``self.num_classes``."""
        # Imported here so the class does not depend on a later cell having
        # bound `dgl`, nor on what the notebook-level alias `sp` points to.
        import dgl
        import scipy.sparse as sparse

        print("Loading DBLP Graph dataset....")
        # NOTE(security): allow_pickle=True unpickles the stored objects and can
        # execute arbitrary code; only load archives from a trusted source.
        data = np.load('DBLP_BERT_graph_data.npz', allow_pickle=True)

        feat = torch.tensor(data['feature_matrix']).float()
        labels = torch.tensor(data['labels']).to(torch.int64)

        # Symmetrise and add self-loops while staying sparse: the dense form
        # needs several N x N temporaries just to read off the non-zeros.
        # `.tolist()` unwraps the object stored in the archive entry.
        adj = sparse.csr_matrix(data['adj_mat'].tolist())
        num_nodes = adj.shape[0]
        sym = (adj + adj.T + sparse.identity(num_nodes, format='csr')).tocsr()
        sym.sum_duplicates()
        sym.sort_indices()  # row-major edge order, same as np.nonzero on the dense matrix
        src, dst = sym.nonzero()

        self.graph = dgl.graph(
            (src.astype(np.int64), dst.astype(np.int64)), num_nodes=num_nodes
        )
        self.graph.ndata["feat"] = feat
        self.graph.ndata["label"] = labels
        self.num_classes = int(torch.unique(labels).numel())
        print("Loading Done")

    def __getitem__(self, i):
        # The dataset holds one graph; the index is ignored.
        return self.graph

    def __len__(self):
        return 1

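Set `dataset_name` in the cell below to choose which dataset the experiment runs on. Supported values: `"cora"`, `"citeseer"`, `"pubmed"`, `"CS"`, `"Physics"`, `"Blogcatalog"`, `"DBLP"`.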

In [None]:
import numpy as np
import time
import torch
import torch.nn.functional as F
import torch.optim as optim
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, CoauthorCSDataset, CoauthorPhysicsDataset
import torch.sparse as sp
import dgl.function as fn

from dgl import AddSelfLoop
import dgl.sparse as dglsp


# Name of the dataset to run on; must be one of the keys of DATASET_FACTORIES.
dataset_name = "DBLP"

# Seed the global RNGs so that the random per-class split (np.random.shuffle)
# and torch-based parameter initialisation are repeatable on Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Factories are lambdas so a dataset class is only looked up and instantiated
# when it is actually selected.
DATASET_FACTORIES = {
    "cora": lambda: CoraGraphDataset(transform=AddSelfLoop()),
    "citeseer": lambda: CiteseerGraphDataset(transform=AddSelfLoop()),
    "pubmed": lambda: PubmedGraphDataset(transform=AddSelfLoop()),
    "CS": lambda: CoauthorCSDataset(transform=AddSelfLoop()),
    "Physics": lambda: CoauthorPhysicsDataset(transform=AddSelfLoop()),
    "Blogcatalog": lambda: Blogcatalog(),
    "DBLP": lambda: DBLP(),
}

# Datasets whose train/val/test masks are read from g.ndata; every other
# dataset gets a random per-class split.
DATASETS_WITH_MASKS = ("cora", "citeseer", "pubmed")

if dataset_name not in DATASET_FACTORIES:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        f"Unknown dataset_name {dataset_name!r}; "
        f"expected one of {sorted(DATASET_FACTORIES)}"
    )
dataset = DATASET_FACTORIES[dataset_name]()


def _ids_to_mask(node_ids, size):
    """Return a boolean torch mask of length ``size`` that is True at ``node_ids``."""
    mask = np.zeros(size, dtype=bool)
    mask[node_ids] = True
    return torch.from_numpy(mask)


num_classes = dataset.num_classes
g = dataset[0]

# Symmetric normalisation: edge (u, v) gets weight in_deg(u)^-1/2 * in_deg(v)^-1/2.
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0  # nodes with in-degree 0 would otherwise yield inf
g.ndata['norm'] = norm.unsqueeze(1)
g.apply_edges(fn.u_mul_v('norm', 'norm', 'normalized'))

if dataset_name in DATASETS_WITH_MASKS:
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
else:
    train_node_ids, val_node_ids, test_node_ids = split_train_val_test_ids(g.ndata['label'].numpy())
    train_mask = _ids_to_mask(train_node_ids, g.num_nodes())
    val_mask = _ids_to_mask(val_node_ids, g.num_nodes())
    test_mask = _ids_to_mask(test_node_ids, g.num_nodes())

# get labels
labels = g.ndata['label']
features = g.ndata['feat']

print(torch.unique(features))
print(torch.max(features))
print(torch.min(features))

print("------")


num_nodes = g.num_nodes()

# Rebuild the adjacency as a torch sparse COO tensor whose values are the
# normalised edge weights computed above.
adj = g.adj()

indices = adj.indices()
values = torch.squeeze(g.edata['normalized'])
shape = adj.shape

adj = torch.sparse_coo_tensor(indices, values, shape)

print(adj)


idx_train = torch.nonzero(train_mask).flatten()
idx_val = torch.nonzero(val_mask).flatten()
idx_test = torch.nonzero(test_mask).flatten()


Loading DBLP Graph dataset....
[0 0 0 ... 3 3 3]
tensor([0, 1, 2, 3])
[[1. 2. 2. ... 0. 0. 0.]
 [2. 1. 0. ... 0. 0. 0.]
 [2. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]
Loading Done
tensor([-5.1318, -4.9828, -4.9130,  ...,  1.9756,  2.0021,  2.0364])
tensor(2.0364)
tensor(-5.1318)
------
tensor(indices=tensor([[    0,     0,     0,  ..., 17723, 17724, 17724],
                       [    0,     1,     2,  ..., 17723,  3434, 17724]]),
       values=tensor([0.3333, 0.2582, 0.2887,  ..., 0.5000, 0.1768, 0.5000]),
       size=(17725, 17725), nnz=123459, layout=torch.sparse_coo)


In [None]:
def train(epoch, verbose):
    """Run one full-batch optimisation step and return the validation loss.

    Operates on the notebook-level globals ``model``, ``optimizer``,
    ``features``, ``adj``, ``labels``, ``idx_train`` and ``idx_val``.

    The training loss/accuracy come from the training-mode forward pass used
    for the gradient step. The validation loss/accuracy are computed from a
    second forward pass in eval mode (dropout disabled) after the parameter
    update, so the value returned for early stopping is not perturbed by
    dropout noise. The model is left in eval mode on return; the next call
    switches it back to training mode.

    Args:
        epoch: zero-based epoch index, used only for the log line.
        verbose: when truthy, print losses, accuracies and elapsed time.

    Returns:
        The validation loss as a Python float.
    """
    t = time.time()

    # --- optimisation step (dropout active) ---
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # --- validation (dropout off, no autograd graph) ---
    model.eval()
    with torch.no_grad():
        output = model(features, adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])

    if verbose:
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

    return loss_val.item()


def test():
    """Evaluate the global ``model`` on the test nodes and print loss and accuracy.

    Reads the notebook-level globals ``model``, ``features``, ``adj``,
    ``labels`` and ``idx_test``. The model is switched to eval mode and the
    forward pass runs under ``torch.no_grad()`` because no gradients are
    needed for evaluation. Nothing is returned.
    """
    model.eval()
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


In [None]:
from itertools import product
from tqdm import tqdm

# Define hyperparameter grid
epochs = 1000
hidden_list = [32, 64]
dropout = 0.5
min_delta = 0.001
lr_list = [0.01, 0.001]
weight_decay_list = [0.001, 0.0001, 5e-4]

# Store results
results = []

# Define a dictionary to map learning rates to patience values
lr_patience_dict = {0.01: 25, 0.001: 50}


def _build_model_and_optimizer(hidden, lr, weight_decay):
    """Create a fresh GCN and its Adam optimizer for one hyperparameter setting."""
    net = GCN(nfeat=features.shape[1],
              nhid=hidden,
              nclass=labels.max().item() + 1,
              dropout=dropout)
    opt = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    return net, opt


def _fit_with_early_stopping(max_epochs, patience, min_delta, verbose):
    """Call ``train`` once per epoch until the validation loss stops improving.

    ``train`` works on the global ``model`` / ``optimizer``, so those must be
    assigned before calling this helper. An epoch counts as an improvement
    when it lowers the best validation loss by more than ``min_delta``.
    Training stops once more than ``patience`` consecutive epochs bring no
    improvement, or after ``max_epochs`` epochs.

    Returns:
        The best (lowest) validation loss recorded.
    """
    best = float('inf')
    epochs_without_improvement = 0
    for epoch in range(max_epochs):
        curr_val_loss = train(epoch, verbose)

        # Early stopping check
        if best - curr_val_loss > min_delta:
            best = curr_val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement > patience:
            if verbose:
                print(f'Early stopping at epoch {epoch}')
            break
    return best


t_start = time.time()

# Perform grid search
for hidden, lr, weight_decay in tqdm(list(product(hidden_list, lr_list, weight_decay_list)), desc="Hyperparameter Grid Search"):

    # `model` and `optimizer` are module-level on purpose: train() reads them.
    model, optimizer = _build_model_and_optimizer(hidden, lr, weight_decay)

    # Get patience from the dictionary or use a default value
    patience = lr_patience_dict.get(lr, 10)
    best_val_loss = _fit_with_early_stopping(epochs, patience, min_delta, verbose=False)

    # Store the results
    results.append({
        'hidden': hidden,
        'lr': lr,
        'weight_decay': weight_decay,
        'val_loss': best_val_loss,
    })

    print("Hidden:", hidden,
          "lr:", lr,
          "weight_decay:", weight_decay,
          "val_loss:", best_val_loss)

# Find the best set of hyperparameters
best_result = min(results, key=lambda x: x['val_loss'])


t_end = time.time()
print("------------------------")
print(f"Total Time Elapsed to Find Best Hyper-parameters: {t_end-t_start} seconds")
print("------------------------")

# Print the best hyperparameters and test the model
print("Best Hyperparameters:")
print(f"Hidden: {best_result['hidden']}")
print(f"Learning Rate: {best_result['lr']}")
print(f"Weight Decay: {best_result['weight_decay']}")
print(f"Validation Loss: {best_result['val_loss']}")

print("------------------------")


# Re-create the model with the best hyperparameters and train it from scratch
model, optimizer = _build_model_and_optimizer(best_result['hidden'],
                                              best_result['lr'],
                                              best_result['weight_decay'])

print("Now training with best Hyper-paramater settings")

patience = lr_patience_dict.get(best_result['lr'], 10)  # Get patience from the dictionary or use a default value
best_val_loss = _fit_with_early_stopping(epochs, patience, min_delta, verbose=True)

# Test the final model
# NOTE(review): this evaluates the weights of the last epoch run, not those of
# the epoch with the best validation loss; no checkpoint is kept.
test()


Hyperparameter Grid Search:   8%|▊         | 1/12 [00:06<01:12,  6.63s/it]

Hidden: 32 lr: 0.01 weight_decay: 0.001 val_loss: 0.7156115174293518


Hyperparameter Grid Search:  17%|█▋        | 2/12 [00:11<00:56,  5.63s/it]

Hidden: 32 lr: 0.01 weight_decay: 0.0001 val_loss: 0.8101913928985596


Hyperparameter Grid Search:  25%|██▌       | 3/12 [00:15<00:45,  5.08s/it]

Hidden: 32 lr: 0.01 weight_decay: 0.0005 val_loss: 0.8321588039398193


Hyperparameter Grid Search:  33%|███▎      | 4/12 [00:28<01:04,  8.07s/it]

Hidden: 32 lr: 0.001 weight_decay: 0.001 val_loss: 0.7362015247344971


Hyperparameter Grid Search:  42%|████▏     | 5/12 [00:41<01:09,  9.90s/it]

Hidden: 32 lr: 0.001 weight_decay: 0.0001 val_loss: 0.751530647277832


Hyperparameter Grid Search:  50%|█████     | 6/12 [00:54<01:04, 10.69s/it]

Hidden: 32 lr: 0.001 weight_decay: 0.0005 val_loss: 0.6944833397865295


Hyperparameter Grid Search:  58%|█████▊    | 7/12 [01:00<00:46,  9.36s/it]

Hidden: 64 lr: 0.01 weight_decay: 0.001 val_loss: 0.7851048111915588


Hyperparameter Grid Search:  67%|██████▋   | 8/12 [01:04<00:30,  7.68s/it]

Hidden: 64 lr: 0.01 weight_decay: 0.0001 val_loss: 0.8174694180488586


Hyperparameter Grid Search:  75%|███████▌  | 9/12 [01:10<00:21,  7.09s/it]

Hidden: 64 lr: 0.01 weight_decay: 0.0005 val_loss: 0.6818962097167969


Hyperparameter Grid Search:  83%|████████▎ | 10/12 [01:23<00:17,  8.90s/it]

Hidden: 64 lr: 0.001 weight_decay: 0.001 val_loss: 0.7323377132415771


Hyperparameter Grid Search:  92%|█████████▏| 11/12 [01:33<00:09,  9.16s/it]

Hidden: 64 lr: 0.001 weight_decay: 0.0001 val_loss: 0.7127971649169922


Hyperparameter Grid Search: 100%|██████████| 12/12 [01:44<00:00,  8.72s/it]

Hidden: 64 lr: 0.001 weight_decay: 0.0005 val_loss: 0.6681440472602844
------------------------
Total Time Elapsed to Find Best Hyper-parameters: 104.66503047943115 seconds
------------------------
Best Hyperparameters:
Hidden: 64
Learning Rate: 0.001
Weight Decay: 0.0005
Validation Loss: 0.6681440472602844
------------------------
Now training with best Hyper-paramater settings
Epoch: 0001 loss_train: 1.7441 acc_train: 0.1875 loss_val: 1.8855 acc_val: 0.2167 time: 0.0884s
Epoch: 0002 loss_train: 1.4988 acc_train: 0.2625 loss_val: 1.4975 acc_val: 0.2750 time: 0.0882s





Epoch: 0003 loss_train: 1.2749 acc_train: 0.4875 loss_val: 1.4520 acc_val: 0.2917 time: 0.0885s
Epoch: 0004 loss_train: 1.2060 acc_train: 0.4750 loss_val: 1.4119 acc_val: 0.3583 time: 0.0894s
Epoch: 0005 loss_train: 1.1513 acc_train: 0.5250 loss_val: 1.3425 acc_val: 0.3667 time: 0.0922s
Epoch: 0006 loss_train: 1.0194 acc_train: 0.6000 loss_val: 1.2780 acc_val: 0.4250 time: 0.0884s
Epoch: 0007 loss_train: 1.0348 acc_train: 0.6375 loss_val: 1.1597 acc_val: 0.5333 time: 0.0889s
Epoch: 0008 loss_train: 0.9662 acc_train: 0.5875 loss_val: 1.1696 acc_val: 0.5083 time: 0.0857s
Epoch: 0009 loss_train: 0.9811 acc_train: 0.6750 loss_val: 1.1188 acc_val: 0.5083 time: 0.0884s
Epoch: 0010 loss_train: 0.9113 acc_train: 0.6750 loss_val: 1.0948 acc_val: 0.4917 time: 0.0895s
Epoch: 0011 loss_train: 0.8587 acc_train: 0.7125 loss_val: 1.0744 acc_val: 0.5500 time: 0.0858s
Epoch: 0012 loss_train: 0.7670 acc_train: 0.7875 loss_val: 1.1103 acc_val: 0.5583 time: 0.0854s
Epoch: 0013 loss_train: 0.7959 acc_train