In [1]:
# importing necessary libraries
try:
  import dgl
except:
  !pip install  dgl -f https://data.dgl.ai/wheels/torch-2.4/cu121/repo.html
import os
from google.colab import drive
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.data
import numpy as np
from sklearn.model_selection import train_test_split

Looking in links: https://data.dgl.ai/wheels/torch-2.4/cu121/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.4/cu121/dgl-2.4.0%2Bcu121-cp310-cp310-manylinux1_x86_64.whl (355.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m355.2/355.2 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting torch<=2.4.0 (from dgl)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<=2.4.0->dgl)


DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [2]:
# Mount Google Drive into the Colab filesystem (a remount is requested on every run).
drive.mount('/content/drive', force_remount=True)
# Make the Drive notebook folder the working directory so relative paths used by
# later cells (downloaded modules, local imports) resolve inside Drive.
os.chdir('/content/drive/MyDrive/Colab Notebooks/')

Mounted at /content/drive


In [None]:
# Download the jknet from our github :)
!wget https://raw.githubusercontent.com/Kirdon6/ATDL_assignment2/refs/heads/main/jknet.py
!wget https://raw.githubusercontent.com/Kirdon6/ATDL_assignment2/refs/heads/main/gat.py
!wget https://raw.githubusercontent.com/Kirdon6/ATDL_assignment2/refs/heads/main/gcn.py

In [3]:
import jknet
import gat
import gcn

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# SETUP Hyperparameters

In [5]:
# Search grid swept by the validation cells.
layers = range(1, 7)    # model depths 1..6
hidden_dim = [16, 32]   # hidden sizes

# Optimiser / regularisation settings shared by every model.
adam_lr = 0.005         # passed to Adam as `lr`
l2_reg = 0.0005         # passed to Adam as `weight_decay`
dropout = 0.5

# Number of repetitions of each final test evaluation.
runs = 3

# The number of epochs is not stated in the paper, so it is fixed here.
epochs = 200

# Prepare Dataset

In [6]:
def prepare_dataset(dataset, seed=None):
    """Load a citation graph and draw a random 60/20/20 train/val/test node split.

    Args:
        dataset: Name of the dataset to load, either ``"Cora"`` or ``"Citeseer"``.
        seed: Optional value forwarded to scikit-learn as ``random_state``. With the
            default ``None`` every call draws a different split (the previous
            behaviour); pass an integer to make the split reproducible.

    Returns:
        Tuple ``(graph, features, labels, train_idx, val_idx, test_idx, n_classes,
        n_features)``. The graph and all tensors are moved to the module-level
        ``device``; ``labels`` is cast to ``long``; the three index tensors are
        disjoint ``long`` tensors of node ids.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # Validate the name first so a typo fails before anything is downloaded or built.
    if dataset not in ("Cora", "Citeseer"):
        raise ValueError("Dataset {} is invalid.".format(dataset))

    transform = dgl.AddSelfLoop()
    if dataset == "Cora":
        data = dgl.data.CoraGraphDataset(transform=transform)
    else:
        data = dgl.data.CiteseerGraphDataset(transform=transform)

    graph = data[0].to(device)
    n_classes = data.num_classes

    # pop() detaches labels/features from the graph so they are passed around explicitly.
    labels = graph.ndata.pop("label").to(device).long()
    features = graph.ndata.pop("feat").to(device)
    n_features = features.shape[-1]

    # Split plain NumPy indices: scikit-learn documents lists/arrays/dataframes as
    # inputs, not (possibly CUDA) torch tensors. Convert to tensors afterwards.
    node_ids = np.arange(graph.num_nodes())
    # 20% test, then 25% of the remaining 80% (= 20% overall) for validation.
    train_ids, test_ids = train_test_split(node_ids, test_size=0.2, random_state=seed)
    train_ids, val_ids = train_test_split(train_ids, test_size=0.25, random_state=seed)

    train_idx, val_idx, test_idx = (
        torch.from_numpy(ids).long().to(device)
        for ids in (train_ids, val_ids, test_ids)
    )

    return graph, features, labels, train_idx, val_idx, test_idx, n_classes, n_features

# Essential Functions

In [7]:
def train(graph, features, labels, train_idx, epochs, model, loss_fn, optimizer):
    """Fit ``model`` with full-batch gradient steps on the training nodes.

    Every epoch runs one forward pass over the whole graph, computes the loss on
    the ``train_idx`` rows only and applies a single optimizer step.

    Args:
        graph: Graph object handed to ``model`` unchanged.
        features: Node features, passed to ``model`` as its second argument.
        labels: Targets, indexed with ``train_idx``.
        train_idx: Indices of the nodes that contribute to the loss.
        epochs: Number of optimisation steps to run.
        model: Module called as ``model(graph, features)``; its output is indexed
            with ``train_idx``.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar that
            supports ``backward()``.
        optimizer: Optimizer holding ``model``'s parameters.

    Returns:
        None. ``model`` is updated in place and, when ``epochs > 0``, left in
        training mode.
    """
    print("Training...")
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        logits = model(graph, features)
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        # The training accuracy used to be computed here but was never used; its
        # .item() call only forced a host/device sync every epoch, so it is gone.
        train_loss.backward()
        optimizer.step()

def evaluate(graph, features, labels, val_idx, model, loss_fn):
    """Report loss and accuracy of ``model`` on the validation nodes.

    Args:
        graph: Graph object handed to ``model`` unchanged.
        features: Node features, passed to ``model`` as its second argument.
        labels: Targets, indexed with ``val_idx``.
        val_idx: Indices of the validation nodes.
        model: Module called as ``model(graph, features)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.

    Returns:
        Tuple ``(valid_loss, valid_acc)`` of Python floats. Earlier versions only
        printed these values; returning them lets callers compare configurations
        programmatically, in line with ``test`` which returns its accuracy.
    """
    print("Evaluating on val subset...")
    model.eval()
    with torch.no_grad():
        logits = model(graph, features)
        val_logits = logits[val_idx]
        val_labels = labels[val_idx]
        valid_loss = loss_fn(val_logits, val_labels).item()
        n_correct = torch.sum(val_logits.argmax(dim=1) == val_labels).item()
        valid_acc = n_correct / len(val_idx)

    # Print out performance
    print(f"Validation Loss: {valid_loss}")
    print(f"Validation Accuracy: {valid_acc}")
    print("")
    return valid_loss, valid_acc


def test(graph, features, labels, test_idx, model):
    print("Testing...")
    model.eval()
    logits = model(graph, features)
    test_acc = torch.sum(
      logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)

    print(f"Test Accuracy: {test_acc}")
    return test_acc

# Citeseer dataset

In [8]:
# Load Citeseer and draw one random train/val/test split; the sweep cells below
# read these module-level names (graph, features, labels, *_idx, n_classes, n_features).
graph, features, labels, train_idx, val_idx, test_idx, n_classes, n_features = prepare_dataset("Citeseer")

Downloading /root/.dgl/citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...


/root/.dgl/citeseer.zip:   0%|          | 0.00/239k [00:00<?, ?B/s]

Extracting file to /root/.dgl/citeseer_d6836239
Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.


# JKNET

## JKNet Concat

### Training

In [1]:
# Validation sweep for JKNet in 'cat' mode: one freshly initialised model per
# (hidden size, depth) pair, trained on train_idx and scored on val_idx.
loss_fn = nn.CrossEntropyLoss()
for dim in hidden_dim:
    for num_layers in layers:
        print(f"Training JKNet-cat with dim={dim} and layers={num_layers}")
        print("")
        JKNet = jknet.JKNet(
            in_dim=n_features,
            hid_dim=dim,
            out_dim=n_classes,
            num_layers=num_layers,
            mode='cat',
            dropout=dropout,
        ).to(device)
        optimizer = torch.optim.Adam(
            JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg
        )
        train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
        evaluate(graph, features, labels, val_idx, JKNet, loss_fn)

NameError: name 'hidden_dim' is not defined

## JKNet Max

### Training

In [10]:
# Validation sweep for JKNet in 'max' mode: one freshly initialised model per
# (hidden size, depth) pair, trained on train_idx and scored on val_idx.
loss_fn = nn.CrossEntropyLoss()
for dim in hidden_dim:
    for num_layers in layers:
        print(f"Training JKNet-max with dim={dim} and layers={num_layers}")
        JKNet = jknet.JKNet(
            in_dim=n_features,
            hid_dim=dim,
            out_dim=n_classes,
            num_layers=num_layers,
            mode='max',
            dropout=dropout,
        ).to(device)
        optimizer = torch.optim.Adam(
            JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg
        )
        train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
        evaluate(graph, features, labels, val_idx, JKNet, loss_fn)

Training JKNet-max with dim=16 and layers=1
Training...
Evaluating on val subset...
Validation Loss: 0.8270083069801331
Validation Accuracy: 0.7522522522522522

Training JKNet-max with dim=16 and layers=2
Training...
Evaluating on val subset...
Validation Loss: 0.9394572377204895
Validation Accuracy: 0.7312312312312312

Training JKNet-max with dim=16 and layers=3
Training...
Evaluating on val subset...
Validation Loss: 0.9005640149116516
Validation Accuracy: 0.7222222222222222

Training JKNet-max with dim=16 and layers=4
Training...
Evaluating on val subset...
Validation Loss: 0.889142632484436
Validation Accuracy: 0.7552552552552553

Training JKNet-max with dim=16 and layers=5
Training...
Evaluating on val subset...
Validation Loss: 0.9927376508712769
Validation Accuracy: 0.7252252252252253

Training JKNet-max with dim=16 and layers=6
Training...
Evaluating on val subset...
Validation Loss: 1.0102150440216064
Validation Accuracy: 0.6891891891891891

Training JKNet-max with dim=32 and 

## JKNet LSTM

### Training

In [11]:
# Validation sweep for JKNet in 'lstm' mode: one freshly initialised model per
# (hidden size, depth) pair, trained on train_idx and scored on val_idx.
loss_fn = nn.CrossEntropyLoss()
for dim in hidden_dim:
    for num_layers in layers:
        print(f"Training JKNet-lstm with dim={dim} and layers={num_layers}")
        JKNet = jknet.JKNet(
            in_dim=n_features,
            hid_dim=dim,
            out_dim=n_classes,
            num_layers=num_layers,
            mode='lstm',
            dropout=dropout,
        ).to(device)
        optimizer = torch.optim.Adam(
            JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg
        )
        train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
        evaluate(graph, features, labels, val_idx, JKNet, loss_fn)

Training JKNet-lstm with dim=16 and layers=1
Training...
Evaluating on val subset...
Validation Loss: 0.8942834138870239
Validation Accuracy: 0.7327327327327328

Training JKNet-lstm with dim=16 and layers=2
Training...
Evaluating on val subset...
Validation Loss: 0.9581063985824585
Validation Accuracy: 0.7327327327327328

Training JKNet-lstm with dim=16 and layers=3
Training...
Evaluating on val subset...
Validation Loss: 1.0286827087402344
Validation Accuracy: 0.7237237237237237

Training JKNet-lstm with dim=16 and layers=4
Training...
Evaluating on val subset...
Validation Loss: 0.948534369468689
Validation Accuracy: 0.7132132132132132

Training JKNet-lstm with dim=16 and layers=5
Training...
Evaluating on val subset...
Validation Loss: 0.9798227548599243
Validation Accuracy: 0.7207207207207207

Training JKNet-lstm with dim=16 and layers=6
Training...
Evaluating on val subset...
Validation Loss: 0.8379961252212524
Validation Accuracy: 0.7417417417417418

Training JKNet-lstm with dim=

# GCN

## Training

In [12]:
# Validation sweep for the GCN baseline: one freshly initialised model per
# (hidden size, depth) pair, trained on train_idx and scored on val_idx.
loss_fn = nn.CrossEntropyLoss()
for dim in hidden_dim:
    for num_layers in layers:
        print(f"Training GCN with dim={dim} and layers={num_layers}")
        GCN = gcn.GCN(
            in_size=n_features,
            hid_size=dim,
            out_size=n_classes,
            num_layers=num_layers,
            dropout=dropout,
        ).to(device)
        optimizer = torch.optim.Adam(
            GCN.parameters(), lr=adam_lr, weight_decay=l2_reg
        )
        train(graph, features, labels, train_idx, epochs, GCN, loss_fn, optimizer)
        evaluate(graph, features, labels, val_idx, GCN, loss_fn)

Training GCN with dim=16 and layers=1
Training...
Evaluating on val subset...
Validation Loss: 0.8453570604324341
Validation Accuracy: 0.7432432432432432

Training GCN with dim=16 and layers=2
Training...
Evaluating on val subset...
Validation Loss: 0.9624481797218323
Validation Accuracy: 0.7252252252252253

Training GCN with dim=16 and layers=3
Training...
Evaluating on val subset...
Validation Loss: 1.028678059577942
Validation Accuracy: 0.7222222222222222

Training GCN with dim=16 and layers=4
Training...
Evaluating on val subset...
Validation Loss: 1.502290964126587
Validation Accuracy: 0.6711711711711712

Training GCN with dim=16 and layers=5
Training...
Evaluating on val subset...
Validation Loss: 1.2408066987991333
Validation Accuracy: 0.5795795795795796

Training GCN with dim=16 and layers=6
Training...
Evaluating on val subset...
Validation Loss: 1.758655071258545
Validation Accuracy: 0.1996996996996997

Training GCN with dim=32 and layers=1
Training...
Evaluating on val subse

# GAT

## Training

In [13]:
# Validation sweep for the GAT baseline: one freshly initialised model per
# (hidden size, depth) pair, trained on train_idx and scored on val_idx.
# NOTE(review): `heads` reuses the hidden size as its first entry and always has two
# entries even though num_layers varies; no dropout argument is passed either.
# Confirm both against gat.GAT.
loss_fn = nn.CrossEntropyLoss()
for dim in hidden_dim:
    for num_layers in layers:
        print(f"Training GAT with dim={dim} and layers={num_layers}")
        GAT = gat.GAT(
            in_size=n_features,
            hid_size=dim,
            out_size=n_classes,
            heads=[dim, 1],
            num_layers=num_layers,
        ).to(device)
        optimizer = torch.optim.Adam(
            GAT.parameters(), lr=adam_lr, weight_decay=l2_reg
        )
        train(graph, features, labels, train_idx, epochs, GAT, loss_fn, optimizer)
        evaluate(graph, features, labels, val_idx, GAT, loss_fn)

Training GAT with dim=16 and layers=1
Training...
Evaluating on val subset...
Validation Loss: 0.7131319046020508
Validation Accuracy: 0.7672672672672672

Training GAT with dim=16 and layers=2
Training...
Evaluating on val subset...
Validation Loss: 0.7376582622528076
Validation Accuracy: 0.7582582582582582

Training GAT with dim=16 and layers=3
Training...
Evaluating on val subset...
Validation Loss: 0.7594983577728271
Validation Accuracy: 0.7747747747747747

Training GAT with dim=16 and layers=4
Training...
Evaluating on val subset...
Validation Loss: 0.7727192640304565
Validation Accuracy: 0.7657657657657657

Training GAT with dim=16 and layers=5
Training...
Evaluating on val subset...
Validation Loss: 0.8376044034957886
Validation Accuracy: 0.7552552552552553

Training GAT with dim=16 and layers=6
Training...
Evaluating on val subset...
Validation Loss: 0.8212985992431641
Validation Accuracy: 0.7462462462462462

Training GAT with dim=32 and layers=1
Training...
Evaluating on val su

## Testing Citeseer

In [14]:
# Final test of the JKNet 'cat' configuration (hid_dim=16, num_layers=2), presumably
# the one picked from the validation sweep. Each run draws a new random split,
# trains from scratch and records the test accuracy.
acc_list = []
loss_fn = nn.CrossEntropyLoss()
for _ in range(runs):
    (graph, features, labels,
     train_idx, val_idx, test_idx,
     n_classes, n_features) = prepare_dataset("Citeseer")
    JKNet = jknet.JKNet(
        in_dim=n_features,
        hid_dim=16,
        out_dim=n_classes,
        num_layers=2,
        mode='cat',
        dropout=dropout,
    ).to(device)
    optimizer = torch.optim.Adam(JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg)
    train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
    acc = test(graph, features, labels, test_idx, JKNet)
    acc_list.append(acc)

# Summary statistics over the runs, rounded to three decimals.
mean = np.round(np.mean(acc_list), 3)
std = np.round(np.std(acc_list), 3)
print("Total acc:", acc_list)
print("Mean:", mean)
print("Std:", std)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7492492492492493
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7327327327327328
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7312312312312312
Total acc: [0.7492492492492493, 0.7327327327327328, 0.7312312312312312]
Mean: 0.738
Std: 0.008


In [15]:
# Final test of the JKNet 'max' configuration (hid_dim=16, num_layers=4), presumably
# the one picked from the validation sweep. Each run draws a new random split,
# trains from scratch and records the test accuracy.
acc_list = []
loss_fn = nn.CrossEntropyLoss()
for _ in range(runs):
    (graph, features, labels,
     train_idx, val_idx, test_idx,
     n_classes, n_features) = prepare_dataset("Citeseer")
    JKNet = jknet.JKNet(
        in_dim=n_features,
        hid_dim=16,
        out_dim=n_classes,
        num_layers=4,
        mode='max',
        dropout=dropout,
    ).to(device)
    optimizer = torch.optim.Adam(JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg)
    train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
    acc = test(graph, features, labels, test_idx, JKNet)
    acc_list.append(acc)

# Summary statistics over the runs, rounded to three decimals.
mean = np.round(np.mean(acc_list), 3)
std = np.round(np.std(acc_list), 3)
print("Total acc: ", acc_list)
print("Mean", mean)
print("Std", std)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7447447447447447
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7552552552552553
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7492492492492493
Total acc:  [0.7447447447447447, 0.7552552552552553, 0.7492492492492493]
Mean 0.75
Std 0.004


In [16]:
# Final test of the JKNet 'lstm' configuration (hid_dim=32, num_layers=1), presumably
# the one picked from the validation sweep. Each run draws a new random split,
# trains from scratch and records the test accuracy.
acc_list = []
loss_fn = nn.CrossEntropyLoss()
for _ in range(runs):
    (graph, features, labels,
     train_idx, val_idx, test_idx,
     n_classes, n_features) = prepare_dataset("Citeseer")
    JKNet = jknet.JKNet(
        in_dim=n_features,
        hid_dim=32,
        out_dim=n_classes,
        num_layers=1,
        mode='lstm',
        dropout=dropout,
    ).to(device)
    optimizer = torch.optim.Adam(JKNet.parameters(), lr=adam_lr, weight_decay=l2_reg)
    train(graph, features, labels, train_idx, epochs, JKNet, loss_fn, optimizer)
    acc = test(graph, features, labels, test_idx, JKNet)
    acc_list.append(acc)

# Summary statistics over the runs, rounded to three decimals.
mean = np.round(np.mean(acc_list), 3)
std = np.round(np.std(acc_list), 3)
print("Total acc: ", acc_list)
print("Mean", mean)
print("Std", std)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7372372372372372
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7447447447447447
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7507507507507507
Total acc:  [0.7372372372372372, 0.7447447447447447, 0.7507507507507507]
Mean 0.744
Std 0.006


In [17]:
# Final test of the GCN configuration (hid_size=32, num_layers=2). Each run draws a
# new random split, trains from scratch and records the test accuracy.
acc_list = []
loss_fn = nn.CrossEntropyLoss()
for _ in range(runs):
    (graph, features, labels,
     train_idx, val_idx, test_idx,
     n_classes, n_features) = prepare_dataset("Citeseer")
    GCN = gcn.GCN(
        in_size=n_features,
        hid_size=32,
        out_size=n_classes,
        num_layers=2,
        dropout=dropout,
    ).to(device)
    optimizer = torch.optim.Adam(GCN.parameters(), lr=adam_lr, weight_decay=l2_reg)
    train(graph, features, labels, train_idx, epochs, GCN, loss_fn, optimizer)
    acc = test(graph, features, labels, test_idx, GCN)
    acc_list.append(acc)

# Summary statistics over the runs, rounded to three decimals.
mean = np.round(np.mean(acc_list), 3)
std = np.round(np.std(acc_list), 3)
print("Total acc: ", acc_list)
print("Mean", mean)
print("Std", std)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7522522522522522
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7387387387387387
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7132132132132132
Total acc:  [0.7522522522522522, 0.7387387387387387, 0.7132132132132132]
Mean 0.735
Std 0.016


In [18]:
# Final test of the GAT configuration (hid_size=32, heads=[32, 1], num_layers=1).
# Each run draws a new random split, trains from scratch and records the test accuracy.
acc_list = []
loss_fn = nn.CrossEntropyLoss()
for _ in range(runs):
    (graph, features, labels,
     train_idx, val_idx, test_idx,
     n_classes, n_features) = prepare_dataset("Citeseer")
    GAT = gat.GAT(
        in_size=n_features,
        hid_size=32,
        out_size=n_classes,
        heads=[32, 1],
        num_layers=1,
    ).to(device)
    optimizer = torch.optim.Adam(GAT.parameters(), lr=adam_lr, weight_decay=l2_reg)
    train(graph, features, labels, train_idx, epochs, GAT, loss_fn, optimizer)
    acc = test(graph, features, labels, test_idx, GAT)
    acc_list.append(acc)

# Summary statistics over the runs, rounded to three decimals.
mean = np.round(np.mean(acc_list), 3)
std = np.round(np.std(acc_list), 3)
print("Total acc: ", acc_list)
print("Mean", mean)
print("Std", std)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7567567567567568
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7357357357357357
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Training...
Testing...
Test Accuracy: 0.7402402402402403
Total acc:  [0.7567567567567568, 0.7357357357357357, 0.7402402402402403]
Mean 0.744
Std 0.009
