In [None]:
! pip install "numpy<2" --force-reinstall
! pip install torch==2.4 pandas scikit-learn pyvis
! pip install dgl -f https://data.dgl.ai/wheels/torch-2.4/
! pip install torchdata==0.6.1 --force-reinstall


In [None]:
from torchdata.datapipes.iter import IterDataPipe

import os
os.environ['DGLBACKEND'] = 'pytorch'

import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.data as ddta
import dgl.nn as dnn

import numpy as np
import scipy.sparse as sp
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix

import pyvis.network as pyv_n
import itertools


In [None]:
dataset = dgl.data.CoraGraphDataset()
graph = dataset[0]

In [None]:
import random
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    dgl.random.seed(seed)

    # If using CUDA
    torch.cuda.manual_seed(seed)

    torch.cuda.manual_seed_all(seed)  # if multi-GPU

    # For deterministic behavior (optional but useful)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [None]:
set_seed(42)

def train_and_test(model,num_epochs = 20):
    set_seed(42)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = -1
    all_features = graph.ndata["feat"]
    all_labels = graph.ndata["label"]
    train_mask = graph.ndata["train_mask"]
    val_mask = graph.ndata["val_mask"]
    test_mask = graph.ndata["test_mask"]

    for epoch_id in range(num_epochs):
        probabilities = model(graph, all_features)
        predictions = probabilities.argmax(1)

        training_loss = F.cross_entropy(probabilities[train_mask], all_labels[train_mask])

        train_acc = (predictions[train_mask] == all_labels[train_mask]).float().mean()
        val_acc = (predictions[val_mask] == all_labels[val_mask]).float().mean()

        if best_val_acc < val_acc:
            best_val_acc = val_acc

        # Backward
        optimizer.zero_grad()
        training_loss.backward()
        optimizer.step()


        print("Epoch {}, total loss: {:.3f}, train acc: {:.3f}, val acc: {:.3f} (best {:.3f})".format(
                    epoch_id, training_loss, train_acc, val_acc, best_val_acc))



    test_probabilities = model(graph, all_features)[test_mask]
    test_predictions = test_probabilities.argmax(1)

    correct = (test_predictions == all_labels[test_mask]).float()
    accuracy = correct.mean() * 100

    print("Prediction accuracy on the test data is {:.1f}%".format(accuracy.item()))



In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = self.conv2(graph, vec_rep)
        vec_rep = self.linear(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 16, dataset.num_classes)
train_and_test(model)


Epoch 0, total loss: 1.962, train acc: 0.136, val acc: 0.166 (best 0.166)
Epoch 1, total loss: 1.925, train acc: 0.200, val acc: 0.180 (best 0.180)
Epoch 2, total loss: 1.886, train acc: 0.279, val acc: 0.198 (best 0.198)
Epoch 3, total loss: 1.844, train acc: 0.529, val acc: 0.278 (best 0.278)
Epoch 4, total loss: 1.795, train acc: 0.750, val acc: 0.396 (best 0.396)
Epoch 5, total loss: 1.746, train acc: 0.793, val acc: 0.414 (best 0.414)
Epoch 6, total loss: 1.684, train acc: 0.857, val acc: 0.410 (best 0.414)
Epoch 7, total loss: 1.614, train acc: 0.864, val acc: 0.406 (best 0.414)
Epoch 8, total loss: 1.537, train acc: 0.929, val acc: 0.398 (best 0.414)
Epoch 9, total loss: 1.447, train acc: 0.914, val acc: 0.426 (best 0.426)
Epoch 10, total loss: 1.365, train acc: 0.950, val acc: 0.482 (best 0.482)
Epoch 11, total loss: 1.268, train acc: 0.950, val acc: 0.476 (best 0.482)
Epoch 12, total loss: 1.186, train acc: 0.979, val acc: 0.548 (best 0.548)
Epoch 13, total loss: 1.095, train 

Now lets check if adding more convolution improves the accuracy

In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv3 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv4 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)

        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = self.conv2(graph, vec_rep)
        vec_rep = self.conv3(graph, vec_rep)
        vec_rep = self.conv4(graph, vec_rep)

        vec_rep = self.linear(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 16, dataset.num_classes)
train_and_test(model)


Epoch 0, total loss: 2.064, train acc: 0.193, val acc: 0.074 (best 0.074)
Epoch 1, total loss: 1.890, train acc: 0.229, val acc: 0.294 (best 0.294)
Epoch 2, total loss: 1.786, train acc: 0.343, val acc: 0.248 (best 0.294)
Epoch 3, total loss: 1.637, train acc: 0.471, val acc: 0.222 (best 0.294)
Epoch 4, total loss: 1.484, train acc: 0.564, val acc: 0.346 (best 0.346)
Epoch 5, total loss: 1.310, train acc: 0.643, val acc: 0.402 (best 0.402)
Epoch 6, total loss: 1.145, train acc: 0.636, val acc: 0.424 (best 0.424)
Epoch 7, total loss: 1.002, train acc: 0.707, val acc: 0.464 (best 0.464)
Epoch 8, total loss: 0.838, train acc: 0.771, val acc: 0.448 (best 0.464)
Epoch 9, total loss: 0.677, train acc: 0.807, val acc: 0.466 (best 0.466)
Epoch 10, total loss: 0.567, train acc: 0.879, val acc: 0.468 (best 0.468)
Epoch 11, total loss: 0.438, train acc: 0.929, val acc: 0.496 (best 0.496)
Epoch 12, total loss: 0.325, train acc: 0.971, val acc: 0.536 (best 0.536)
Epoch 13, total loss: 0.249, train 

Adding more convolutions leads to overfitting.Lets now check different aggreation types

In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="pool", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="pool", feat_drop=0.1)
        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = self.conv2(graph, vec_rep)
        vec_rep = self.linear(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 16, dataset.num_classes)
train_and_test(model)


Epoch 0, total loss: 1.974, train acc: 0.143, val acc: 0.122 (best 0.122)
Epoch 1, total loss: 1.833, train acc: 0.329, val acc: 0.114 (best 0.122)
Epoch 2, total loss: 1.614, train acc: 0.707, val acc: 0.410 (best 0.410)
Epoch 3, total loss: 1.382, train acc: 0.571, val acc: 0.298 (best 0.410)
Epoch 4, total loss: 1.037, train acc: 0.800, val acc: 0.576 (best 0.576)
Epoch 5, total loss: 0.757, train acc: 0.936, val acc: 0.598 (best 0.598)
Epoch 6, total loss: 0.523, train acc: 0.929, val acc: 0.544 (best 0.598)
Epoch 7, total loss: 0.333, train acc: 0.979, val acc: 0.640 (best 0.640)
Epoch 8, total loss: 0.195, train acc: 0.964, val acc: 0.664 (best 0.664)
Epoch 9, total loss: 0.094, train acc: 0.993, val acc: 0.614 (best 0.664)
Epoch 10, total loss: 0.072, train acc: 0.979, val acc: 0.666 (best 0.666)
Epoch 11, total loss: 0.023, train acc: 1.000, val acc: 0.678 (best 0.678)
Epoch 12, total loss: 0.024, train acc: 1.000, val acc: 0.660 (best 0.678)
Epoch 13, total loss: 0.018, train 

This does not seem to help. In order to prevent overfitting well add drop out

In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = F.dropout(vec_rep, 0.3)
        vec_rep = self.conv2(graph, vec_rep)
        vec_rep = F.dropout(vec_rep, 0.3)
        vec_rep = self.linear(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 16, dataset.num_classes)
train_and_test(model)


Epoch 0, total loss: 1.962, train acc: 0.143, val acc: 0.152 (best 0.152)
Epoch 1, total loss: 1.929, train acc: 0.186, val acc: 0.148 (best 0.152)
Epoch 2, total loss: 1.899, train acc: 0.214, val acc: 0.184 (best 0.184)
Epoch 3, total loss: 1.863, train acc: 0.307, val acc: 0.222 (best 0.222)
Epoch 4, total loss: 1.837, train acc: 0.407, val acc: 0.256 (best 0.256)
Epoch 5, total loss: 1.792, train acc: 0.464, val acc: 0.242 (best 0.256)
Epoch 6, total loss: 1.749, train acc: 0.493, val acc: 0.274 (best 0.274)
Epoch 7, total loss: 1.694, train acc: 0.607, val acc: 0.302 (best 0.302)
Epoch 8, total loss: 1.648, train acc: 0.600, val acc: 0.316 (best 0.316)
Epoch 9, total loss: 1.592, train acc: 0.714, val acc: 0.302 (best 0.316)
Epoch 10, total loss: 1.496, train acc: 0.664, val acc: 0.330 (best 0.330)
Epoch 11, total loss: 1.409, train acc: 0.736, val acc: 0.384 (best 0.384)
Epoch 12, total loss: 1.359, train acc: 0.771, val acc: 0.390 (best 0.390)
Epoch 13, total loss: 1.333, train 

This does not help. Lets try reducing the hidden dimension

In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = self.conv2(graph, vec_rep)
        vec_rep = self.linear(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 8, dataset.num_classes)
train_and_test(model,num_epochs=30)


Epoch 0, total loss: 1.969, train acc: 0.143, val acc: 0.122 (best 0.122)
Epoch 1, total loss: 1.940, train acc: 0.143, val acc: 0.122 (best 0.122)
Epoch 2, total loss: 1.908, train acc: 0.143, val acc: 0.122 (best 0.122)
Epoch 3, total loss: 1.875, train acc: 0.143, val acc: 0.122 (best 0.122)
Epoch 4, total loss: 1.838, train acc: 0.179, val acc: 0.122 (best 0.122)
Epoch 5, total loss: 1.803, train acc: 0.350, val acc: 0.134 (best 0.134)
Epoch 6, total loss: 1.763, train acc: 0.529, val acc: 0.186 (best 0.186)
Epoch 7, total loss: 1.729, train acc: 0.686, val acc: 0.296 (best 0.296)
Epoch 8, total loss: 1.679, train acc: 0.793, val acc: 0.404 (best 0.404)
Epoch 9, total loss: 1.620, train acc: 0.836, val acc: 0.456 (best 0.456)
Epoch 10, total loss: 1.572, train acc: 0.843, val acc: 0.508 (best 0.508)
Epoch 11, total loss: 1.507, train acc: 0.807, val acc: 0.500 (best 0.508)
Epoch 12, total loss: 1.458, train acc: 0.793, val acc: 0.514 (best 0.514)
Epoch 13, total loss: 1.390, train 

Now Ill try reducing the convolutions and adding another linear layer

In [None]:
class MyGraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_classes):
        super(MyGraphSAGE, self).__init__()
        self.conv1 = dnn.SAGEConv(in_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.conv2 = dnn.SAGEConv(hidden_dim, hidden_dim, aggregator_type="mean", feat_drop=0.1)
        self.linear1 = nn.Linear(hidden_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, graph, node_features):
        vec_rep = self.conv1(graph, node_features)
        vec_rep = self.linear1(vec_rep)
        vec_rep = F.relu(vec_rep)
        vec_rep = self.linear2(vec_rep)
        return vec_rep

set_seed(42)
model = MyGraphSAGE(graph.ndata["feat"].shape[1], 16, dataset.num_classes)
train_and_test(model,num_epochs = 30)


Epoch 0, total loss: 1.960, train acc: 0.129, val acc: 0.276 (best 0.276)
Epoch 1, total loss: 1.950, train acc: 0.200, val acc: 0.322 (best 0.322)
Epoch 2, total loss: 1.939, train acc: 0.236, val acc: 0.330 (best 0.330)
Epoch 3, total loss: 1.927, train acc: 0.279, val acc: 0.324 (best 0.330)
Epoch 4, total loss: 1.913, train acc: 0.336, val acc: 0.322 (best 0.330)
Epoch 5, total loss: 1.897, train acc: 0.400, val acc: 0.332 (best 0.332)
Epoch 6, total loss: 1.879, train acc: 0.436, val acc: 0.354 (best 0.354)
Epoch 7, total loss: 1.856, train acc: 0.714, val acc: 0.320 (best 0.354)
Epoch 8, total loss: 1.832, train acc: 0.750, val acc: 0.156 (best 0.354)
Epoch 9, total loss: 1.803, train acc: 0.679, val acc: 0.144 (best 0.354)
Epoch 10, total loss: 1.772, train acc: 0.664, val acc: 0.174 (best 0.354)
Epoch 11, total loss: 1.737, train acc: 0.686, val acc: 0.184 (best 0.354)
Epoch 12, total loss: 1.698, train acc: 0.693, val acc: 0.196 (best 0.354)
Epoch 13, total loss: 1.655, train 

In the end after testing different sets of hyperparameters the best resulting set was using a hidden layer of 16 with 2 different convolution layers and a linear layer at the end. The convolutions were done with a mean aggregator. For this dataset simpler and smaller models performed best