# Citeseer Dataset Pytorch Implementation

## Importing Libraries

In [1]:
import matplotlib.pyplot as plt
import networkx as nx
from torch_geometric.datasets import Planetoid
import numpy as np
import torch
from torch_geometric.nn import GATConv
import torch.nn.functional as F

## Loading Dataset

In [2]:
# Load the Citeseer dataset
dataset = Planetoid(root="data/Citeseer", name="Citeseer")
data = dataset[0]
print(dataset)
print(data)
print("number of graphs:\t\t",len(dataset))
print("number of classes:\t\t",dataset.num_classes)
print("number of classes:\t\t",np.unique(data.y))
print(f"Number of nodes:\t\t",data.num_nodes)
print("number of node features:\t",data.num_node_features)
print("number of edge features:\t",data.num_edge_features)
print("X shape: ", data.x.shape)
print("Edge shape: ", data.edge_index.shape)
print("Y shape: ", data.y.shape)

Citeseer()
Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])
number of graphs:		 1
number of classes:		 6
number of classes:		 [0 1 2 3 4 5]
Number of nodes:		 3327
number of node features:	 3703
number of edge features:	 0
X shape:  torch.Size([3327, 3703])
Edge shape:  torch.Size([2, 9104])
Y shape:  torch.Size([3327])


## Model

In [3]:
class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads=8):
        super(GAT, self).__init__()
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=0.6)
        self.gat2 = GATConv(hidden_channels * heads, out_channels, heads=8, dropout=0.6)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.gat1(x, edge_index))
        x = F.log_softmax(self.gat2(x, edge_index), dim=1)
        return x


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GAT(
    in_channels=dataset.num_node_features,
    hidden_channels=8,
    out_channels=dataset.num_classes,
    heads=8
).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
criterion = torch.nn.NLLLoss()


In [5]:
def train(model, data, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    
    train_mask = data.train_mask.bool()
    loss = criterion(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def test(model, data, mask):
    model.eval()
    with torch.no_grad():
        out = model(data)
        pred = out.argmax(dim=1)
        correct = (pred[mask] == data.y[mask]).sum()
        acc = int(correct) / int(mask.sum())
    return acc

In [6]:
for epoch in range(200):
    loss = train(model, data, optimizer, criterion)
    train_acc = test(model, data, data.train_mask.bool())
    val_acc = test(model, data, data.val_mask.bool())
    test_acc = test(model, data, data.test_mask.bool())
    print(f"Epoch {epoch+1}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc : {test_acc:.4f}")

Epoch 1, Loss: 3.8693, Train Acc: 0.6833, Val Acc: 0.3920, Test Acc : 0.3820
Epoch 2, Loss: 3.6512, Train Acc: 0.8000, Val Acc: 0.5280, Test Acc : 0.5090
Epoch 3, Loss: 3.3702, Train Acc: 0.8667, Val Acc: 0.5520, Test Acc : 0.5420
Epoch 4, Loss: 2.9549, Train Acc: 0.8750, Val Acc: 0.5660, Test Acc : 0.5680
Epoch 5, Loss: 2.8228, Train Acc: 0.8917, Val Acc: 0.5800, Test Acc : 0.5770
Epoch 6, Loss: 2.3719, Train Acc: 0.8833, Val Acc: 0.5680, Test Acc : 0.5710
Epoch 7, Loss: 2.0482, Train Acc: 0.9167, Val Acc: 0.5740, Test Acc : 0.5770
Epoch 8, Loss: 1.7576, Train Acc: 0.9417, Val Acc: 0.5840, Test Acc : 0.5940
Epoch 9, Loss: 1.7642, Train Acc: 0.9500, Val Acc: 0.6060, Test Acc : 0.6060
Epoch 10, Loss: 1.4870, Train Acc: 0.9500, Val Acc: 0.6240, Test Acc : 0.6130
Epoch 11, Loss: 1.2965, Train Acc: 0.9583, Val Acc: 0.6300, Test Acc : 0.6140
Epoch 12, Loss: 1.0744, Train Acc: 0.9583, Val Acc: 0.6500, Test Acc : 0.6220
Epoch 13, Loss: 1.1984, Train Acc: 0.9667, Val Acc: 0.6660, Test Acc : 0.