In [None]:
!pip install torch networkx torch-geometric --quiet
from torch_geometric.datasets import Planetoid
import numpy as np

# seed = 42
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# if torch.cuda.is_available():
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True

# Load the Citeseer dataset
dataset = Planetoid(root='data/Citeseer', name='CiteSeer')

# Extract the graph data object
data = dataset[0]
print(dataset)
print("number of graphs:\t\t",len(dataset))
print("number of classes:\t\t",dataset.num_classes)
print("number of classes:\t\t",np.unique(data.y))
print("number of node features:\t",data.num_node_features)
print("number of edge features:\t",data.num_edge_features)
print("X shape: ", data.x.shape)
print("Edge shape: ", data.edge_index.shape)
print("Y shape: ", data.y.shape)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25h

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...


CiteSeer()
number of graphs:		 1
number of classes:		 6
number of classes:		 [0 1 2 3 4 5]
number of node features:	 3703
number of edge features:	 0
X shape:  torch.Size([3327, 3703])
Edge shape:  torch.Size([2, 9104])
Y shape:  torch.Size([3327])


Done!


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GATConv
from torch_geometric.transforms import NormalizeFeatures
import random
import numpy as np
# Define the GAT Model
class GAT(torch.nn.Module):
    """Two-layer graph attention network that outputs per-node log-probabilities.

    Args:
        input_dim: Width of the input node features.
        hidden_dim: Output width of each attention head in the first layer.
        output_dim: Number of output classes.
        heads: Number of attention heads in the first layer.
        dropout: Dropout probability, used both on the features in
            ``forward`` and passed to each ``GATConv`` layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, heads=8, dropout=0.6):
        super().__init__()

        # Layer 1: multi-head attention.
        self.gat1 = GATConv(input_dim, hidden_dim, heads=heads, dropout=dropout)

        # Layer 2: a single, non-concatenated head that consumes the
        # hidden_dim * heads features coming out of layer 1.
        first_layer_width = hidden_dim * heads
        self.gat2 = GATConv(first_layer_width, output_dim, heads=1, concat=False, dropout=dropout)

        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return log-softmax class scores (over dim 1) for every node."""
        p, is_training = self.dropout, self.training

        # Dropout on the raw input features, then layer 1 followed by ELU.
        hidden = F.dropout(x, p=p, training=is_training)
        hidden = F.elu(self.gat1(hidden, edge_index))

        # Dropout again before the classification layer.
        hidden = F.dropout(hidden, p=p, training=is_training)
        logits = self.gat2(hidden, edge_index)

        return F.log_softmax(logits, dim=1)


# Architecture hyperparameters, then the model and its optimizer
input_dim = dataset.num_features   # input feature width reported by the dataset
output_dim = dataset.num_classes   # one output per class
hidden_dim = 8                     # features produced by each attention head
heads = 8                          # attention heads in the first layer
dropout = 0.6                      # dropout probability

model = GAT(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    heads=heads,
    dropout=dropout,
)

# Adam with L2 regularization applied through weight_decay
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)


# Training and Testing functions
def train():
    """Run one full-graph optimization step and return the training loss.

    Uses the module-level ``model``, ``optimizer`` and ``data``. The loss is
    the negative log-likelihood over the nodes selected by ``data.train_mask``.
    """
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole graph; only training nodes enter the loss.
    log_probs = model(data.x, data.edge_index)
    train_nodes = data.train_mask
    loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    loss.backward()
    optimizer.step()
    return loss


@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on the train/val/test masks.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` of accuracies, each the
        fraction of correctly predicted nodes within the corresponding mask.
    """
    model.eval()
    pred = model(data.x, data.edge_index).argmax(dim=1)

    def _accuracy(mask):
        # Correct predictions inside the mask divided by the mask size.
        hits = (pred[mask] == data.y[mask]).sum()
        return int(hits) / int(mask.sum())

    return [_accuracy(split) for split in (data.train_mask, data.val_mask, data.test_mask)]

In [None]:

# Training loop with model selection on the validation split
best_val_acc = 0.0
# Test accuracy recorded at the epoch with the best validation accuracy.
# This is NOT the maximum test accuracy seen during training.
best_test_acc = 0.0

for epoch in range(200):  # fixed budget of 200 epochs
    loss = train()
    train_acc, val_acc, test_acc = test()

    # Only a strictly better validation accuracy updates the selected epoch,
    # so the test split never drives model selection.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc

    # The last field is labelled for what it actually holds: the test accuracy
    # at the best-validation epoch, not the best test accuracy.
    print(f'Epoch {epoch+1:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
          f'Val: {val_acc:.4f}, Test: {test_acc:.4f}, Test@BestVal: {best_test_acc:.4f}')

print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Corresponding Test Accuracy: {best_test_acc:.4f}")


Epoch 001, Loss: 1.8010, Train: 0.6500, Val: 0.4520, Test: 0.4330, Best Test: 0.4330
Epoch 002, Loss: 1.5961, Train: 0.7167, Val: 0.5200, Test: 0.4760, Best Test: 0.4760
Epoch 003, Loss: 1.5153, Train: 0.8583, Val: 0.5720, Test: 0.5640, Best Test: 0.5640
Epoch 004, Loss: 1.2878, Train: 0.9167, Val: 0.6440, Test: 0.6210, Best Test: 0.6210
Epoch 005, Loss: 1.1768, Train: 0.9250, Val: 0.6620, Test: 0.6530, Best Test: 0.6530
Epoch 006, Loss: 1.0899, Train: 0.9500, Val: 0.6680, Test: 0.6590, Best Test: 0.6590
Epoch 007, Loss: 1.0230, Train: 0.9500, Val: 0.6720, Test: 0.6630, Best Test: 0.6630
Epoch 008, Loss: 0.8661, Train: 0.9417, Val: 0.6640, Test: 0.6650, Best Test: 0.6630
Epoch 009, Loss: 0.9749, Train: 0.9417, Val: 0.6640, Test: 0.6660, Best Test: 0.6630
Epoch 010, Loss: 0.9270, Train: 0.9417, Val: 0.6600, Test: 0.6630, Best Test: 0.6630
Epoch 011, Loss: 0.7942, Train: 0.9583, Val: 0.6580, Test: 0.6570, Best Test: 0.6630
Epoch 012, Loss: 0.9101, Train: 0.9583, Val: 0.6580, Test: 0.6520

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GATConv
from torch_geometric.loader import DataLoader
import torch.optim as optim

# Seed every RNG used here so repeated runs start from the same state.
# `np` and `random` are imported in an earlier cell of this notebook.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

# Load the Citeseer dataset. The name is spelled 'CiteSeer' exactly as in the
# first cell: Planetoid derives its raw/processed directories from `name`, so
# a different capitalization would download and process a second copy.
dataset = Planetoid(root="data/Citeseer", name="CiteSeer")
data = dataset[0]

# Feature normalization (row-normalize): divide each row by its sum. The clamp
# keeps all-zero rows from dividing by zero.
data.x = data.x / data.x.sum(dim=1, keepdim=True).clamp(min=1e-10)

# Define GAT Model
class GAT(torch.nn.Module):
    """Two-layer GAT variant with ReLU activation and no input dropout.

    Args:
        input_dim: Width of the input node features.
        hidden_dim: Output width of each attention head in the first layer.
        output_dim: Number of output classes.
        num_heads: Number of attention heads in the first layer.
        dropout_rate: Dropout probability, applied between the two layers and
            passed to each ``GATConv`` layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout_rate):
        super().__init__()
        # Multi-head attention layer.
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=dropout_rate)
        # Classification layer: a single, non-concatenated head fed with the
        # hidden_dim * num_heads features from the first layer.
        concat_width = hidden_dim * num_heads
        self.gat2 = GATConv(concat_width, output_dim, heads=1, concat=False, dropout=dropout_rate)
        self.dropout_rate = dropout_rate

    def forward(self, x, edge_index):
        """Return log-softmax class scores (over dim 1) for every node."""
        hidden = F.relu(self.gat1(x, edge_index))
        hidden = F.dropout(hidden, p=self.dropout_rate, training=self.training)
        scores = self.gat2(hidden, edge_index)
        return F.log_softmax(scores, dim=1)


# Hyperparameters
# -- architecture
input_dim = dataset.num_node_features   # input feature width
output_dim = dataset.num_classes        # number of classes
hidden_dim = 8                          # features per attention head
num_heads = 8                           # attention heads in the first layer
dropout_rate = 0.6                      # dropout probability
# -- optimization
learning_rate = 0.01                    # Adam step size
weight_decay = 5e-4                     # L2 regularization factor
max_epochs = 200                        # upper bound on training epochs
early_stopping_patience = 10            # non-improving epochs tolerated before stopping

# Initialize model
model = GAT(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_heads=num_heads,
    dropout_rate=dropout_rate,
)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Xavier initialization for GATConv weights
def init_weights(m):
    """Xavier-initialize the projection weights of a ``GATConv`` and zero its bias.

    Intended for ``Module.apply``; modules that are not ``GATConv`` are left
    untouched.

    Raises:
        AttributeError: if the layer exposes none of the known projection
            attributes (``lin``, ``lin_src``, ``lin_dst``).
    """
    if not isinstance(m, GATConv):
        return

    # Depending on the installed PyG release, GATConv stores its input
    # projection as `lin` or as `lin_src`/`lin_dst` (which can be the very same
    # module). Initialize each distinct projection exactly once.
    initialized = set()
    for attr in ("lin", "lin_src", "lin_dst"):
        projection = getattr(m, attr, None)
        if projection is None or id(projection) in initialized:
            continue
        torch.nn.init.xavier_uniform_(projection.weight)
        initialized.add(id(projection))

    if not initialized:
        # Fail loudly rather than silently skipping the initialization.
        raise AttributeError(
            "GATConv exposes none of the expected projection layers: lin, lin_src, lin_dst"
        )

    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)  # Initialize bias to zeros

# Apply the initialization to every submodule of the model
model.apply(init_weights)

# Training and evaluation functions
def train():
    """Perform a single full-graph training step and return the loss tensor.

    Operates on the module-level ``model``, ``optimizer`` and ``data``. Only
    the nodes selected by ``data.train_mask`` contribute to the NLL loss.
    """
    model.train()
    optimizer.zero_grad()

    mask = data.train_mask
    predictions = model(data.x, data.edge_index)
    loss = F.nll_loss(predictions[mask], data.y[mask])

    loss.backward()
    optimizer.step()
    return loss

@torch.no_grad()
def evaluate():
    """Compute accuracy of the module-level ``model`` on each data split.

    Returns:
        ``[train_acc, val_acc, test_acc]``, where each entry is the share of
        nodes in that split whose arg-max prediction equals the label.
    """
    model.eval()
    predicted = model(data.x, data.edge_index).argmax(dim=1)

    splits = (data.train_mask, data.val_mask, data.test_mask)
    return [
        int((predicted[split] == data.y[split]).sum()) / int(split.sum())
        for split in splits
    ]

# Training loop with early stopping on validation accuracy
best_val_acc, best_test_acc = 0, 0
patience = 0  # consecutive epochs without a new best validation accuracy

for epoch in range(1, max_epochs + 1):
    loss = train()
    train_acc, val_acc, test_acc = evaluate()

    if val_acc <= best_val_acc:
        # No improvement: one step closer to stopping.
        patience += 1
    else:
        # New best validation score: remember it along with the test accuracy
        # of the same epoch, and reset the counter.
        best_val_acc, best_test_acc = val_acc, test_acc
        patience = 0

    print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}")

    if patience == early_stopping_patience:
        print("Early stopping!")
        break

print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Corresponding Test Accuracy: {best_test_acc:.4f}")


Epoch 001, Loss: 1.7920, Train: 0.2667, Val: 0.2560, Test: 0.2140
Epoch 002, Loss: 1.7880, Train: 0.3250, Val: 0.3140, Test: 0.2880
Epoch 003, Loss: 1.7833, Train: 0.4750, Val: 0.3260, Test: 0.2950
Epoch 004, Loss: 1.7801, Train: 0.8417, Val: 0.5920, Test: 0.5680
Epoch 005, Loss: 1.7676, Train: 0.7667, Val: 0.5520, Test: 0.5550
Epoch 006, Loss: 1.7703, Train: 0.7417, Val: 0.5240, Test: 0.5220
Epoch 007, Loss: 1.7549, Train: 0.7667, Val: 0.5520, Test: 0.5540
Epoch 008, Loss: 1.7453, Train: 0.8083, Val: 0.5660, Test: 0.5740
Epoch 009, Loss: 1.7421, Train: 0.8667, Val: 0.5860, Test: 0.5860
Epoch 010, Loss: 1.7290, Train: 0.8917, Val: 0.6280, Test: 0.6230
Epoch 011, Loss: 1.7206, Train: 0.9000, Val: 0.6520, Test: 0.6690
Epoch 012, Loss: 1.7104, Train: 0.9167, Val: 0.6960, Test: 0.7120
Epoch 013, Loss: 1.6889, Train: 0.9167, Val: 0.7220, Test: 0.7210
Epoch 014, Loss: 1.6789, Train: 0.9167, Val: 0.7040, Test: 0.7260
Epoch 015, Loss: 1.6787, Train: 0.9000, Val: 0.7060, Test: 0.7200
Epoch 016,