SAIDL 2024 Spring Assignment

Author: Shanay Mehta

In [1]:
!pip install torch_geometric



In [2]:
import torch_geometric as tg
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import torch.nn as nn
import torch.nn.functional as F

In [3]:
torch.random.manual_seed(1)

<torch._C.Generator at 0x7ec1542fe6b0>

In [4]:
name_data = 'Cora'
dataset = Planetoid(root= '/tmp/Cora', name = name_data)
dataset.transform = T.NormalizeFeatures()

print(f"Number of Classes in {name_data}:", dataset.num_classes)
print(f"Number of Node Features in {name_data}:", dataset.num_node_features)

Number of Classes in Cora: 7
Number of Node Features in Cora: 1433


In [5]:
data = dataset[0]

In [6]:
print(data.train_mask.sum().item())
print(data.val_mask.sum().item())
print(data.test_mask.sum().item())

140
500
1000


In [7]:
print(data.num_features)

1433


In [8]:
print(data.x.shape)

torch.Size([2708, 1433])


In [9]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, num_heads, dropout, alpha, concat=True):
        super(GATLayer, self).__init__()
        self.dropout = dropout
        self.alpha = alpha
        self.concat = concat
        self.in_features = in_features
        self.out_features = out_features
        self.num_heads = num_heads

        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features * num_heads)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414) # Original GAT paper uses Xavier Glorot initialisation for Cora dataset

        self.a = nn.Parameter(torch.zeros(size=(2*num_heads*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj_mat):

        h = torch.mm(input, self.W).view(-1, self.num_heads*self.out_features) # matrix multiplication # N x (head*out_feature)
        N = h.size()[0]

        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.num_heads * self.out_features)
        e       = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        # Masked Attention
        zero_vec  = -9e15*torch.ones_like(e)
        attention = torch.where(adj_mat > 0, e, zero_vec)

        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h).view(-1, self.num_heads * self.out_features)  # Concatenate attention heads

        if self.concat == False:
            # Take the mean across heads
            h_prime = h_prime.view(-1, self.num_heads, self.out_features)
            h_prime = h_prime.mean(dim=1)

        return h_prime

The architecture of the model is completely inspired from the original GAT paper.

In [10]:
class GAT(nn.Module):
    def __init__(self, num_features):
        super(GAT, self).__init__()
        self.num_head1 = 8
        self.layer1 = GATLayer(num_features, out_features = 8, num_heads = 8, dropout = 0.6, alpha = 0.2)
        self.layer2 = GATLayer(in_features = 8*8, out_features = 7, num_heads = 1, dropout = 0.6, alpha = 0.2, concat = False)

    def forward(self, input, adj_mat):
        input = F.dropout(input, p=0.6, training=self.training)
        a1 = self.layer1.forward(input, adj_mat)
        a1 = F.elu(a1)
        a1 = F.dropout(a1, p=0.6, training=self.training)
        a2 = self.layer2.forward(a1, adj_mat)
        return F.softmax(a2, dim=1)

In [11]:
model = GAT(data.num_features).to('cpu')
optimizer = torch.optim.Adam(model.parameters(), lr = 0.005, weight_decay = 0.0005)
criterion = nn.CrossEntropyLoss()

In [12]:
adj = torch.zeros((data.num_nodes, data.num_nodes))
for i, j in data.edge_index.t().tolist():
    adj[i, j] = 1

In [13]:
def train(model, data, optimizer, criterion, num_epochs=20):

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        output = model.forward(data.x,adj)
        loss = criterion(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_output = model.forward(data.x,adj)
            val_loss = criterion(val_output[data.val_mask], data.y[data.val_mask])
            pred = val_output.max(dim=1)
            correct = float(pred.indices[data.val_mask].eq(data.y[data.val_mask]).sum().item())
            acc = correct / data.val_mask.sum().item()

        print(f"Epoch {epoch}: Loss: {loss.item()}, Val Loss: {val_loss.item()}, Val Acc: {acc}")


In [14]:
train(model, data, optimizer, criterion)

Epoch 0: Loss: 1.9458681344985962, Val Loss: 1.9452917575836182, Val Acc: 0.296
Epoch 1: Loss: 1.945309042930603, Val Loss: 1.9447896480560303, Val Acc: 0.408
Epoch 2: Loss: 1.9439773559570312, Val Loss: 1.9442617893218994, Val Acc: 0.486
Epoch 3: Loss: 1.9430537223815918, Val Loss: 1.9437885284423828, Val Acc: 0.53
Epoch 4: Loss: 1.9416532516479492, Val Loss: 1.9433534145355225, Val Acc: 0.56
Epoch 5: Loss: 1.9408105611801147, Val Loss: 1.9429141283035278, Val Acc: 0.592
Epoch 6: Loss: 1.940830111503601, Val Loss: 1.9424846172332764, Val Acc: 0.632
Epoch 7: Loss: 1.941225528717041, Val Loss: 1.942089557647705, Val Acc: 0.658
Epoch 8: Loss: 1.9380977153778076, Val Loss: 1.941712498664856, Val Acc: 0.666
Epoch 9: Loss: 1.938604712486267, Val Loss: 1.9413706064224243, Val Acc: 0.67
Epoch 10: Loss: 1.9363152980804443, Val Loss: 1.9410197734832764, Val Acc: 0.684
Epoch 11: Loss: 1.9383045434951782, Val Loss: 1.9406660795211792, Val Acc: 0.704
Epoch 12: Loss: 1.9386428594589233, Val Loss: 1

In [15]:
model.eval()
with torch.no_grad():
  test_output = model.forward(data.x,adj)
  test_loss = criterion(test_output[data.test_mask], data.y[data.test_mask])
  pred = test_output.max(dim=1)
  correct = float(pred.indices[data.test_mask].eq(data.y[data.test_mask]).sum().item())
  acc = correct / data.test_mask.sum().item()

print(acc)

0.735


In [16]:
print(pred.indices[data.test_mask])

tensor([1, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 6,
        2, 1, 3, 3, 3, 3, 3, 3, 1, 4, 3, 3, 6, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5,
        5, 5, 2, 2, 2, 2, 2, 6, 6, 1, 0, 6, 0, 0, 5, 0, 2, 5, 3, 0, 0, 6, 6, 5,
        1, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 3, 1, 3, 3,
        3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 4, 4, 4, 4, 3, 3, 2, 5, 5, 5, 5,
        6, 5, 5, 5, 5, 6, 4, 3, 5, 0, 1, 6, 0, 0, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0,
        0, 0, 0, 0, 3, 0, 5, 3, 3, 3, 3, 3, 3, 1, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        6, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 3, 3, 3, 3, 3, 1, 2, 3, 3, 6,
        3, 3, 1, 3, 1, 4, 4, 4, 3, 3, 3,