# Article Topic Classification

In [1]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root="./datasets/", name="Cora")
data = dataset[0]
dataset.num_classes, data

(7,
 Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708]))

In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv

In [3]:
def accuracy(y_pred, y_true):
    return torch.sum(y_pred == y_true) / len(y_true)

In [18]:
class GAT(torch.nn.Module):
    def __init__(self, dim_in, dim_h, dim_out, heads=8):
        super().__init__()
        self.gat1 = GATv2Conv(dim_in, dim_h, heads=heads)
        self.gat2 = GATv2Conv(dim_h * heads, dim_out, heads=1)

    def forward(self, x, edge_index):
        h = F.dropout(x, p=0.6, training=self.training)
        h = self.gat1(x, edge_index)
        h = F.elu(h)
        h = F.dropout(h, p=0.6, training=self.training)
        h = self.gat2(h, edge_index)
        return F.log_softmax(h, dim=1)

    def fit(self, data, epochs):
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=0.0005, weight_decay=0.01)

        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x, data.edge_index)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()

            if epoch % 20 == 0:
                val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: {acc*100:>5.2f}% | Val Loss: {val_loss:.2f} | Val Acc: {val_acc*100:.2f}%')

    @torch.no_grad()
    def test(self, data):
        self.eval()
        out = self(data.x, data.edge_index)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc

In [19]:
gat = GAT(dataset.num_features, 8, dataset.num_classes)
gat.fit(data, epochs=100)

Epoch   0 | Train Loss: 1.958 | Train Acc: 17.14% | Val Loss: 1.95 | Val Acc: 14.60%
Epoch  20 | Train Loss: 1.554 | Train Acc: 90.71% | Val Loss: 1.72 | Val Acc: 65.80%
Epoch  40 | Train Loss: 1.218 | Train Acc: 93.57% | Val Loss: 1.53 | Val Acc: 73.60%
Epoch  60 | Train Loss: 0.946 | Train Acc: 95.71% | Val Loss: 1.35 | Val Acc: 76.40%
Epoch  80 | Train Loss: 0.716 | Train Acc: 97.86% | Val Loss: 1.19 | Val Acc: 77.00%
Epoch 100 | Train Loss: 0.560 | Train Acc: 97.86% | Val Loss: 1.09 | Val Acc: 79.00%


In [21]:
acc = gat.test(data)
acc.item() * 100

80.59999942779541