# GAT node classification with pytorch

## import packages

In [70]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import numpy as np
import os
import urllib.request
import tarfile

## download Cora dataset

In [71]:
!wget "https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz" -O cora.tgz
!tar -xvzf cora.tgz

--2023-08-05 12:09:37--  https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz
Resolving linqs-data.soe.ucsc.edu (linqs-data.soe.ucsc.edu)... 128.114.47.74
Connecting to linqs-data.soe.ucsc.edu (linqs-data.soe.ucsc.edu)|128.114.47.74|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 168052 (164K) [application/x-gzip]
Saving to: 'cora.tgz'

     0K .......... .......... .......... .......... .......... 30% 86.0K 1s
    50K .......... .......... .......... .......... .......... 60%  141K 1s
   100K .......... .......... .......... .......... .......... 91%  107K 0s
   150K .......... ....                                       100% 35.2M=1.4s

2023-08-05 12:09:39 (117 KB/s) - 'cora.tgz' saved [168052/168052]

x cora/
x cora/README
x cora/cora.cites
x cora/cora.content


In [72]:
data_dir = "cora"

citations = pd.read_csv(
    os.path.join(data_dir, "cora.cites"),
    sep="\t",
    header=None,
    names=["target", "source"],
)

papers = pd.read_csv(
    os.path.join(data_dir, "cora.content"),
    sep="\t",
    header=None,
    names=["paper_id"] + [f"term_{idx}" for idx in range(1433)] + ["subject"],
)

In [73]:
# subjectのリナンバリング 
class_idx = {name: id for id, name in enumerate(sorted(papers["subject"].unique()))}
# paperのリナンバリング
paper_idx = {name: idx for idx, name in enumerate(sorted(papers["paper_id"].unique()))}

In [74]:
papers["paper_id"] = papers["paper_id"].apply(lambda name: paper_idx[name])
citations["source"] = citations["source"].apply(lambda name: paper_idx[name])
citations["target"] = citations["target"].apply(lambda name: paper_idx[name])
papers["subject"] = papers["subject"].apply(lambda value: class_idx[value])

In [75]:
features = np.array(papers.iloc[:, 1:-1])
edges = np.array(citations[["target","source"]])
labels = np.array(papers["subject"])

In [76]:
n_features = features.shape[1]
n_classes = len(np.unique(labels))
n_nodes = features.shape[0]

In [77]:
#隣接行列の作成
adj = np.zeros((n_nodes,n_nodes))
for src,tgt in edges:
    adj[src,tgt] = 1

In [78]:
features = torch.from_numpy(features).float()
adj = torch.from_numpy(adj).float()

In [79]:
#データセットの分割
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.5, stratify=labels)

## Implement Dataloader

In [117]:
# データセットのクラス
class CoraDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return idx, self.features[idx], self.labels[idx]

In [118]:
# データセットの作成
train_dataset = CoraDataset(train_features, train_labels)
test_dataset = CoraDataset(test_features, test_labels)

In [119]:
# DataLoaderの作成
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Implement Graph Attention Layer

In [111]:
# Graph Attention Layer
class GraphAttention(nn.Module):
    def __init__(self, in_features, out_features, dropout=0.6, alpha=0.2):
        super(GraphAttention, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.dropout = dropout
        self.alpha = alpha

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        self.a = nn.Parameter(torch.empty(size=(2*out_features, 1)))

        self.leakyrelu = nn.LeakyReLU(self.alpha)

        self.init_weights()

    def init_weights(self):
        torch.nn.init.xavier_uniform_(self.W.data, gain=1.414)
        torch.nn.init.xavier_uniform_(self.a.data, gain=1.414)

    def forward(self, input, adj):
        h = torch.mm(input, self.W)
        N = h.size()[0]

        #a_input = torch.cat([h.repeat(1, N).view(N*N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2*self.out_features)
        a_input = torch.cat([h.repeat(1, N).view(N*N, -1), h.repeat(N, 1)], dim=1)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)

        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)

        h_prime = torch.matmul(attention, h)
        return h_prime

In [84]:
# Multi-Head Graph Attention Layer
class MultiHeadGraphAttention(nn.Module):
    def __init__(self, in_features, out_features, n_heads, dropout=0.6, alpha=0.2):
        super(MultiHeadGraphAttention, self).__init__()
        self.n_heads = n_heads
        self.heads = nn.ModuleList([GraphAttention(in_features, out_features, dropout=dropout, alpha=alpha) for _ in range(n_heads)])

    def forward(self, input, adj):
        head_outs = [head(input, adj) for head in self.heads]
        return torch.mean(torch.stack(head_outs), dim=0)

In [85]:
# Graph Attention Network
class GAT(nn.Module):
    def __init__(self, n_features, n_classes, n_hidden, n_heads, dropout=0.6, alpha=0.2):
        super(GAT, self).__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        self.n_hidden = n_hidden
        self.n_heads = n_heads
        self.dropout = dropout
        self.alpha = alpha

        self.attentions = nn.ModuleList([MultiHeadGraphAttention(n_features, n_hidden, n_heads, dropout=dropout, alpha=alpha) for _ in range(n_heads)])
        self.out_att = GraphAttention(n_hidden*n_heads, n_classes, dropout=dropout, alpha=alpha)

    def forward(self, input, adj):
        x = input
        for att in self.attentions:
            x = F.elu(att(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.out_att(x, adj)
        return F.log_softmax(x, dim=1)

In [86]:
# モデルの定義
n_hidden = 8
n_heads = 8
model = GAT(n_features, n_classes, n_hidden, n_heads)

## Model Training and Evaluating

In [87]:
# モデルの学習
def train_model(model, optimizer, criterion, data_loader, adj):
    model.train()
    total_loss = 0.0
    for features, labels in data_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(features, adj)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * features.size(0)
    return total_loss / len(data_loader.dataset)

# モデルの評価
def evaluate_model(model, criterion, data_loader, adj):
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for features, labels in data_loader:
            features, labels = features.to(device), labels.to(device)
            output = model(features, adj)
            total_loss += criterion(output, labels).item() * features.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(labels.view_as(pred)).sum().item()
    return total_loss / len(data_loader.dataset), correct / len(data_loader.dataset)

In [88]:
# 損失関数と最適化手法の定義
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

In [89]:
# 訓練に際して、可能であればGPU（cuda）を設定します。GPUが搭載されていない場合はCPUを使用します
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cpu device


In [112]:
# モデルの学習と評価
n_epochs = 100
model = model.to(device)
for epoch in range(n_epochs):
    train_loss = train_model(model, optimizer, criterion, train_loader, adj)
    test_loss, test_accuracy = evaluate_model(model, criterion, test_loader, adj)
    print(f"Epoch {epoch + 1}/{n_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

RuntimeError: The size of tensor a (2708) must match the size of tensor b (64) at non-singleton dimension 1

## for debug

In [120]:
indices, input, _ = next(iter(test_loader))
indices.shape,input.shape

(torch.Size([64]), torch.Size([64, 1433]))

In [121]:
indices

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63])

In [103]:
W = torch.empty(size=(n_features, n_hidden))

In [104]:
h = torch.mm(input, W)
N = h.size()[0]

a_input = torch.cat([h.repeat(1, N).view(N*N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2*n_hidden)

In [106]:
h.shape

torch.Size([64, 8])

In [110]:
torch.cat([h.repeat(1, N).view(N*N, -1), h.repeat(N, 1)], dim=1).shape #これをLeaklyReluに通す

torch.Size([4096, 16])

In [130]:
indices.expand(N,N).reshape(N*N,-1)

tensor([[ 0],
        [ 1],
        [ 2],
        ...,
        [61],
        [62],
        [63]])

In [133]:
indices.expand(N,N).permute(1,0).reshape(N*N,-1)

tensor([[ 0],
        [ 0],
        [ 0],
        ...,
        [63],
        [63],
        [63]])

In [135]:
indices_expand = torch.cat([indices.expand(N,N).reshape(N*N,-1),indices.expand(N,N).permute(1,0).reshape(N*N,-1)], dim=1)
indices_expand

tensor([[ 0,  0],
        [ 1,  0],
        [ 2,  0],
        ...,
        [61, 63],
        [62, 63],
        [63, 63]])