<a href="https://colab.research.google.com/github/SeokwonHan02/2023-DSAIL-internship/blob/main/lab4_gat_linkpred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Modules

In [None]:
import os
import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import ConfusionMatrixDisplay
from typing import Tuple, List
from sklearn.metrics import roc_auc_score

In [None]:
# Single seed used by every random number generator in this notebook.
SEED = 717


def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: value passed to `random.seed`, `np.random.seed` and
            `torch.manual_seed`; when CUDA is available it is also applied to
            every visible CUDA device.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if torch.cuda.is_available():
        # Seed all visible GPUs, not only the currently selected one.
        torch.cuda.manual_seed_all(seed)


# Export the cuBLAS workspace setting before deterministic mode is switched on,
# so it is already in place when the first CUDA matmul creates a cuBLAS handle.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':16:8'

# cuDNN autotuning (benchmark=True) may select a different kernel from run to
# run, which defeats the deterministic settings below, so it is disabled.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)
set_seed(SEED)

# Load Dataset

In [None]:
# Colab-only step: prompt for a file upload from the local machine.
# The recorded cell output shows `Cora.zip` being saved by this call.
# NOTE(review): `google.colab` is presumably unavailable outside Colab, so this
# cell would need to be skipped when running locally — TODO confirm.
from google.colab import files
f = files.upload()

Saving Cora.zip to Cora.zip


In [None]:
# Extract the uploaded archive; the recorded output lists Cora/edge.pt, Cora/feat.pt and Cora/label.pt.
!unzip Cora.zip

Archive:  Cora.zip
  inflating: Cora/edge.pt            
  inflating: Cora/feat.pt            
  inflating: Cora/label.pt           


In [None]:
# Read the three tensors extracted from Cora.zip.
edges_raw: torch.Tensor = torch.load('./Cora/edge.pt')
labels: torch.Tensor = torch.load('./Cora/label.pt')
features: torch.Tensor = torch.load('./Cora/feat.pt')

# Graph size: one feature row per node, one edge per column of `edges_raw`.
num_nodes = features.size(0)
num_edges = edges_raw.size(1)

# The stored feature matrix is only used for the node count above; from here
# on every node is represented by a one-hot identity row instead.
features = torch.eye(num_nodes)
feat_dim = features.size(1)

print("Number of Total Nodes : ", num_nodes)
print("Number of Total Edges : ", num_edges)
print("Dimension of Node Features : ", feat_dim)

Number of Total Nodes :  2708
Number of Total Edges :  10556
Dimension of Node Features :  2708


In [None]:
# Shuffle the edge columns once, then cut the shuffled list into three
# consecutive train / validation / test slices.
# NOTE(review): if `edges_raw` stores every undirected edge in both directions,
# (u, v) and (v, u) can land in different splits — TODO confirm and, if so,
# split on undirected pairs instead.
indices = torch.randperm(num_edges)
edges = edges_raw[:, indices]

train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1

num_train = int(num_edges * train_ratio)
num_val = int(num_edges * val_ratio)
# The test split takes whatever is left, so the three sizes always sum to num_edges.
num_test = num_edges - (num_train + num_val)

val_end = num_train + num_val
train_edges = edges[:, :num_train]
val_edges = edges[:, num_train:val_end]
test_edges = edges[:, val_end:]

print("Number of Training Edges: ", train_edges.size(1))
print("Number of Validation Edges: ", val_edges.size(1))
print("Number of Test Edges: ", test_edges.size(1))

Number of Training Edges:  8444
Number of Validation Edges:  1055
Number of Test Edges:  1057


In [None]:
def add_self_loops(edges_raw: torch.Tensor, num_nodes: int) -> torch.Tensor:
    """Return the edge list with exactly one self-loop per node.

    Self-loops already present in `edges_raw` are dropped first, so that
    appending one (i, i) edge for every node never creates duplicates.

    Args:
        edges_raw: [2, num_edges] tensor of (src, dst) node indices.
        num_nodes: number of nodes; loops are added for indices 0..num_nodes-1.

    Returns:
        [2, num_kept_edges + num_nodes] tensor with the same dtype and device
        as `edges_raw`: the non-loop edges followed by the self-loops.
    """
    src, dst = edges_raw
    not_loop = src != dst                     # True for edges whose endpoints differ
    edges_wo_sloops = edges_raw[:, not_loop]  # keep only the non-loop edges

    # Build the loops with the input's dtype and device so the concatenation
    # also works for CUDA or non-int64 edge tensors.
    node_ids = torch.arange(num_nodes, dtype=edges_raw.dtype, device=edges_raw.device)
    sloops = node_ids.repeat(2, 1)            # [2, num_nodes]: row 0 == row 1 == node id

    return torch.cat((edges_wo_sloops, sloops), dim=1)

# Rebuild the training edge list so that every node has exactly one self-loop.
# NOTE(review): this rebinds `edges` (previously the shuffled full edge list),
# and in the cells visible here the result is never passed to the model —
# training and evaluation both use `train_edges`. TODO confirm whether the
# self-looped list was meant to be used for message passing.
edges = add_self_loops(train_edges, num_nodes)
print("Number of Total Edges after adding self-loops: ", edges.shape[1])

Number of Total Edges after adding self-loops:  11152


# Set Hyperparameters

In [None]:
# Per-model settings. Both variants currently share the same values but are
# kept as separate dictionaries so they can be tuned independently.
# NOTE(review): in the cells visible here only emb_dim / dropout / alpha /
# num_heads and num_epochs are read; lr, weight_decay and the split ratios
# appear unused — TODO confirm.
hyperparameters_GAT = dict(
    emb_dim=8,
    dropout=0.6,
    alpha=0.2,
    num_heads=8,
    lr=0.01,
    weight_decay=5e-4,
)

hyperparameters_GATv2 = dict(
    emb_dim=8,
    dropout=0.6,
    alpha=0.2,
    num_heads=8,
    lr=0.01,
    weight_decay=5e-4,
)

# Settings of the training loop itself.
training_settings = dict(num_epochs=200, train_ratio=0.8, val_ratio=0.1)

hyperparameters = dict(
    GAT=hyperparameters_GAT,
    GATv2=hyperparameters_GATv2,
    Training=training_settings,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


# Split Indices to train-valid-test

# Generate GAT Layer

In [None]:
class GATLayer(nn.Module):
    """Single attention head of a graph attention network, driven by an edge list.

    For every edge (src -> dst) an unnormalised score
    LeakyReLU(a_1^T W h_src + a_2^T W h_dst) is computed. Scores are
    softmax-normalised over all edges that share the same destination node, and
    the projected source features are summed into the destination node using
    those weights.
    """

    def __init__(self, in_dim: int,
                 out_dim: int,
                 activation=lambda x: x,
                 dropout: float= 0.5,
                 alpha: float= 0.2) -> None:
        '''
            - in_dim: size of the input node features
            - out_dim: size of the projected node features
            - activation: callable applied to the aggregated output (identity by default)
            - dropout: dropout probability applied to the attention coefficients
            - alpha: negative slope of the LeakyReLU used on the attention scores
        '''

        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.activation = activation
        self.dropout = dropout
        self.alpha = alpha

        # Linear projection W and attention vector a = [a_1 ; a_2].
        self.W = nn.Parameter(torch.empty(size=(in_dim, out_dim)))
        nn.init.xavier_uniform_(self.W.data)
        self.a = nn.Parameter(torch.empty(size=(2*out_dim, 1)))
        nn.init.xavier_uniform_(self.a.data)

        self.leakyrelu = nn.LeakyReLU(negative_slope=self.alpha)

    def forward(self, feat: torch.Tensor,
                edges: torch.Tensor) -> torch.Tensor:
        '''
            - feat: [num_nodes, in_dim]
            - edges: [2, num_edges]
            - out: [num_nodes, out_dim]
        '''

        message = feat @ self.W                          # [num_nodes, out_dim]
        # a^T [Wh_src || Wh_dst] splits into one term per endpoint, so each
        # half of `a` is applied once per node instead of once per edge.
        attn_src = message @ self.a[:self.out_dim, :]    # first half of a
        attn_dst = message @ self.a[self.out_dim:, :]    # second half of a

        src, dst = edges
        attn_scores = self.leakyrelu(attn_src[src] + attn_dst[dst])   # one score per edge
        if attn_scores.numel() > 0:
            # Shift by the global maximum before exponentiating. The softmax
            # below is unchanged by a constant shift; this only prevents
            # overflow in exp(). Skipped for an empty edge list, where max()
            # is undefined.
            attn_scores = attn_scores - attn_scores.max()
        exp_scores = attn_scores.exp()

        # Softmax denominator: sum of exp(score) over the edges entering each
        # destination. The buffer inherits dtype and device from `message`, so
        # non-float32 inputs work as well.
        exp_sum = message.new_zeros((message.shape[0], 1)).scatter_add_(
            dim = 0,
            index = dst.unsqueeze(1),   # (num_edges,) -> (num_edges, 1)
            src = exp_scores
        )
        attn_coeffs = exp_scores / exp_sum[dst]
        attn_coeffs = F.dropout(attn_coeffs, self.dropout, training = self.training)

        # Weighted sum of the projected source features for every destination.
        weighted = message[src] * attn_coeffs            # [num_edges, out_dim]
        out = torch.zeros_like(message).scatter_add_(
            dim = 0,
            index = dst.unsqueeze(1).expand_as(weighted),   # broadcast view, no copy
            src = weighted
        )
        out = self.activation(out)

        return out

In [None]:
class GAT(nn.Module):
    """One-layer, multi-head GAT encoder that returns node embeddings.

    Every head is an independent `GATLayer` with an ELU activation; the head
    outputs are concatenated along the feature axis.
    """

    def __init__(self, feat_dim: int,
                 emb_dim: int,
                 dropout: float=0.5,
                 alpha: float=0.2,
                 num_heads: int=8) -> None:
        '''
            - feat_dim: size of the input node features
            - emb_dim: output size of each attention head
            - dropout: dropout probability (input features, attention, output)
            - alpha: LeakyReLU negative slope used inside every head
            - num_heads: number of attention heads
        '''

        super().__init__()
        self.dropout = dropout
        self.num_heads = num_heads

        # Build the heads one after another.
        heads = nn.ModuleList()
        for _ in range(num_heads):
            heads.append(GATLayer(feat_dim,
                                  emb_dim,
                                  activation = nn.ELU(),
                                  dropout = dropout,
                                  alpha = alpha))
        self.attn_heads = heads

        # The module moves itself to the GPU when one is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def forward(self, feat: torch.Tensor,
                edges: torch.Tensor) -> torch.Tensor:
        '''
            - feat: [num_nodes, in_dim]
            - edges: [2, num_edges]
            - out: [num_nodes, num_heads * emb_dim]
        '''

        # Dropout on the raw inputs (active in training mode only).
        dropped_feat = F.dropout(feat, self.dropout, training = self.training)

        # Run every head on the same inputs and concatenate along the feature axis.
        head_outputs = []
        for head in self.attn_heads:
            head_outputs.append(head(dropped_feat, edges))
        concatenated = torch.cat(head_outputs, dim = 1)

        # Dropout on the concatenated embeddings as well.
        return F.dropout(concatenated, self.dropout, training = self.training)

In [None]:
def train_one_epoch(model, features, edges, lr = 0.01, optimizer = None):
    """Run one full-batch optimisation step on the positive-edge loss.

    The model embeds all nodes using `edges` for message passing. Every edge in
    `edges` is then scored with the dot product of its endpoint embeddings, and
    the loss is the mean of -log(sigmoid(score)) over those edges.

    NOTE(review): the loss has no negative-sample term, so it can be reduced
    simply by inflating all dot products. Consider adding sampled non-edges
    with a binary cross-entropy objective.

    Args:
        model: module called as `model(features, edges)`, returning one
            embedding row per node.
        features: node feature matrix forwarded to the model.
        edges: [2, num_edges] index tensor of positive edges.
        lr: Adam learning rate, used only when `optimizer` is not given.
        optimizer: optimizer to step. When omitted, an Adam optimizer is
            created on first use and cached on the model, so that its moment
            estimates persist across epochs instead of being reset every call.

    Returns:
        The loss of this step as a Python float.
    """
    if optimizer is None:
        optimizer = getattr(model, "_link_pred_optimizer", None)
        # (Re)build only on first use or when a different learning rate is requested.
        if optimizer is None or optimizer.defaults["lr"] != lr:
            optimizer = torch.optim.Adam(model.parameters(), lr = lr)
            model._link_pred_optimizer = optimizer

    model.train()
    optimizer.zero_grad()

    out = model(features, edges)

    pos_src = edges[0, :]
    pos_dst = edges[1, :]
    pos_logits = torch.sum(out[pos_src] * out[pos_dst], dim=1)

    # -log(sigmoid(x)) computed through logsigmoid: same value, but it stays
    # finite when sigmoid(x) underflows to 0 for very negative logits.
    loss = -F.logsigmoid(pos_logits).mean()

    loss.backward()
    optimizer.step()

    return loss.item()

def compute_scores(out, edges):
    """Score each edge as sigmoid(<z_src, z_dst>) and return a NumPy array.

    - out: node embedding matrix, one row per node
    - edges: [2, num_edges] node indices (row 0 = source, row 1 = destination)
    """
    src_idx, dst_idx = edges[0, :], edges[1, :]
    dot_products = (out[src_idx] * out[dst_idx]).sum(dim=1)
    probabilities = dot_products.sigmoid()
    return probabilities.cpu().numpy()

def generate_neg_edges(pos_edges, num_nodes, num_neg_edges):
    """Sample node pairs that are not joined by a known edge in either direction.

    Args:
        pos_edges: [2, num_edges] known edges, as a torch tensor (any device)
            or a NumPy array.
        num_nodes: node ids are drawn uniformly from 0..num_nodes-1.
        num_neg_edges: number of negative pairs to draw (duplicates allowed).

    Returns:
        int64 NumPy array of shape [2, num_neg_edges].

    Raises:
        ValueError: if negatives are requested but every ordered pair of
            distinct nodes is already blocked by a known edge.
    """
    if isinstance(pos_edges, torch.Tensor):
        pos_edges = pos_edges.detach().cpu().numpy()
    pos_edges = np.asarray(pos_edges)

    # Convert to plain Python ints before hashing. Tuples of 0-d tensors hash
    # by object identity, so they would never match the sampled (int, int)
    # candidates and the filter below would silently let real edges through.
    src = pos_edges[0].tolist()
    dst = pos_edges[1].tolist()
    forbidden = set(zip(src, dst))
    forbidden.update(zip(dst, src))   # block both directions with one lookup

    if num_neg_edges > 0:
        # Rejection sampling never terminates when no valid pair exists.
        blocked = sum(1 for u, v in forbidden
                      if u != v and 0 <= u < num_nodes and 0 <= v < num_nodes)
        if blocked >= num_nodes * (num_nodes - 1):
            raise ValueError("no negative edge can be sampled: every node pair is connected")

    neg_edges = []
    while len(neg_edges) < num_neg_edges:
        node1 = np.random.randint(num_nodes)
        node2 = np.random.randint(num_nodes)

        if node1 != node2 and (node1, node2) not in forbidden:
            neg_edges.append((node1, node2))

    # reshape keeps the [2, k] layout even when k == 0
    return np.array(neg_edges, dtype=np.int64).reshape(-1, 2).T

@torch.no_grad()
def test(model, features, train_edges, val_edges, test_edges):
    """Compute ROC-AUC of the link scores on the train / validation / test edges.

    Embeddings are produced once, in eval mode, using the training edges for
    message passing. Each split is scored on its positive edges plus freshly
    sampled negative pairs.

    NOTE(review): the number of negatives for every split equals the number of
    training edges, and negatives are redrawn on every call, so the reported
    AUCs vary slightly between calls even for a fixed model.

    Returns:
        (train_auc, val_auc, test_auc)
    """
    model.eval()
    embeddings = model(features, train_edges)

    node_count = features.shape[0]
    neg_count = len(train_edges[0])

    # Edges that negatives must avoid, per split: train only, train+val, and
    # train+val+test.
    known_edges_per_split = (
        train_edges,
        np.hstack((train_edges.cpu(), val_edges.cpu())),
        np.hstack((train_edges.cpu(), val_edges.cpu(), test_edges.cpu())),
    )
    negatives_per_split = [
        generate_neg_edges(known, node_count, neg_count)
        for known in known_edges_per_split
    ]

    aucs = []
    positives_per_split = (train_edges, val_edges, test_edges)
    for positives, negatives in zip(positives_per_split, negatives_per_split):
        pos_scores = compute_scores(embeddings, positives)
        neg_scores = compute_scores(embeddings, negatives)

        # Label 1 for observed edges, 0 for sampled non-edges.
        y_true = np.hstack([np.ones(len(pos_scores)), np.zeros(len(neg_scores))])
        y_score = np.hstack([pos_scores, neg_scores])
        aucs.append(roc_auc_score(y_true, y_score))

    train_auc, val_auc, test_auc = aucs
    return train_auc, val_auc, test_auc

def train(num_epochs, model, features, train_edges, val_edges, test_edges,
          lr = 0.01, checkpoint_path = "best_model.pt"):
    """Train for `num_epochs` epochs and keep the checkpoint with the best validation AUC.

    After every optimisation step the model is evaluated on all three splits.
    Whenever the validation AUC improves, the model's state dict is written to
    `checkpoint_path` and the matching test AUC is recorded.

    Args:
        num_epochs: number of training epochs.
        model: model passed to `train_one_epoch` and `test`.
        features: node feature matrix.
        train_edges: edges used for message passing and as training positives.
        val_edges: held-out edges used for model selection.
        test_edges: held-out edges used for the final report.
        lr: learning rate forwarded to `train_one_epoch`.
        checkpoint_path: where the best state dict is saved.

    Returns:
        (best_epoch, best_val_auc, best_test_auc), where `best_test_auc` is the
        test AUC measured at the epoch with the best validation AUC.
    """
    best_val_auc = best_test_auc = 0
    best_epoch = 0

    with tqdm(range(1, num_epochs + 1), unit="epoch", desc="Training") as pbar:
        for epoch in pbar:

            loss = train_one_epoch(model, features, train_edges, lr)
            train_auc, val_auc, test_auc = test(model, features, train_edges, val_edges, test_edges)

            # Model selection uses the validation split only.
            if val_auc > best_val_auc:
                best_val_auc = val_auc
                best_test_auc = test_auc
                best_epoch = epoch
                torch.save(model.state_dict(), checkpoint_path)

            postfix_new = ", ".join([f"Auc. (Train): {train_auc:.4f}",
                                     f"Auc. (Val): {val_auc:.4f} (best: {best_val_auc:.4f})",
                                     f"Auc. (Test): {test_auc:.4f} (best: {best_test_auc:.4f})",
                                     f"loss: {loss:.4f}"])
            pbar.set_postfix_str(postfix_new)

    return best_epoch, best_val_auc, best_test_auc


In [None]:
# Re-seed right before building the model so that weight initialisation and
# training randomness start from the same state whenever this cell is run.
set_seed(SEED)

# Build the encoder from the "GAT" hyperparameter group.
# NOTE(review): the "lr" and "weight_decay" entries of that group are not
# forwarded anywhere in this cell — TODO confirm whether they should be.
model = GAT(feat_dim,
            hyperparameters["GAT"]["emb_dim"],
            hyperparameters["GAT"]["dropout"],
            hyperparameters["GAT"]["alpha"],
            hyperparameters["GAT"]["num_heads"]).to(device)

# Move the inputs to the same device as the model. These assignments rebind
# the notebook-level names to the device copies.
features = features.to(device)
train_edges = train_edges.to(device)
val_edges = val_edges.to(device)
test_edges = test_edges.to(device)

num_epochs = hyperparameters["Training"]["num_epochs"]

# NOTE(review): the third result is bound to `best_test_aucs` (plural) although
# train() returns a single best test AUC value.
best_epoch, best_val_auc, best_test_aucs = train(num_epochs, model, features, train_edges, val_edges, test_edges)

Training: 100%|██████████| 200/200 [01:20<00:00,  2.50epoch/s, Auc. (Train): 0.8789, Auc. (Val): 0.7893 (best: 0.8209), Auc. (Test): 0.7899 (best: 0.8249), loss: 0.4236]
