<a href="https://colab.research.google.com/github/Adamphoenix003/GNN-LinkPrediction/blob/main/GCNCora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch-geometric


Collecting torch-geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.7.0


In [4]:
import torch
import numpy as np
import pandas as pd
from torch_geometric.data import Data

content_path = "/content/cora.content"
cites_path = "/content/cora.cites"

content = pd.read_csv(content_path, sep="\t", header=None)
cites = pd.read_csv(cites_path, sep="\t", header=None)
# -----------------------
# Process Nodes
# -----------------------

node_ids = content[0].values
features = content.iloc[:, 1:-1].values
labels = content.iloc[:, -1].values

# Map node IDs to indices
id_map = {j: i for i, j in enumerate(node_ids)}

# Convert features to tensor
x = torch.tensor(features, dtype=torch.float)

# -----------------------
# Process Edges
# -----------------------

edges = cites.values
edge_index = []

for src, dst in edges:
    if src in id_map and dst in id_map:
        edge_index.append([id_map[src], id_map[dst]])

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# Make undirected
edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)

# Create Data object
data = Data(x=x, edge_index=edge_index)

print(data)


Data(x=[2708, 1433], edge_index=[2, 10858])


In [5]:
from torch_geometric.transforms import RandomLinkSplit

transform = RandomLinkSplit(
    num_val=0.1,
    num_test=0.2,
    is_undirected=True,
    add_negative_train_samples=True
)

train_data, val_data, test_data = transform(data)


In [6]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch import nn

class GCNEncoder(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x


In [7]:
class LinkPredictor(nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin = nn.Linear(hidden_channels, 1)

    def forward(self, z, edge_label_index):
        src, dst = edge_label_index
        h = z[src] * z[dst]   # Hadamard
        return torch.sigmoid(self.lin(h)).view(-1)


In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = GCNEncoder(x.shape[1], 128).to(device)
predictor = LinkPredictor(128).to(device)

optimizer = torch.optim.Adam(
    list(encoder.parameters()) + list(predictor.parameters()),
    lr=0.01
)

train_data = train_data.to(device)
val_data = val_data.to(device)
test_data = test_data.to(device)


In [9]:
def train():
    encoder.train()
    predictor.train()
    optimizer.zero_grad()

    z = encoder(train_data.x, train_data.edge_index)

    pred = predictor(z, train_data.edge_label_index)
    loss = F.binary_cross_entropy(pred, train_data.edge_label.float())

    loss.backward()
    optimizer.step()

    return loss.item()


In [10]:
from sklearn.metrics import roc_auc_score, average_precision_score

@torch.no_grad()
def test(data):
    encoder.eval()
    predictor.eval()

    z = encoder(data.x, data.edge_index)
    pred = predictor(z, data.edge_label_index).cpu()
    y = data.edge_label.cpu()

    auc = roc_auc_score(y, pred)
    ap = average_precision_score(y, pred)

    return auc, ap


In [11]:
for epoch in range(1, 201):
    loss = train()

    if epoch % 20 == 0:
        val_auc, val_ap = test(val_data)
        print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, Val AUC: {val_auc:.4f}")


Epoch 020, Loss: 0.3498, Val AUC: 0.8437
Epoch 040, Loss: 0.0516, Val AUC: 0.8952
Epoch 060, Loss: 0.0030, Val AUC: 0.8860
Epoch 080, Loss: 0.0003, Val AUC: 0.8865
Epoch 100, Loss: 0.0001, Val AUC: 0.8882
Epoch 120, Loss: 0.0001, Val AUC: 0.8882
Epoch 140, Loss: 0.0000, Val AUC: 0.8885
Epoch 160, Loss: 0.0000, Val AUC: 0.8881
Epoch 180, Loss: 0.0000, Val AUC: 0.8881
Epoch 200, Loss: 0.0000, Val AUC: 0.8877


In [12]:
test_auc, test_ap = test(test_data)

print("Test AUC:", test_auc)
print("Test AP:", test_ap)


Test AUC: 0.8927579689524092
Test AP: 0.9029274557589705
