In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score
import random
import numpy as np
import networkx as nx

In [None]:
# Load the Cora graph from the Planetoid collection (downloaded to `root` on
# first use); NormalizeFeatures is applied as a transform on access.
dataset = Planetoid(
    root='data/Planetoid',
    name='Cora',
    transform=NormalizeFeatures(),
)
# Take the first graph object of the dataset.
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
# Undirected NetworkX view of the graph, used for the structural features.
G = nx.Graph()
# Register every node id up front: add_edges_from only creates nodes that
# appear in at least one edge, so an isolated node would otherwise be missing
# from G and break the per-node lookups over range(data.num_nodes) later on.
G.add_nodes_from(range(data.num_nodes))
# edge_index is a (2, E) tensor; transpose to E rows of [source, target].
edge_list = data.edge_index.t().tolist()
G.add_edges_from(edge_list)


In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [None]:
# Per-node neighbour count (degree), used here as the "LID" feature.
lid = {node: len(neighbours) for node, neighbours in G.adjacency()}
# HITS hub and authority scores for every node.
# NOTE(review): G is undirected, so hub and authority scores are expected to
# coincide -- confirm that feeding both as features is intended.
hub_scores, auth_scores = nx.hits(G, max_iter=100, normalized=True)


In [None]:
def _as_feature_column(per_node_values):
    """Turn a {node_id: value} mapping into a float32 column of shape (num_nodes, 1).

    Values are read in node-id order 0..data.num_nodes-1 so that row i of the
    result lines up with row i of data.x.
    """
    ordered = [per_node_values[node_id] for node_id in range(data.num_nodes)]
    return torch.tensor(ordered, dtype=torch.float32).unsqueeze(1)


lid_tensor = _as_feature_column(lid)
hub_tensor = _as_feature_column(hub_scores)
auth_tensor = _as_feature_column(auth_scores)

In [None]:
# Append the three structural columns (degree, hub, authority) to the node
# feature matrix.
structural_features = torch.cat([lid_tensor, hub_tensor, auth_tensor], dim=1)
# Only append while data.x still has the dataset's original feature width, so
# re-running this cell does not stack the same columns a second time.
if data.x.size(1) == dataset.num_features:
    # Align devices in case data has already been moved off the CPU.
    data.x = torch.cat([data.x, structural_features.to(data.x.device)], dim=1)


In [None]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Move the graph's tensors to the chosen device.
data = data.to(device)

In [None]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network.

    The first layer runs `heads` attention heads of width `hidden_channels`
    (GATConv's default concatenates them); the second layer uses a single head
    with concat=False and produces `out_channels` values per node.

    Args:
        in_channels: Number of input features per node.
        out_channels: Size of the per-node output vector.
        hidden_channels: Width of each attention head in the first layer.
        heads: Number of attention heads in the first layer.
        dropout: Dropout probability, used both on the layer inputs in
            `forward` and passed to each GATConv layer.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=8, heads=8, dropout=0.6):
        super().__init__()
        self.dropout = dropout
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        # The first layer's heads are concatenated, hence hidden_channels * heads inputs.
        self.gat2 = GATConv(hidden_channels * heads, out_channels, heads=1, concat=False, dropout=dropout)

    def forward(self, x, edge_index):
        """Return the second layer's raw (un-normalised) output for every node.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed through to both GATConv layers.
        """
        # Dropout is only active in training mode (self.training).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.gat1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gat2(x, edge_index)
        return x

In [None]:
# Seed PyTorch so weight initialisation and dropout masks are repeatable
# (as far as the backend's kernels are deterministic).
SEED = 42
torch.manual_seed(SEED)

# The 64-wide output is used both as class logits for the loss below and,
# later, as the node embedding for link prediction.
# NOTE(review): cross_entropy only ever indexes the columns that occur as
# labels in data.y -- confirm the remaining columns are meant to be free.
model = GAT(data.num_node_features, 64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# NOTE(review): message passing uses every edge in data.edge_index, including
# the edges that are later sampled as link-prediction test positives, so the
# reported link-prediction metrics are optimistic.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    # Supervise on the training nodes only.
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [None]:
# Switch to inference mode (disables dropout) and export the node embeddings.
model.eval()
# no_grad avoids building an autograd graph that would be discarded anyway.
with torch.no_grad():
    embeddings = model(data.x, data.edge_index).cpu().numpy()

In [None]:
# One row per directed edge: [source, target].
edge_array = data.edge_index.t().cpu().numpy()
# Keep only the rows with source < target, so an edge stored in both
# directions is kept once.
positive_edges = [(src, dst) for src, dst in edge_array if src < dst]


In [None]:
# Seed the sampler so the same negative edges are drawn on every run.
SEED = 42
random.seed(SEED)

num_samples = len(positive_edges)
all_nodes = list(range(data.num_nodes))

# Set of known edges in canonical (low, high) order: O(1) membership tests
# instead of scanning the positive edge list for every candidate pair.
positive_edge_set = {
    (min(int(a), int(b)), max(int(a), int(b))) for a, b in positive_edges
}

# Fail fast instead of looping forever if more negatives are requested than
# there are unconnected node pairs.
max_negatives = len(all_nodes) * (len(all_nodes) - 1) // 2 - len(positive_edge_set)
if num_samples > max_negatives:
    raise ValueError(
        f"Cannot sample {num_samples} negative edges: only {max_negatives} unconnected node pairs exist"
    )

negative_edges = set()
while len(negative_edges) < num_samples:
    u, v = random.sample(all_nodes, 2)
    # Canonical order so (u, v) and (v, u) count as the same undirected pair.
    pair = (u, v) if u < v else (v, u)
    if pair not in positive_edge_set:
        negative_edges.add(pair)


In [None]:
# Positive pairs first, then the sampled negative pairs.
positive_pairs = list(positive_edges[:num_samples])
negative_pairs = list(negative_edges)

# Each edge is represented by the Hadamard (element-wise) product of its two
# endpoint embeddings.
X = np.array([
    embeddings[src] * embeddings[dst]
    for src, dst in positive_pairs + negative_pairs
])
# Label 1 for existing edges, 0 for sampled non-edges, in the same order as X.
y = np.array([1] * len(positive_pairs) + [0] * len(negative_pairs))

In [None]:
# Seeded generator so the 80/20 split (and the metrics computed from it) can
# be reproduced on a fresh kernel.
SEED = 42
rng = np.random.default_rng(SEED)

# Random ordering of the row indices of X / y.
indices = rng.permutation(len(X))
split = int(0.8 * len(X))
train_idx, test_idx = indices[:split], indices[split:]

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [None]:
# Fit a linear discriminant classifier on the training edge features.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Hard labels for the threshold metrics; column 1 of predict_proba is kept as
# the score for the ranking metric.
y_pred = lda.predict(X_test)
class_probabilities = lda.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

In [None]:
# AUC is computed from the predicted scores; the other metrics use hard labels.
auc = roc_auc_score(y_test, y_prob)
f1, precision, recall = (
    metric(y_test, y_pred)
    for metric in (f1_score, precision_score, recall_score)
)


In [None]:
# Four-line summary of the link-prediction metrics, four decimals each.
metrics_report = f"AUC: {auc:.4f}\nF1 Score: {f1:.4f}\nPrecision: {precision:.4f}\nRecall: {recall:.4f}"
print(metrics_report)

AUC: 0.9117
F1 Score: 0.8311
Precision: 0.8215
Recall: 0.8409
