In [63]:
import dgl
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
from dgl.nn.pytorch import GraphConv
import dgl.function as fn
from sklearn.metrics import roc_auc_score

In [64]:
dataset = CoraGraphDataset()
g = dataset[0]

print(g)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})


In [65]:
u, v = g.edges()
eids = np.arange(g.num_edges())
np.random.shuffle(eids)

test_size = int(len(eids) * 0.2)
train_size = g.num_edges() - test_size

test_pos_u = u[eids[:test_size]]
test_pos_v = v[eids[:test_size]]

train_pos_u = u[eids[test_size:]]
train_pos_v = v[eids[test_size:]]

features = g.ndata['feat']
num_nodes = g.num_nodes()

neg_u = torch.randint(0, num_nodes, (test_size,))
neg_v = torch.randint(0, num_nodes, (test_size,))
test_neg_u = neg_u
test_neg_v = neg_v



In [66]:
def score_edges(h, edges):
    u, v = edges
    return (h[u] * h[v]).sum(dim=1)  # dot product

In [67]:
def train(g, train_model, features,epochs=100, lr=0.01):
    optimizer = torch.optim.Adam(train_model.parameters(), lr=lr)
    loss_fn = torch.nn.BCEWithLogitsLoss()

    edges = g.edges()
    num_edges = g.num_edges()

    for epoch in range(epochs):
        model.train()

        # Node embeddings
        h = model(g, features)

        # Positive edges
        pos_u, pos_v = edges

        # Negative sampling (online!)
        neg_u, neg_v = dgl.sampling.global_uniform_negative_sampling(
            g, num_edges
        )

        pos_score = score_edges(h, (pos_u, pos_v))
        neg_score = score_edges(h, (neg_u, neg_v))

        scores = torch.cat([pos_score, neg_score])
        labels = torch.cat([
            torch.ones_like(pos_score),
            torch.zeros_like(neg_score)
        ])

        loss = loss_fn(scores, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            auc = compute_auc(pos_score, neg_score)

        if epoch % 5 == 0:
            print(f"Epoch {epoch} | Loss: {loss.item():.4f} | TRAIN AUC: {auc:.4f}")

In [68]:
def eval_auc(model, g_train, features):
    model.eval()
    with torch.no_grad():
        h = model(g_train, features)


        pos_score = pred(test_pos_g, h)
        neg_score = pred(test_neg_g, h)

        auc = compute_auc(pos_score, neg_score)
        print("GCN Link Prediction Test AUC:", auc)

    return auc


In [69]:
from sklearn.metrics import roc_auc_score


def compute_auc(pos_scores, neg_scores):
    scores = torch.cat([pos_scores, neg_scores]).numpy()
    labels = np.concatenate([
        np.ones(len(pos_scores)), np.zeros(len(neg_scores))
    ])
    #print(labels)
    return roc_auc_score(labels, scores)

In [70]:
def compute_loss(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]
    )
    return torch.nn.functional.binary_cross_entropy_with_logits(scores, labels)

In [71]:
import dgl.function as fn


class DotPredictor(torch.nn.Module):
    def forward(self, graph, h):
        # h contains the node representations computed from the GNN defined
        # in the node classification section (Section 5.1).
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            return graph.edata['score']

In [72]:
from dgl.nn.pytorch import GraphConv


class GCN(torch.nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, h_feats)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = torch.relu(h)
        h = self.conv2(g, h)
        return h


In [73]:
train_g_dgl = dgl.add_self_loop(g)
train_g_dgl.ndata["feat"] = features

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=num_nodes)
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=num_nodes)

pred = DotPredictor()
model = GCN(features.shape[1], 16)

In [74]:


# Create the model with given dimensions
in_feats = train_g_dgl.ndata["feat"].shape[1]
h_feats = 16
model = GCN(features.shape[1], 16)

train(train_g_dgl, model, train_g_dgl.ndata["feat"])
eval_auc(model, train_g_dgl, train_g_dgl.ndata["feat"])

Epoch 0 | Loss: 0.6931 | TRAIN AUC: 0.7296
Epoch 5 | Loss: 0.6823 | TRAIN AUC: 0.7316
Epoch 10 | Loss: 0.6747 | TRAIN AUC: 0.7250
Epoch 15 | Loss: 0.6662 | TRAIN AUC: 0.7172
Epoch 20 | Loss: 0.6550 | TRAIN AUC: 0.7141
Epoch 25 | Loss: 0.6377 | TRAIN AUC: 0.7576
Epoch 30 | Loss: 0.6140 | TRAIN AUC: 0.7983
Epoch 35 | Loss: 0.5893 | TRAIN AUC: 0.8200
Epoch 40 | Loss: 0.5685 | TRAIN AUC: 0.8337
Epoch 45 | Loss: 0.5456 | TRAIN AUC: 0.8531
Epoch 50 | Loss: 0.5263 | TRAIN AUC: 0.8782
Epoch 55 | Loss: 0.5165 | TRAIN AUC: 0.8868
Epoch 60 | Loss: 0.5213 | TRAIN AUC: 0.8882
Epoch 65 | Loss: 0.5146 | TRAIN AUC: 0.8957
Epoch 70 | Loss: 0.5135 | TRAIN AUC: 0.8986
Epoch 75 | Loss: 0.5098 | TRAIN AUC: 0.9021
Epoch 80 | Loss: 0.5067 | TRAIN AUC: 0.9069
Epoch 85 | Loss: 0.5068 | TRAIN AUC: 0.9078
Epoch 90 | Loss: 0.5073 | TRAIN AUC: 0.9079
Epoch 95 | Loss: 0.5065 | TRAIN AUC: 0.9107
GCN Link Prediction Test AUC: 0.9191314988305375


0.9191314988305375