In [705]:
import dgl
import numpy as np
import torch
from dgl.data import CoraGraphDataset

In [706]:
dataset = CoraGraphDataset()
g = dataset[0]

print(g)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})


In [707]:
u, v = g.edges()
eids = np.arange(g.num_edges())
np.random.shuffle(eids)

test_size = int(len(eids) * 0.1)
train_size = g.num_edges() - test_size

test_pos_u = u[eids[:test_size]]
test_pos_v = v[eids[:test_size]]

train_pos_u = u[eids[test_size:]]
train_pos_v = v[eids[test_size:]]

features = g.ndata['feat']
num_nodes = g.num_nodes()

neg_u = torch.randint(0, num_nodes, (test_size,))
neg_v = torch.randint(0, num_nodes, (test_size,))
test_neg_u = neg_u
test_neg_v = neg_v



In [708]:
def score_edges(h, edges):
    u, v = edges
    return (h[u] * h[v]).sum(dim=1)  # dot product

In [709]:
def train(g, train_model, features,epochs=100, lr=0.05):
    optimizer = torch.optim.Adam(train_model.parameters(), lr=lr)
    loss_fn = torch.nn.BCEWithLogitsLoss()


    num_edges = len(train_pos_u)

    for epoch in range(epochs):
        model.train()

        # Node embeddings
        h = model(g, features)

        # Positive edges
        pos_u, pos_v = train_pos_u, train_pos_v

        # Negative sampling (online!)
        train_neg_u, train_neg_v = dgl.sampling.global_uniform_negative_sampling(
            g, num_edges
        )

        pos_score = score_edges(h, (pos_u, pos_v))
        neg_score = score_edges(h, (train_neg_u, train_neg_v))

        scores = torch.cat([pos_score, neg_score])
        labels = torch.cat([
            torch.ones_like(pos_score),
            torch.zeros_like(neg_score)
        ])

        loss = loss_fn(scores, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            auc = compute_auc(pos_score, neg_score)

        if epoch % 5 == 0:
            print(f"Epoch {epoch} | Loss: {loss.item():.4f} | TRAIN AUC: {auc:.4f}")

In [710]:
def eval_auc(model, features):
    model.eval()
    with torch.no_grad():
        # Using train_g_gdl to get the embeddings
        h = model(train_g_dgl, features)


        pos_score = pred(test_pos_g, h)
        neg_score = pred(test_neg_g, h)

        auc = compute_auc(pos_score, neg_score)
        print("GCN Link Prediction Test AUC:", auc)

    # return auc


In [711]:
from sklearn.metrics import roc_auc_score


def compute_auc(pos_scores, neg_scores):
    scores = torch.cat([pos_scores, neg_scores]).numpy()
    labels = np.concatenate([
        np.ones(len(pos_scores)), np.zeros(len(neg_scores))
    ])
    #print(labels)
    return roc_auc_score(labels, scores)

In [712]:
import dgl.function as fn


class DotPredictor(torch.nn.Module):
    def forward(self, graph, h):

        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))

            return graph.edata['score']

In [713]:
from dgl.nn.pytorch import GraphConv


class GCN(torch.nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, h_feats)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = torch.relu(h)
        h = self.conv2(g, h)
        return h


In [714]:
train_g_dgl = dgl.remove_edges(g, eids[:test_size])
train_g_dgl = dgl.add_self_loop(train_g_dgl)
train_g_dgl.ndata["feat"] = features

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=num_nodes)
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=num_nodes)

pred = DotPredictor()

In [715]:
in_feats = train_g_dgl.ndata["feat"].shape[1]
h_feats = 16
model = GCN(in_feats, h_feats)

train(train_g_dgl, model, train_g_dgl.ndata["feat"])
eval_auc(model, train_g_dgl.ndata["feat"])

Epoch 0 | Loss: 0.6931 | TRAIN AUC: 0.7729
Epoch 5 | Loss: 0.6770 | TRAIN AUC: 0.7790
Epoch 10 | Loss: 0.6399 | TRAIN AUC: 0.7927
Epoch 15 | Loss: 0.5610 | TRAIN AUC: 0.8550
Epoch 20 | Loss: 0.5305 | TRAIN AUC: 0.8758
Epoch 25 | Loss: 0.5292 | TRAIN AUC: 0.8891
Epoch 30 | Loss: 0.5147 | TRAIN AUC: 0.9051
Epoch 35 | Loss: 0.5025 | TRAIN AUC: 0.9129
Epoch 40 | Loss: 0.4964 | TRAIN AUC: 0.9219
Epoch 45 | Loss: 0.4875 | TRAIN AUC: 0.9327
Epoch 50 | Loss: 0.4798 | TRAIN AUC: 0.9414
Epoch 55 | Loss: 0.4739 | TRAIN AUC: 0.9437
Epoch 60 | Loss: 0.4698 | TRAIN AUC: 0.9498
Epoch 65 | Loss: 0.4687 | TRAIN AUC: 0.9532
Epoch 70 | Loss: 0.4671 | TRAIN AUC: 0.9562
Epoch 75 | Loss: 0.4689 | TRAIN AUC: 0.9565
Epoch 80 | Loss: 0.4661 | TRAIN AUC: 0.9589
Epoch 85 | Loss: 0.4577 | TRAIN AUC: 0.9617
Epoch 90 | Loss: 0.4574 | TRAIN AUC: 0.9627
Epoch 95 | Loss: 0.4580 | TRAIN AUC: 0.9641
GCN Link Prediction Test AUC: 0.9303519687338561
