In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset and the
# `utils` helper module from there and write model checkpoints back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Quietly (-q) install dgl-cu110 (presumably the CUDA 11.0 build of DGL — confirm it matches the runtime).
# NOTE(review): the version is not pinned, so a fresh run may pull a different DGL release.
!pip install -q dgl-cu110

[K     |████████████████████████████████| 39.9 MB 1.5 MB/s 
[?25h

In [None]:
#!pip install dgl -f https://data.dgl.ai/wheels/repo.html

In [None]:
import networkx as nx
import numpy as np
import scipy.sparse
import pickle

from sklearn.model_selection import train_test_split

import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import random

import dgl
from dgl.data.knowledge_graph import load_data
from dgl.nn.pytorch import RelGraphConv

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


# Data Importing and Preprocessing

In [None]:
# Work from the Drive root: load_IMDB_data() and the checkpoint files below use
# paths relative to the current directory.
%cd '/content/drive/MyDrive/'

/content/drive/MyDrive


In [None]:
def load_IMDB_data(prefix='IMDB_processed'):
    """Read every preprocessed IMDB artifact found under ``prefix``.

    Args:
        prefix: directory that holds the ``0/``, ``1/`` and ``2/`` metapath
            sub-directories and the top-level ``.npz`` / ``.npy`` files.

    Returns:
        A 7-tuple ``(graphs, indices, features, adjM, type_mask, labels,
        train_val_test_idx)`` where

        * ``graphs`` is a 3x2 nested list of ``nx.MultiDiGraph`` objects, one
          per ``*.adjlist`` file,
        * ``indices`` is the matching 3x2 nested list of arrays loaded from
          the ``*_idx.npy`` files,
        * ``features`` is a list of the three sparse matrices
          ``features_0..2.npz``,
        * ``adjM`` is the sparse matrix stored in ``adjM.npz``,
        * ``type_mask`` / ``labels`` are the arrays in ``node_types.npy`` /
          ``labels.npy``,
        * ``train_val_test_idx`` is the object ``np.load`` returns for
          ``train_val_test_idx.npz``.
    """
    # Metapath file stems, grouped by the sub-directory they live in.
    metapath_stems = [
        ['/0/0-1-0', '/0/0-2-0'],
        ['/1/1-0-1', '/1/1-0-2-0-1'],
        ['/2/2-0-2', '/2/2-0-1-0-2'],
    ]
    feature_files = ['/features_0.npz', '/features_1.npz', '/features_2.npz']

    graphs = [
        [nx.read_adjlist(prefix + stem + '.adjlist', create_using=nx.MultiDiGraph)
         for stem in group]
        for group in metapath_stems
    ]
    indices = [
        [np.load(prefix + stem + '_idx.npy') for stem in group]
        for group in metapath_stems
    ]
    features = [scipy.sparse.load_npz(prefix + name) for name in feature_files]

    adjM = scipy.sparse.load_npz(prefix + '/adjM.npz')
    type_mask = np.load(prefix + '/node_types.npy')
    labels = np.load(prefix + '/labels.npy')
    train_val_test_idx = np.load(prefix + '/train_val_test_idx.npz')

    return graphs, indices, features, adjM, type_mask, labels, train_val_test_idx

In [None]:
 # Load every preprocessed IMDB artifact from the default 'IMDB_processed' directory.
 nx_G_lists, edge_metapath_indices_lists, features_list, adjM, type_mask, labels, train_val_test_idx = load_IMDB_data()

In [None]:
# Densify each sparse feature matrix, then stack the first three row-wise into
# one float tensor (row i = feature vector of node i).
dense_feature_blocks = [torch.FloatTensor(sparse_block.todense()) for sparse_block in features_list]
features_ls = torch.cat([dense_feature_blocks[block_no] for block_no in range(3)])

In [None]:
# Convert the sparse adjacency matrix to a dense ndarray.
# The guard keeps this cell re-runnable: once adjM is dense it no longer has
# .toarray(), so an unconditional call would fail on a second execution.
if scipy.sparse.issparse(adjM):
    adjM = adjM.toarray()

In [None]:
# Transform the dense IMDB adjacency matrix into an edge list of
# [head, relation, tail] triples.
#   * heads are the first labels.size node ids (the nodes that carry a label),
#   * tails are all remaining node ids (labels.size .. type_mask.size - 1),
#   * relation id = head label (0..2), plus 0 when the tail has node type 1
#     and plus 3 when it has node type 2  ->  six relations in total.
# Only the head -> tail direction is stored; no inverse triples are added here.
num_nodes = type_mask.size
num_edges = adjM.size  # NOTE(review): counts matrix cells, not edges; unused in the visible cells
num_rels = 6
g_data = []

num_heads = labels.size
type_offset = {1: 0, 2: 3}
# np.argwhere scans row-major, i.e. in the same (x, y) order as a nested
# "for x ... for y ..." loop, but without touching every cell from Python.
for x, y in np.argwhere(adjM[:num_heads, num_heads:num_nodes] == 1).tolist():
    y += num_heads  # column inside the slice -> global node id
    offset = type_offset.get(int(type_mask[y]))
    label = labels[x]
    # An unexpected node type or label falls back to relation 0.
    if offset is not None and label in (0, 1, 2):
        rel1 = int(label) + offset
    else:
        rel1 = 0
    g_data.append([x, rel1, y])

In [None]:
# Seeded split of the edge list. Relation 2 is the scarce relation: only 100 of
# its edges go into training, all the others are held out for evaluation.
np.random.seed([3,1415])

# Shuffle a copy so that g_data keeps its original order; shuffling it in place
# made every re-run of this cell (or of its duplicate in the IRGCN section)
# start from an already-permuted list and therefore produce a different split.
shuffled_edges = list(g_data)
np.random.shuffle(shuffled_edges)

X = [x for x in shuffled_edges if x[1]==2]
irgcn_train_data = X[:100]
train_data = [x for x in shuffled_edges if x[1]!=2] + X[:100]
test_data = X[100:]
# NOTE(review): validation and test are the same triples, so checkpoint
# selection on "validation" MRR is selection on the test set.
valid_data = test_data

# Model

In [None]:
class BaseRGCN(nn.Module):
    """Skeleton of a stacked R-GCN encoder.

    The constructor stores the hyper-parameters and then calls
    ``build_model``, which assembles ``self.layers`` from three hooks:
    ``build_input_layer`` (optional), ``build_hidden_layer`` (required, called
    once per hidden layer) and ``build_output_layer`` (optional). Subclasses
    override the hooks; this base class only provides the plumbing.

    Args:
        num_nodes: number of nodes, stored for the hooks.
        h_dim: hidden feature size, stored for the hooks.
        out_dim: output feature size, stored for the hooks.
        num_rels: number of relation types, stored for the hooks.
        num_bases: number of bases; any negative value is stored as ``None``.
        num_hidden_layers: how many times ``build_hidden_layer`` is called.
        dropout: dropout value, stored for the hooks.
        use_self_loop: stored for the hooks.
        use_cuda: stored for the hooks.
    """

    def __init__(self, num_nodes, h_dim, out_dim, num_rels, num_bases,
                 num_hidden_layers=1, dropout=0,
                 use_self_loop=False, use_cuda=False):
        super().__init__()
        # A negative basis count means "not specified".
        if num_bases < 0:
            num_bases = None

        self.num_nodes = num_nodes
        self.h_dim = h_dim
        self.out_dim = out_dim
        self.num_rels = num_rels
        self.num_bases = num_bases
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_self_loop = use_self_loop
        self.use_cuda = use_cuda

        # The hooks read the attributes above, so build the stack last.
        self.build_model()

    def build_model(self):
        """Create ``self.layers``: [input layer] + hidden layers + [output layer]."""
        self.layers = nn.ModuleList()

        first = self.build_input_layer()
        if first is not None:
            self.layers.append(first)

        for layer_no in range(self.num_hidden_layers):
            self.layers.append(self.build_hidden_layer(layer_no))

        last = self.build_output_layer()
        if last is not None:
            self.layers.append(last)

    def build_input_layer(self):
        """Hook: return the input layer, or ``None`` for no input layer (the default)."""
        return None

    def build_hidden_layer(self, idx):
        """Hook: return hidden layer number ``idx``; subclasses must implement it."""
        raise NotImplementedError

    def build_output_layer(self):
        """Hook: return the output layer, or ``None`` for no output layer (the default)."""
        return None

    def forward(self, g, h, r, norm):
        """Feed ``h`` through every layer in order; each layer is called as ``layer(g, h, r, norm)``."""
        out = h
        for layer in self.layers:
            out = layer(g, out, r, norm)
        return out

In [None]:
class EmbeddingLayer(nn.Module):
    """Input layer: look up each node's fixed feature vector and project it to ``h_dim``.

    Args:
        num_nodes: accepted for interface compatibility; not used.
        h_dim: size of the projected node representation.
        features: 2-D float tensor whose row ``i`` is the feature vector of
            node ``i``. Defaults to the notebook-level ``features_ls`` tensor.
            The lookup table is built with ``Embedding.from_pretrained`` and
            its default settings, so the features themselves are not trained.
    """

    def __init__(self, num_nodes, h_dim, features=None):
        super(EmbeddingLayer, self).__init__()
        if features is None:
            features = features_ls
        self.embedding = torch.nn.Embedding.from_pretrained(features)
        # Take the input width from the feature matrix instead of hard-coding it.
        self.linear = torch.nn.Linear(features.shape[1], h_dim)

    def forward(self, g, h, r, norm):
        """Return ``relu(linear(features[h]))`` with shape ``(num_ids, h_dim)``.

        ``h`` holds node ids; ``g``, ``r`` and ``norm`` are ignored and exist
        only so the layer can be called like the graph-convolution layers.
        """
        # reshape(-1) always yields a 1-D id vector. A bare squeeze() would turn
        # a single-node batch into a 0-d index and drop the node axis.
        node_ids = h.reshape(-1)
        return F.relu(self.linear(self.embedding(node_ids)))

In [None]:
class RGCN(BaseRGCN):
    """R-GCN encoder: an ``EmbeddingLayer`` followed by ``num_hidden_layers`` ``RelGraphConv`` layers."""

    def build_input_layer(self):
        """Input layer that maps node ids to ``h_dim``-sized features."""
        return EmbeddingLayer(self.num_nodes, self.h_dim)

    def build_hidden_layer(self, idx):
        """Build hidden layer ``idx``: an ``h_dim -> h_dim`` ``RelGraphConv`` with the "bdd" regularizer.

        Every layer except the last one applies ReLU; the last one has no
        activation. NOTE(review): ``self_loop`` is fixed to ``True`` here, so
        ``self.use_self_loop`` has no effect.
        """
        is_last = idx >= self.num_hidden_layers - 1
        activation = None if is_last else F.relu
        return RelGraphConv(self.h_dim, self.h_dim, self.num_rels,
                            regularizer="bdd", num_bases=self.num_bases,
                            activation=activation, self_loop=True,
                            dropout=self.dropout)

In [None]:
class LinkPredict(nn.Module):
    """R-GCN encoder plus a DistMult decoder for link prediction.

    Args:
        in_dim: forwarded to ``RGCN`` as its ``num_nodes`` argument.
        h_dim: size of the node and relation embeddings.
        num_rels: number of relation types scored by the decoder. The encoder
            is built with ``num_rels * 2`` relation types (presumably one extra
            id per relation for the inverse direction added by the graph
            helpers — TODO confirm against ``utils``).
        num_bases: forwarded to the encoder (negative means unspecified).
        num_hidden_layers: number of graph-convolution layers in the encoder.
        dropout: dropout value forwarded to the encoder.
        use_cuda: forwarded to the encoder, which only stores it.
        reg_param: weight of the L2-style regularisation term in ``get_loss``.
    """

    def __init__(self, in_dim, h_dim, num_rels, num_bases=-1,
                 num_hidden_layers=1, dropout=0, use_cuda=False, reg_param=0):
        super(LinkPredict, self).__init__()
        # use_cuda must be passed by keyword: positionally it would land in the
        # encoder's use_self_loop parameter, which precedes use_cuda.
        self.rgcn = RGCN(in_dim, h_dim, h_dim, num_rels * 2, num_bases,
                         num_hidden_layers, dropout, use_cuda=use_cuda)
        self.reg_param = reg_param
        self.w_relation = nn.Parameter(torch.Tensor(num_rels, h_dim))
        nn.init.xavier_uniform_(self.w_relation,
                                gain=nn.init.calculate_gain('relu'))

    def calc_score(self, embedding, triplets):
        """DistMult score ``sum(s * r * o)`` for every ``(source, relation, destination)`` row of ``triplets``."""
        s = embedding[triplets[:,0]]
        r = self.w_relation[triplets[:,1]]
        o = embedding[triplets[:,2]]
        score = torch.sum(s * r * o, dim=1)
        return score

    def forward(self, g, h, r, norm):
        """Run the encoder and return its node embeddings."""
        return self.rgcn(g, h, r, norm)

    def regularization_loss(self, embedding):
        """Mean squared value of the node embeddings plus that of the relation embeddings."""
        return torch.mean(embedding.pow(2)) + torch.mean(self.w_relation.pow(2))

    def get_loss(self, g, embed, triplets, labels):
        """Binary cross-entropy on the DistMult scores plus ``reg_param`` times the regulariser.

        ``triplets`` holds positive and negative samples, one
        ``(source, relation, destination)`` row each; ``labels`` holds the
        matching float targets. ``g`` is accepted but not used.
        """
        score = self.calc_score(embed, triplets)
        predict_loss = F.binary_cross_entropy_with_logits(score, labels)
        reg_loss = self.regularization_loss(embed)
        return predict_loss + self.reg_param * reg_loss

In [None]:
class IRGCN(nn.Module):
    """Link predictor whose relation embeddings are induced from node-pair features.

    A ``LinkPredict`` model produces node embeddings. For every triple the
    head and tail embeddings are concatenated and sent through a two-layer MLP
    (``fcc1`` -> ReLU -> dropout -> ``fcc2``). The embedding of relation ``i``
    is the mean MLP output over the positive triples of relation ``i`` in the
    current batch, and triples are scored with DistMult.

    Args:
        in_dim: forwarded to ``LinkPredict`` as its ``in_dim``.
        h_dim: size of node and relation embeddings.
        num_rels: number of relation types.
        num_bases, num_hidden_layers, dropout, use_cuda, reg_param: forwarded
            to ``LinkPredict``; ``reg_param`` also weights the regulariser in
            ``get_loss``.

    Attributes:
        w_relation: ``(num_rels, h_dim)`` relation embeddings from the last
            ``forward`` call. Registered as a non-persistent buffer: it follows
            ``.cpu()`` / ``.cuda()`` / ``.to()`` but is NOT part of
            ``state_dict()``, so checkpoints neither contain nor require it.
        embed: node embeddings from the last ``forward`` call (``None`` before).
    """

    def __init__(self, in_dim, h_dim, num_rels, num_bases=-1,
                 num_hidden_layers=1, dropout=0, use_cuda=False, reg_param=0):
        super(IRGCN, self).__init__()
        # Build the encoder from the constructor arguments rather than from
        # notebook-level globals, so the class does what its signature says.
        self.embedding = LinkPredict(in_dim,
                                     h_dim,
                                     num_rels,
                                     num_bases=num_bases,
                                     num_hidden_layers=num_hidden_layers,
                                     dropout=dropout,
                                     use_cuda=use_cuda,
                                     reg_param=reg_param)
        self.fcc1 = torch.nn.Linear(h_dim*2, h_dim)
        self.fcc2 = torch.nn.Linear(h_dim, h_dim)
        self.register_buffer('w_relation', torch.zeros((num_rels, h_dim)),
                             persistent=False)
        self.in_dim = in_dim
        # The MLP head keeps its own fixed dropout rate, independent of `dropout`.
        self.dropout = nn.Dropout(p=0.2)
        self.embed = None
        self.h_dim = h_dim
        self.reg_param = reg_param
        self.num_rels = num_rels

    def calc_score(self, triplets):
        """DistMult score ``sum(s * r * o)`` per triple, using the state left by the last ``forward``."""
        s = self.embed[triplets[:,0]]
        r = self.w_relation[triplets[:,1]]
        o = self.embed[triplets[:,2]]
        score = torch.sum(s * r * o, dim=1)
        return score

    def forward(self, g, h, r, norm, triplets, labels):
        """Refresh ``self.embed`` and ``self.w_relation`` from one batch.

        ``triplets`` holds ``(head, relation, tail)`` rows and ``labels`` marks
        the positive rows with 1. Nothing is returned; call ``get_loss`` /
        ``calc_score`` afterwards.
        """
        self.embed = self.embedding(g, h, r, norm)
        head = self.embed[triplets[:,0]]
        tail = self.embed[triplets[:,2]]
        x = self.fcc1(torch.cat((head, tail), 1))
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fcc2(x)

        relation_ids = triplets[:, 1]
        is_positive = labels == 1
        rows = []
        for rel in range(self.num_rels):
            members = x[(relation_ids == rel) & is_positive]
            if members.shape[0] > 0:
                rows.append(members.mean(dim=0))
            else:
                # No positive example of this relation in the batch: keep a zero
                # row. The mean of an empty selection is NaN and would poison
                # the regularisation term.
                rows.append(x.new_zeros(self.h_dim))
        # torch.stack keeps device and dtype of x, so no global CUDA flag is needed.
        self.w_relation = torch.stack(rows)

    def regularization_loss(self):
        """Mean squared value of the node embeddings plus that of the relation embeddings."""
        return torch.mean(self.embed.pow(2)) + torch.mean(self.w_relation.pow(2))

    def get_loss(self, g, triplets, labels):
        """Binary cross-entropy on the DistMult scores plus ``reg_param`` times the regulariser.

        Must be called after ``forward``. ``g`` is accepted but not used.
        """
        score = self.calc_score(triplets)
        return F.binary_cross_entropy_with_logits(score, labels) + self.reg_param * self.regularization_loss()




In [None]:
def node_norm_to_edge_norm(g, node_norm):
    """Turn per-node normalisation values into per-edge ones.

    Each edge receives the ``norm`` value of its destination node.

    Args:
        g: graph exposing ``local_var()``, ``ndata``, ``apply_edges`` and ``edata``.
        node_norm: per-node values, stored as node feature ``'norm'``.

    Returns:
        The ``'norm'`` edge feature after the copy.
    """
    def _copy_dst_norm(edges):
        return {'norm': edges.dst['norm']}

    # Work on the object returned by local_var() rather than on g itself.
    scratch = g.local_var()
    scratch.ndata['norm'] = node_norm
    scratch.apply_edges(_copy_dst_norm)
    return scratch.edata['norm']

# Training RGCN

In [None]:
import sys
sys.path.insert(0,'/content/drive/My Drive/')

import utils

In [None]:
# Hyper-parameters for the R-GCN run (two graph-convolution layers).
parameter_dropout = 0.2  # dropout passed to the model constructor
parameter_gpu = 0  # CUDA device index; a negative value forces CPU
parameter_n_hidden = 96  # hidden / embedding size (h_dim)
parameter_n_bases = -1  # number of bases; negative is stored as None by BaseRGCN
parameter_n_layers = 2  # number of hidden graph-convolution layers
parameter_n_epochs = 10000  # training stops at the first evaluation epoch >= this value
parameter_regularization = 0.01  # weight of the regularisation term in the loss
parameter_lr = 1e-2  # Adam learning rate
parameter_graph_batch_size = 15000  # forwarded to utils.generate_sampled_graph_and_labels
parameter_graph_split_size = 0.5  # forwarded to utils.generate_sampled_graph_and_labels
parameter_negative_sample = 10  # forwarded to utils.generate_sampled_graph_and_labels
parameter_edge_sampler = "uniform"  # forwarded to utils.generate_sampled_graph_and_labels
parameter_grad_norm = 1.0  # max norm for gradient clipping
parameter_evaluate_every = 100  # evaluate every this many epochs
parameter_eval_batch_size = 500  # passed to utils.calc_mrr as eval_bz
parameter_eval_protocol = "filtered"  # passed to utils.calc_mrr as eval_p

In [None]:
# Train the plain R-GCN link predictor (LinkPredict) and report MRR / Hits@k for
# the checkpoint with the best validation MRR.
# num_nodes, num_rels and the train/valid/test triples come from the
# preprocessing cells; the parameter_* values from the configuration cell.

# check cuda
use_cuda = parameter_gpu >= 0 and torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(parameter_gpu)

# create model
model = LinkPredict(num_nodes,
                    parameter_n_hidden,
                    num_rels,
                    num_bases=parameter_n_bases,
                    num_hidden_layers=parameter_n_layers,
                    dropout=parameter_dropout,
                    use_cuda=use_cuda,
                    reg_param=parameter_regularization)

# validation and testing triplets
train_data = np.array(train_data)
valid_data = torch.LongTensor(valid_data)
test_data = torch.LongTensor(test_data)

# build test graph
test_graph, test_rel, test_norm = utils.build_test_graph(
    num_nodes, num_rels, train_data)
test_deg = test_graph.in_degrees(
            range(test_graph.number_of_nodes())).float().view(-1,1)
test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
test_rel = torch.from_numpy(test_rel)
test_norm = node_norm_to_edge_norm(test_graph, torch.from_numpy(test_norm).view(-1, 1))

if use_cuda:
    model.cuda()
    # GPU copies of the test graph. They do not change between epochs, so they
    # are created once here instead of on every iteration of the training loop.
    # NOTE(review): nothing in this cell reads them (evaluation runs on the CPU).
    test_node_id_g = test_node_id.cuda()
    test_rel_g = test_rel.cuda()
    test_norm_g = test_norm.cuda()
    test_graph_g = test_graph.to(parameter_gpu)

# build adj list and calculate degrees for sampling
adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=parameter_lr)

model_state_file = 'model_state.pth'
forward_time = []
backward_time = []

# training loop
print("start training...")

epoch = 0
best_mrr = 0
while True:
    model.train()
    epoch += 1

    # perform edge neighborhood sampling to generate training graph and data.
    # The targets go into `batch_labels`, not `labels`, so the notebook-level
    # IMDB node labels used by the preprocessing cells are not overwritten.
    g, node_id, edge_type, node_norm, data, batch_labels = \
        utils.generate_sampled_graph_and_labels(
            train_data, parameter_graph_batch_size, parameter_graph_split_size,
            num_rels, adj_list, degrees, parameter_negative_sample,
            parameter_edge_sampler)
    print("Done edge sampling")
    # set node/edge feature
    node_id = torch.from_numpy(node_id).view(-1, 1).long()
    edge_type = torch.from_numpy(edge_type)
    edge_norm = node_norm_to_edge_norm(g, torch.from_numpy(node_norm).view(-1, 1))
    data, batch_labels = torch.from_numpy(data), torch.from_numpy(batch_labels)
    deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
    if use_cuda:
        node_id, deg = node_id.cuda(), deg.cuda()
        edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
        data, batch_labels = data.cuda(), batch_labels.cuda()
        g = g.to(parameter_gpu)

    t0 = time.time()
    embed = model(g, node_id, edge_type, edge_norm)
    loss = model.get_loss(g, embed, data, batch_labels)
    t1 = time.time()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), parameter_grad_norm) # clip gradients
    optimizer.step()
    t2 = time.time()

    forward_time.append(t1 - t0)
    backward_time.append(t2 - t1)
    print("Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s".
          format(epoch, loss.item(), best_mrr, forward_time[-1], backward_time[-1]))

    optimizer.zero_grad()

    # validation
    if epoch % parameter_evaluate_every == 0:
        # perform validation on CPU because full graph is too large
        if use_cuda:
            model.cpu()
        model.eval()
        print("start eval")
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory on the full test graph.
        with torch.no_grad():
            embed = model(test_graph, test_node_id, test_rel, test_norm)
        mrr = utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data),
                              valid_data, test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size,
                              eval_p=parameter_eval_protocol)
        # save best model
        if best_mrr < mrr:
            best_mrr = mrr
            torch.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)

        # The stop condition is only checked on evaluation epochs.
        if epoch >= parameter_n_epochs:
            break

        if use_cuda:
            model.cuda()

print("training done")
print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))

print("\nstart testing:")
# use best model checkpoint
# NOTE(review): a checkpoint is only written when an evaluation beats best_mrr (initially 0);
# if that never happened this loads a file from an earlier run or fails.
checkpoint = torch.load(model_state_file)
if use_cuda:
    model.cpu() # test on CPU
model.eval()
model.load_state_dict(checkpoint['state_dict'])
print("Using best epoch: {}".format(checkpoint['epoch']))
with torch.no_grad():
    embed = model(test_graph, test_node_id, test_rel, test_norm)
utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data), valid_data,
                test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size, eval_p=parameter_eval_protocol)

Test graph:


  norm = 1.0 / in_deg


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
# nodes: 10843, # edges: 15000
Done edge sampling
Epoch 0090 | Loss 0.2196 | Best MRR 0.0000 | Forward 0.0283s | Backward 0.0044s
# sampled nodes: 10849
# sampled edges: 15000
# nodes: 10849, # edges: 15000
Done edge sampling
Epoch 0091 | Loss 0.2174 | Best MRR 0.0000 | Forward 0.0287s | Backward 0.0057s
# sampled nodes: 10865
# sampled edges: 15000
# nodes: 10865, # edges: 15000
Done edge sampling
Epoch 0092 | Loss 0.2137 | Best MRR 0.0000 | Forward 0.0287s | Backward 0.0043s
# sampled nodes: 10834
# sampled edges: 15000
# nodes: 10834, # edges: 15000
Done edge sampling
Epoch 0093 | Loss 0.2138 | Best MRR 0.0000 | Forward 0.0288s | Backward 0.0069s
# sampled nodes: 10852
# sampled edges: 15000
# nodes: 10852, # edges: 15000
Done edge sampling
Epoch 0094 | Loss 0.2099 | Best MRR 0.0000 | Forward 0.0287s | Backward 0.0043s
# sampled nodes: 10845
# sampled edges: 15000
# nodes: 10845, # edges: 15000
Done edge sampling
Epoch

KeyboardInterrupt: ignored

# Training IRGCN

In [None]:
import sys
sys.path.insert(0,'/content/drive/My Drive/')

import utils

In [None]:
# Seeded split of the edge list for the IRGCN run (same recipe as the split
# cell in the preprocessing section). Relation 2 is the scarce relation: only
# 100 of its edges go into training, all the others are held out.
np.random.seed([3,1415])

# Shuffle a copy so that g_data keeps its order; shuffling it in place made this
# cell permute an already-permuted list, so each run gave a different split.
shuffled_edges = list(g_data)
np.random.shuffle(shuffled_edges)

X = [x for x in shuffled_edges if x[1]==2]
irgcn_train_data = X[:100]
train_data = [x for x in shuffled_edges if x[1]!=2] + X[:100]
test_data = X[100:]
# NOTE(review): validation and test are the same triples, so checkpoint
# selection on "validation" MRR is selection on the test set.
valid_data = test_data

In [None]:
# Hyper-parameters for the IRGCN run (one graph-convolution layer).
# These overwrite the values set in the R-GCN section, so cells executed
# afterwards see parameter_n_layers = 1.
parameter_dropout = 0.2  # dropout passed to the model constructor
parameter_gpu = 0  # CUDA device index; a negative value forces CPU
parameter_n_hidden = 96  # hidden / embedding size (h_dim)
parameter_n_bases = -1  # number of bases; negative is stored as None by BaseRGCN
parameter_n_layers = 1  # number of hidden graph-convolution layers
parameter_n_epochs = 10000  # training stops at the first evaluation epoch >= this value
parameter_regularization = 0.01  # weight of the regularisation term in the loss
parameter_lr = 1e-2  # Adam learning rate
parameter_graph_batch_size = 15000  # forwarded to utils.generate_sampled_graph_and_labels
parameter_graph_split_size = 0.5  # forwarded to utils.generate_sampled_graph_and_labels
parameter_negative_sample = 10  # forwarded to utils.generate_sampled_graph_and_labels
parameter_edge_sampler = "uniform"  # forwarded to utils.generate_sampled_graph_and_labels
parameter_grad_norm = 1.0  # max norm for gradient clipping
parameter_evaluate_every = 100  # evaluate every this many epochs
parameter_eval_batch_size = 500  # passed to utils.calc_mrr as eval_bz
parameter_eval_protocol = "filtered"  # passed to utils.calc_mrr as eval_p

In [None]:
# Put the splits into tensor / array form, then order the training triples by
# their relation id (column 1).
valid_data = torch.LongTensor(valid_data)
test_data = torch.LongTensor(test_data)
train_data = np.array(train_data)
train_data = train_data[np.argsort(train_data[:, 1])]

In [None]:
# Train the IRGCN model and report MRR / Hits@k for the checkpoint with the
# best validation MRR. num_nodes, num_rels and the train/valid/test triples
# come from the cells above; the parameter_* values from the configuration cell.
#train_data = irgcn_train_data
# check cuda
use_cuda = parameter_gpu >= 0 and torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(parameter_gpu)

# create model
model = IRGCN(num_nodes,
                    parameter_n_hidden,
                    num_rels,
                    num_bases=parameter_n_bases,
                    num_hidden_layers=parameter_n_layers,
                    dropout=parameter_dropout,
                    use_cuda=use_cuda,
                    reg_param=parameter_regularization)

# validation and testing triplets
train_data = np.array(train_data)
valid_data = torch.LongTensor(valid_data)
test_data = torch.LongTensor(test_data)

# build test graph
test_graph, test_rel, test_norm = utils.build_test_graph(
    num_nodes, num_rels, train_data)
test_deg = test_graph.in_degrees(
            range(test_graph.number_of_nodes())).float().view(-1,1)
test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
test_rel = torch.from_numpy(test_rel)
test_norm = node_norm_to_edge_norm(test_graph, torch.from_numpy(test_norm).view(-1, 1))

if use_cuda:
    model.cuda()

# build adj list and calculate degrees for sampling
adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=parameter_lr)

model_state_file = 'model_state_irgcn.pth'
forward_time = []
backward_time = []

# training loop
print("start training...")

epoch = 0
best_mrr = 0
while True:
    model.train()
    epoch += 1

    # perform edge neighborhood sampling to generate training graph and data.
    # The targets go into `batch_labels`, not `labels`, so the notebook-level
    # IMDB node labels used by the preprocessing cells are not overwritten.
    g, node_id, edge_type, node_norm, data, batch_labels = \
        utils.generate_sampled_graph_and_labels(
            train_data, parameter_graph_batch_size, parameter_graph_split_size,
            num_rels, adj_list, degrees, parameter_negative_sample,
            parameter_edge_sampler)
    print("Done edge sampling")
    # set node/edge feature
    node_id = torch.from_numpy(node_id).view(-1, 1).long()
    edge_type = torch.from_numpy(edge_type)
    edge_norm = node_norm_to_edge_norm(g, torch.from_numpy(node_norm).view(-1, 1))
    data, batch_labels = torch.from_numpy(data), torch.from_numpy(batch_labels)
    deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
    if use_cuda:
        node_id, deg = node_id.cuda(), deg.cuda()
        edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
        data, batch_labels = data.cuda(), batch_labels.cuda()
        g = g.to(parameter_gpu)

    t0 = time.time()

    # forward() stores the node and relation embeddings on the model;
    # get_loss() then scores the batch with them.
    model(g, node_id, edge_type, edge_norm, data, batch_labels)

    loss = model.get_loss(g, data, batch_labels)
    t1 = time.time()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), parameter_grad_norm) # clip gradients
    optimizer.step()
    t2 = time.time()

    forward_time.append(t1 - t0)
    backward_time.append(t2 - t1)
    print("Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s".
          format(epoch, loss.item(), best_mrr, forward_time[-1], backward_time[-1]))

    optimizer.zero_grad()

    # validation
    if epoch % parameter_evaluate_every == 0:
        # perform validation on CPU because full graph is too large
        if use_cuda:
            model.cpu()
        model.eval()
        print("start eval")
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory on the full test graph.
        with torch.no_grad():
            eval_embed = model.embedding(test_graph, test_node_id, test_rel, test_norm)
        # Relation embeddings as left by the most recent training step.
        eval_w_relation = model.w_relation.detach().cpu()
        mrr = utils.calc_mrr(eval_embed, eval_w_relation, torch.LongTensor(train_data).cpu(),
                              valid_data, test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size,
                              eval_p=parameter_eval_protocol)
        # save best model. w_relation is not part of state_dict(), so it is
        # stored explicitly; otherwise the final test would pair the best-epoch
        # weights with relation embeddings from the last training step.
        if best_mrr < mrr:
            best_mrr = mrr
            torch.save({'state_dict': model.state_dict(), 'epoch': epoch,
                        'w_relation': eval_w_relation}, model_state_file)

        # The stop condition is only checked on evaluation epochs.
        if epoch >= parameter_n_epochs:
            break

        if use_cuda:
            model.cuda()

print("training done")
print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))

print("\nstart testing:")
# use best model checkpoint
# NOTE(review): a checkpoint is only written when an evaluation beats best_mrr (initially 0);
# if that never happened this loads a file from an earlier run or fails.
checkpoint = torch.load(model_state_file)
if use_cuda:
    model.cpu() # test on CPU
model.eval()
model.load_state_dict(checkpoint['state_dict'])
print("Using best epoch: {}".format(checkpoint['epoch']))
# Checkpoint files written by earlier versions of this cell have no
# 'w_relation' entry; fall back to the model's current (CPU) value for those.
best_w_relation = checkpoint.get('w_relation', model.w_relation.detach().cpu())
with torch.no_grad():
    eval_embed = model.embedding(test_graph, test_node_id, test_rel, test_norm)
utils.calc_mrr(eval_embed, best_w_relation, torch.LongTensor(train_data), valid_data,
                test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size, eval_p=parameter_eval_protocol)

Test graph:


  norm = 1.0 / in_deg


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Done edge sampling
Epoch 0112 | Loss 0.0861 | Best MRR 0.2484 | Forward 0.0420s | Backward 0.1852s
# sampled nodes: 10838
# sampled edges: 15000
# nodes: 10838, # edges: 15000
Done edge sampling
Epoch 0113 | Loss 0.0850 | Best MRR 0.2484 | Forward 0.0421s | Backward 0.1770s
# sampled nodes: 10852
# sampled edges: 15000
# nodes: 10852, # edges: 15000
Done edge sampling
Epoch 0114 | Loss 0.0838 | Best MRR 0.2484 | Forward 0.0421s | Backward 0.1833s
# sampled nodes: 10822
# sampled edges: 15000
# nodes: 10822, # edges: 15000
Done edge sampling
Epoch 0115 | Loss 0.0845 | Best MRR 0.2484 | Forward 0.0420s | Backward 0.1832s
# sampled nodes: 10821
# sampled edges: 15000
# nodes: 10821, # edges: 15000
Done edge sampling
Epoch 0116 | Loss 0.0820 | Best MRR 0.2484 | Forward 0.0419s | Backward 0.1851s
# sampled nodes: 10832
# sampled edges: 15000
# nodes: 10832, # edges: 15000
Done edge sampling
Epoch 0117 | Loss 0.0831 | Best MRR 

KeyboardInterrupt: ignored

In [None]:
# Stand-alone re-evaluation of the saved IRGCN checkpoint on the CPU. It reuses
# `model` and the test graph tensors created by the IRGCN training cell.
model_state_file = 'model_state_irgcn.pth'
checkpoint = torch.load(model_state_file)
if use_cuda:
    model.cpu() # test on CPU
model.eval()
model.load_state_dict(checkpoint['state_dict'])
print("Using best epoch: {}".format(checkpoint['epoch']))
# NOTE(review): load_state_dict does not restore model.w_relation (it is not in
# state_dict()), so the relation embeddings used below are whatever the last
# training step left on the model, not those of the checkpoint's epoch.
# No gradients are needed for evaluation; skip building the autograd graph.
with torch.no_grad():
    eval_embed = model.embedding(test_graph, test_node_id, test_rel, test_norm)
utils.calc_mrr(eval_embed, model.w_relation.detach().cpu(), torch.LongTensor(train_data), valid_data,
                test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size, eval_p=parameter_eval_protocol)

Using best epoch: 800
Perturbing subject...
test triplet 0 / 1459
test triplet 100 / 1459
test triplet 200 / 1459
test triplet 300 / 1459
test triplet 400 / 1459
test triplet 500 / 1459
test triplet 600 / 1459
test triplet 700 / 1459
test triplet 800 / 1459
test triplet 900 / 1459
test triplet 1000 / 1459
test triplet 1100 / 1459
test triplet 1200 / 1459
test triplet 1300 / 1459
test triplet 1400 / 1459
Perturbing object...
test triplet 0 / 1459
test triplet 100 / 1459
test triplet 200 / 1459
test triplet 300 / 1459
test triplet 400 / 1459
test triplet 500 / 1459
test triplet 600 / 1459
test triplet 700 / 1459
test triplet 800 / 1459
test triplet 900 / 1459
test triplet 1000 / 1459
test triplet 1100 / 1459
test triplet 1200 / 1459
test triplet 1300 / 1459
test triplet 1400 / 1459
MRR (filtered): 0.315866
Hits (filtered) @ 1: 0.164154
Hits (filtered) @ 3: 0.392049
Hits (filtered) @ 10: 0.590816


0.3158656358718872

# Playground

In [None]:
# Playground: train the plain R-GCN link predictor (LinkPredict) again and
# report MRR / Hits@k for the checkpoint with the best validation MRR.
# It uses whichever parameter_* values and train/valid/test triples are
# currently defined in the notebook session.

# check cuda
use_cuda = parameter_gpu >= 0 and torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(parameter_gpu)

# create model
model = LinkPredict(num_nodes,
                    parameter_n_hidden,
                    num_rels,
                    num_bases=parameter_n_bases,
                    num_hidden_layers=parameter_n_layers,
                    dropout=parameter_dropout,
                    use_cuda=use_cuda,
                    reg_param=parameter_regularization)

# validation and testing triplets
train_data = np.array(train_data)
valid_data = torch.LongTensor(valid_data)
test_data = torch.LongTensor(test_data)

# build test graph
test_graph, test_rel, test_norm = utils.build_test_graph(
    num_nodes, num_rels, train_data)
test_deg = test_graph.in_degrees(
            range(test_graph.number_of_nodes())).float().view(-1,1)
test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
test_rel = torch.from_numpy(test_rel)
test_norm = node_norm_to_edge_norm(test_graph, torch.from_numpy(test_norm).view(-1, 1))

if use_cuda:
    model.cuda()
    # GPU copies of the test graph. They do not change between epochs, so they
    # are created once here instead of on every iteration of the training loop.
    # NOTE(review): nothing in this cell reads them (evaluation runs on the CPU).
    test_node_id_g = test_node_id.cuda()
    test_rel_g = test_rel.cuda()
    test_norm_g = test_norm.cuda()
    test_graph_g = test_graph.to(parameter_gpu)

# build adj list and calculate degrees for sampling
adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=parameter_lr)

model_state_file = 'model_state.pth'
forward_time = []
backward_time = []

# training loop
print("start training...")

epoch = 0
best_mrr = 0
while True:
    model.train()
    epoch += 1

    # perform edge neighborhood sampling to generate training graph and data.
    # The targets go into `batch_labels`, not `labels`, so the notebook-level
    # IMDB node labels used by the preprocessing cells are not overwritten.
    g, node_id, edge_type, node_norm, data, batch_labels = \
        utils.generate_sampled_graph_and_labels(
            train_data, parameter_graph_batch_size, parameter_graph_split_size,
            num_rels, adj_list, degrees, parameter_negative_sample,
            parameter_edge_sampler)
    print("Done edge sampling")
    # set node/edge feature
    node_id = torch.from_numpy(node_id).view(-1, 1).long()
    edge_type = torch.from_numpy(edge_type)
    edge_norm = node_norm_to_edge_norm(g, torch.from_numpy(node_norm).view(-1, 1))
    data, batch_labels = torch.from_numpy(data), torch.from_numpy(batch_labels)
    deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
    if use_cuda:
        node_id, deg = node_id.cuda(), deg.cuda()
        edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
        data, batch_labels = data.cuda(), batch_labels.cuda()
        g = g.to(parameter_gpu)

    t0 = time.time()
    embed = model(g, node_id, edge_type, edge_norm)
    loss = model.get_loss(g, embed, data, batch_labels)
    t1 = time.time()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), parameter_grad_norm) # clip gradients
    optimizer.step()
    t2 = time.time()

    forward_time.append(t1 - t0)
    backward_time.append(t2 - t1)
    print("Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s".
          format(epoch, loss.item(), best_mrr, forward_time[-1], backward_time[-1]))

    optimizer.zero_grad()

    # validation
    if epoch % parameter_evaluate_every == 0:
        # perform validation on CPU because full graph is too large
        if use_cuda:
            model.cpu()
        model.eval()
        print("start eval")
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory on the full test graph.
        with torch.no_grad():
            embed = model(test_graph, test_node_id, test_rel, test_norm)
        mrr = utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data),
                              valid_data, test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size,
                              eval_p=parameter_eval_protocol)
        # save best model
        if best_mrr < mrr:
            best_mrr = mrr
            torch.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)

        # The stop condition is only checked on evaluation epochs.
        if epoch >= parameter_n_epochs:
            break

        if use_cuda:
            model.cuda()

print("training done")
print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))

print("\nstart testing:")
# use best model checkpoint
# NOTE(review): a checkpoint is only written when an evaluation beats best_mrr (initially 0);
# if that never happened this loads a file from an earlier run or fails.
checkpoint = torch.load(model_state_file)
if use_cuda:
    model.cpu() # test on CPU
model.eval()
model.load_state_dict(checkpoint['state_dict'])
print("Using best epoch: {}".format(checkpoint['epoch']))
with torch.no_grad():
    embed = model(test_graph, test_node_id, test_rel, test_norm)
utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data), valid_data,
                test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size, eval_p=parameter_eval_protocol)

Test graph:
# nodes: 11616, # edges: 65506
start training...
# sampled nodes: 9972
# sampled edges: 15000
# nodes: 9972, # edges: 15000
Done edge sampling


  norm = 1.0 / in_deg


Epoch 0001 | Loss 0.6935 | Best MRR 0.0000 | Forward 0.0348s | Backward 0.0054s
# sampled nodes: 9999
# sampled edges: 15000
# nodes: 9999, # edges: 15000
Done edge sampling
Epoch 0002 | Loss 0.6651 | Best MRR 0.0000 | Forward 0.0316s | Backward 0.0049s
# sampled nodes: 9982
# sampled edges: 15000
# nodes: 9982, # edges: 15000
Done edge sampling
Epoch 0003 | Loss 0.5258 | Best MRR 0.0000 | Forward 0.0303s | Backward 0.0051s
# sampled nodes: 9995
# sampled edges: 15000
# nodes: 9995, # edges: 15000
Done edge sampling
Epoch 0004 | Loss 0.6140 | Best MRR 0.0000 | Forward 0.0263s | Backward 0.0054s
# sampled nodes: 9992
# sampled edges: 15000
# nodes: 9992, # edges: 15000
Done edge sampling
Epoch 0005 | Loss 0.4345 | Best MRR 0.0000 | Forward 0.0243s | Backward 0.0048s
# sampled nodes: 9960
# sampled edges: 15000
# nodes: 9960, # edges: 15000
Done edge sampling
Epoch 0006 | Loss 0.4031 | Best MRR 0.0000 | Forward 0.0243s | Backward 0.0052s
# sampled nodes: 9996
# sampled edges: 15000
# nod

KeyboardInterrupt: ignored

In [None]:
#train_data = irgcn_train_data
# Run on GPU only when a non-negative device index is configured and CUDA
# is actually available in this runtime.
use_cuda = parameter_gpu >= 0 and torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(parameter_gpu)

# Model under training.
model = IRGCN(
    num_nodes,
    parameter_n_hidden,
    num_rels,
    num_bases=parameter_n_bases,
    num_hidden_layers=parameter_n_layers,
    dropout=parameter_dropout,
    use_cuda=use_cuda,
    reg_param=parameter_regularization,
)

# Training triplets stay a NumPy array; validation and test triplets become
# LongTensors.
train_data = np.array(train_data)
valid_data, test_data = torch.LongTensor(valid_data), torch.LongTensor(test_data)

# Evaluation graph, built from the training triplets, plus its per-node and
# per-edge inputs.
test_graph, test_rel, test_norm = utils.build_test_graph(num_nodes, num_rels, train_data)
test_deg = test_graph.in_degrees(range(test_graph.number_of_nodes())).float().view(-1, 1)
test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
test_rel = torch.from_numpy(test_rel)
test_norm = node_norm_to_edge_norm(test_graph, torch.from_numpy(test_norm).view(-1, 1))

# Move the parameters to the GPU before the optimizer is created over them.
if use_cuda:
    model.cuda()

# Adjacency list and degrees consumed by the edge sampler in the training loop.
adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

optimizer = torch.optim.Adam(model.parameters(), lr=parameter_lr)

# Checkpoint path and per-epoch timing logs.
model_state_file = 'model_state_irgcn_2.pth'
forward_time, backward_time = [], []

# training loop
# Each iteration samples a sub-graph, runs one optimisation step on it and,
# every `parameter_evaluate_every` epochs, validates on the full test graph
# and checkpoints the model whenever the validation MRR improves.
print("start training...")

epoch = 0
best_mrr = 0
while True:
    model.train()
    epoch += 1

    # perform edge neighborhood sampling to generate training graph and data
    g, node_id, edge_type, node_norm, data, labels = \
        utils.generate_sampled_graph_and_labels(
            train_data, parameter_graph_batch_size, parameter_graph_split_size,
            num_rels, adj_list, degrees, parameter_negative_sample,
            parameter_edge_sampler)
    print("Done edge sampling")

    # set node/edge feature
    node_id = torch.from_numpy(node_id).view(-1, 1).long()
    edge_type = torch.from_numpy(edge_type)
    edge_norm = node_norm_to_edge_norm(g, torch.from_numpy(node_norm).view(-1, 1))
    data, labels = torch.from_numpy(data), torch.from_numpy(labels)
    # `deg` is prepared here but is not passed to the model call below.
    deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
    if use_cuda:
        node_id, deg = node_id.cuda(), deg.cuda()
        edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
        data, labels = data.cuda(), labels.cuda()
        g = g.to(parameter_gpu)

    # "Forward" time covers the model call plus the loss computation.
    t0 = time.time()
    model(g, node_id, edge_type, edge_norm, data, labels)
    loss = model.get_loss(g, data, labels)
    t1 = time.time()

    # "Backward" time covers backprop, gradient clipping and the optimizer step.
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), parameter_grad_norm) # clip gradients
    optimizer.step()
    t2 = time.time()

    forward_time.append(t1 - t0)
    backward_time.append(t2 - t1)
    print("Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s".
          format(epoch, loss.item(), best_mrr, forward_time[-1], backward_time[-1]))

    optimizer.zero_grad()

    # validation
    if epoch % parameter_evaluate_every == 0:
        # perform validation on CPU because full graph is too large
        if use_cuda:
            model.cpu()
        model.eval()
        print("start eval")
        # Inference only: no autograd graph is needed for the full-graph
        # embeddings, so skip gradient tracking.
        with torch.no_grad():
            embed = model.embedding(test_graph, test_node_id, test_rel, test_norm)
        mrr = utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data),
                             valid_data, test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size,
                             eval_p=parameter_eval_protocol)
        # save best model
        if best_mrr < mrr:
            best_mrr = mrr
            torch.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)

        # The stop check sits inside the validation branch, so the loop can
        # only end on an epoch that is a multiple of parameter_evaluate_every.
        if epoch >= parameter_n_epochs:
            break

        # Back to the GPU for the next training iterations.
        if use_cuda:
            model.cuda()

# ---- Training summary ----
print("training done")
# Report the mean timings with four decimals, like the per-epoch log line.
print("Mean forward time: {:.4f}s".format(np.mean(forward_time)))
print("Mean Backward time: {:.4f}s".format(np.mean(backward_time)))

# ---- Final evaluation with the best checkpoint ----
print("\nstart testing:")
# use best model checkpoint
# NOTE(review): this reads whatever is stored at `model_state_file`; if no
# checkpoint was written during this run, a file left by an earlier run
# would be picked up instead -- confirm that is acceptable.
# map_location="cpu": the model is evaluated on CPU below, so the stored
# tensors are materialised on CPU regardless of where they were saved.
checkpoint = torch.load(model_state_file, map_location="cpu")
if use_cuda:
    model.cpu() # test on CPU
model.eval()
model.load_state_dict(checkpoint['state_dict'])
print("Using best epoch: {}".format(checkpoint['epoch']))
# Inference only: skip autograd bookkeeping for the full-graph embeddings.
with torch.no_grad():
    embed = model.embedding(test_graph, test_node_id, test_rel, test_norm)
# Kept as the last top-level expression so the cell still displays its result.
utils.calc_mrr(embed, model.w_relation, torch.LongTensor(train_data), valid_data,
                test_data, hits=[1, 3, 10], eval_bz=parameter_eval_batch_size, eval_p=parameter_eval_protocol)

Test graph:
# nodes: 11616, # edges: 65506
start training...
# sampled nodes: 10042
# sampled edges: 15000
# nodes: 10042, # edges: 15000
Done edge sampling


  norm = 1.0 / in_deg


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Done edge sampling
Epoch 0608 | Loss 0.0434 | Best MRR 0.3615 | Forward 0.0338s | Backward 0.0850s
# sampled nodes: 10017
# sampled edges: 15000
# nodes: 10017, # edges: 15000
Done edge sampling
Epoch 0609 | Loss 0.0419 | Best MRR 0.3615 | Forward 0.0334s | Backward 0.0858s
# sampled nodes: 9947
# sampled edges: 15000
# nodes: 9947, # edges: 15000
Done edge sampling
Epoch 0610 | Loss 0.0441 | Best MRR 0.3615 | Forward 0.0337s | Backward 0.0841s
# sampled nodes: 10029
# sampled edges: 15000
# nodes: 10029, # edges: 15000
Done edge sampling
Epoch 0611 | Loss 0.0438 | Best MRR 0.3615 | Forward 0.0353s | Backward 0.0850s
# sampled nodes: 10034
# sampled edges: 15000
# nodes: 10034, # edges: 15000
Done edge sampling
Epoch 0612 | Loss 0.0418 | Best MRR 0.3615 | Forward 0.0336s | Backward 0.0874s
# sampled nodes: 10003
# sampled edges: 15000
# nodes: 10003, # edges: 15000
Done edge sampling
Epoch 0613 | Loss 0.0424 | Best MRR 0.

KeyboardInterrupt: ignored