# Neural Network for Knowledge Graph Learning
* [ER-MLP](#ER-MLP)
* [Neural Tensor Network](#Neural-Tensor-Network)

In [None]:
# Common imports
import tensorflow as tf
from core.knowledge_graph import KnowledgeGraph
from core.link_predict_utils import *
import shutil
import os

%pylab --no-import-all
%load_ext autoreload
%autoreload 2

def calc_log_eval(predicts, labels, logger, step):
    mrr, hit_at_10, auc_pr, ap, precision, num_pos = metrics_in_a_batch(predicts, labels)
    logger.add_summary(tf.Summary(
        value=[tf.Summary.Value(tag='Summary/MRR', simple_value=mrr),
               tf.Summary.Value(tag='Summary/HIT@10', simple_value=hit_at_10),
               tf.Summary.Value(tag='Summary/AUC_PR', simple_value=auc_pr),
               tf.Summary.Value(tag='Summary/Average_precision', simple_value=ap),
               tf.Summary.Value(tag='Summary/Precision@0.5', simple_value=precision)]), step)
    print("mmr: %f, hit@10: %f, auc_pr: %f, ap: %f, positive predicts: %d, precision: %f" %
          (mrr, hit_at_10, auc_pr, ap, num_pos, precision))

In [None]:
# WARNING!!!!
# Remove every file in `./tmp/` dir.
decide = input('Remove all files in tmp/ ? (y/n)')

if decide.lower() == 'y' or decide.lower() == 'yes':
    if os.path.exists('./tmp'):
        shutil.rmtree('./tmp/')
    print('tmp files cleaned')

## ER-MLP
* [Build Graph](#Build-ER-Graph)
* [Training](#ER-Training)
* [Evaluation](#ER-Evaluation)

In [27]:
# Definitions
import core.er_mlp_model as er_mlp_model

### Build ER Graph

In [28]:
# Hyperparameters
dataset = 'data/kin'
num_slice = 100
rank_e = 100
rank_r = 100
valid_percent = 0.05
test_percent = 0.05
lambda_para = 0.0001

# Read database
database = KnowledgeGraph()
database.read_data_from_txt(dataset)
database.spilt_train_valid_test(valid_percent, test_percent)
num_entities = database.number_of_entities()
num_relations = database.number_of_relations()

# TensorFlow Graph
er_mlp_graph = tf.Graph()
with er_mlp_graph.as_default():
    with tf.name_scope('Feed_in'):
        batch_input = tf.placeholder(shape=[None, 3], name='batch', dtype=tf.int32)
        labels_input = tf.placeholder(shape=[None], name='labels', dtype=tf.float32)

    print('Building Graph...')
    predicts, embed_normalize, optimizer, loss, _, _, summary = er_mlp_model.build_graph(
        batch_input, labels_input, num_entities, num_relations, rank_e, rank_r, num_slice, lambda_para)

    saver = tf.train.Saver(tf.trainable_variables())
    print('Graph is built.')

Building Graph...
Graph is built.


In [6]:
sess = tf.InteractiveSession(graph=er_mlp_graph)
sess.run(tf.global_variables_initializer())

# Parameters for training
max_iter = 20000
corrupt_size_train = 10
corrupt_size_eval = 50
batch_size = 6000
save_per_iter = 300
eval_per_iter = 100

# Tensorflow logs
train_writer = tf.summary.FileWriter('./tmp/log/train', er_mlp_graph)
valid_writer = tf.summary.FileWriter('./tmp/log/valid')

# Terminate condition for training
min_loss = float('inf')
valids_no_improve = 0
max_valids_no_imporve = 10

### ER Training

In [None]:
print("Start training")
for step in range(1, max_iter + 1):
    batch, labels = make_corrupt(database.get_train_batch(batch_size), database,
                                 num_entities, corrupt_size_train)
    train_summary, train_loss, _ = sess.run([summary, loss, optimizer], 
                                            feed_dict={batch_input: batch, labels_input: labels})
    sess.run(embed_normalize)
    train_writer.add_summary(train_summary, step)
    print('Iter %d: Loss = %f' % (step, train_loss), end='\r')
    
    if step % eval_per_iter == 0:
        print('--------------Evaluation---------------')
        #------ Evaluating on training set ------
        print('Training set')
        train_valid_set, train_valid_labels = make_corrupt(database.get_train_batch(batch_size), database,
                                                           num_entities, corrupt_size_eval)
        train_valid_predicts = sess.run(predicts, feed_dict={batch_input: train_valid_set})
        calc_log_eval(train_valid_predicts, np.array(train_valid_labels), train_writer, step)

        #------ Evaluating on validation set ------
        print('Validation set')
        valid_set, valid_labels = make_corrupt(database.get_valid_set(), database,
                                               num_entities, corrupt_size_eval)
        valid_summary, valid_predicts, valid_loss = sess.run([summary, predicts, loss],
                                                             feed_dict={batch_input: valid_set,
                                                                        labels_input: valid_labels})
        valid_writer.add_summary(valid_summary, step)
        calc_log_eval(valid_predicts, np.array(valid_labels), valid_writer, step)
        print('**Validation loss = %f' % valid_loss)

        if valid_loss < min_loss:
            min_loss = valid_loss
            valids_no_improve = 0
        else:
            valids_no_improve = valids_no_improve + 1
        
        if valids_no_improve >= max_valids_no_imporve:
            break
        
    if step % save_per_iter == 0:
        saver.save(sess, './tmp/save/er_mlp_model', global_step=step)
        print('***Saved model at iteration %d' % step)
print('Training done.')

### ER Evaluation

In [8]:
# Restore model and evaluate
restore_step = 11700
saver.restore(sess, './tmp/save/er_mlp_model-%d' % restore_step)

print('####### Test Evaluation #######')
test_set, test_labels = make_corrupt(database.get_test_set(), database,
                                     num_entities, corrupt_size_eval)
test_predicts, test_loss = sess.run([predicts, loss], feed_dict={batch_input: test_set,
                                                                 labels_input: test_labels})
print('Test loss: ', test_loss)
mrr, hit_at_10, auc_pr, ap, precision, num_pos = metrics_in_a_batch(test_predicts, np.array(test_labels))
print("mmr: %f, hit@10: %f, auc_pr: %f, ap: %f, positive predicts: %d, precision: %f" %
      (mrr, hit_at_10, auc_pr, ap, num_pos, precision))

INFO:tensorflow:Restoring parameters from ./tmp/save/er_mlp_model-11700
####### Test Evaluation #######
Test loss:  0.07760059
mmr: 0.959857, hit@10: 1.000000, auc_pr: 0.935556, ap: 0.935613, positive predicts: 663, precision: 0.752640


## Neural Tensor Network
* [Build Graph](#Build-NTN-Graph)
* [Training](#NTN-Training)
* [Evaluation](#NTN-Evaluation)

In [3]:
# Definitions
import core.ntn_model as ntn_model

def fill_feed_dict(input_list_r, data_list_r, input_list, data_list, num_relations):
    feed_dict = {}
    for (var, value) in zip(input_list_r, data_list_r):
        for r in range(num_relations):
            feed_dict[var[r]] = value[r]

    for (var, value) in zip(input_list, data_list):
        feed_dict[var] = value

    return feed_dict

### Build NTN Graph

In [4]:
# Hyperparameters
slice_size = 2
rank = 100
valid_percent = 0.05
test_percent = 0.05
lambda_para = 0.0001

# Database
dataset = 'data/kin'
database = KnowledgeGraph()
database.read_data_from_txt(dataset)
database.spilt_train_valid_test(valid_percent, test_percent)
num_entities = database.number_of_entities()
num_relations = database.number_of_relations()

# TensorFlow Graph
ntn_graph = tf.Graph()
with ntn_graph.as_default():
    with tf.name_scope('Feed_in'):
        batch_input = [tf.placeholder(shape=[None, 2], name='batch_%d' % r, dtype=tf.int32)
                       for r in range(num_relations)]
        r_empty_input = [tf.placeholder(shape=[], name='empty_%d' % r, dtype=tf.bool)
                         for r in range(num_relations)]
        labels_input = tf.placeholder(shape=[None], name='labels', dtype=tf.float32)
    print('Building Graph...')
    predicts, embed_normalize, optimizer, loss, _, summary = ntn_model.build_graph(
        batch_input, labels_input, r_empty_input, num_entities,
        num_relations, rank, slice_size, lambda_para)
    saver = tf.train.Saver(tf.trainable_variables())
    print('Graph built.')

Building Graph...
Graph built.


In [5]:
sess = tf.InteractiveSession(graph=ntn_graph)
sess.run(tf.global_variables_initializer())

# Parameters for training
batch_size = 6000
max_iter = 20000
corrupt_size_train = 10
corrupt_size_eval = 100
save_per_iter = 300
eval_per_iter = 100

# Tensorflow logs
train_writer = tf.summary.FileWriter('./tmp/log/train', ntn_graph)
valid_writer = tf.summary.FileWriter('./tmp/log/valid')

# Terminate condition for training
min_loss = float('inf')
valids_no_improve = 0
max_valids_no_imporve = 10

### NTN Training

In [None]:
print('Start training.')
for step in range(1, max_iter + 1):
    batch, labels = make_corrupt(database.get_train_batch(batch_size), database,
                                 num_entities, corrupt_size_train)
    batch_list, labels, r_empty = make_split(batch, labels, num_relations)
    labels = np.hstack(labels)
    feed_dict = fill_feed_dict([batch_input, r_empty_input], [batch_list, r_empty],
                               [labels_input], [labels], num_relations)
    train_summary, train_loss, _ = sess.run([summary, loss, optimizer], feed_dict=feed_dict)
    sess.run(embed_normalize)
    train_writer.add_summary(train_summary, step)
    print('Iter %d: Loss = %f' % (step, train_loss), end='\r')

    if step % eval_per_iter == 0:
        print('-------------Evaluation------------')
        print('Training set')
        train_valid_set, train_valid_labels = make_corrupt(database.get_train_batch(batch_size), database,
                                                           num_entities, corrupt_size_eval)
        train_valid_list, train_valid_labels, train_valid_r_empty = make_split(batch, labels, num_relations)
        
        feed_dict = fill_feed_dict([batch_input, r_empty_input], [train_valid_list, train_valid_r_empty],
                                   [labels_input], [train_valid_labels], num_relations)
        
        train_valid_predicts = sess.run(predicts, feed_dict=feed_dict)
        calc_log_eval(train_valid_predicts, np.array(train_valid_labels), train_writer, step)

        print('Evaluating on validation set...')
        valid_set, valid_labels = make_corrupt(database.get_valid_set(), database,
                                               num_entities, corrupt_size_eval)
        valid_list, valid_labels, valid_r_empty = make_split(batch, labels, num_relations)
        
        feed_dict = fill_feed_dict([batch_input, r_empty_input], [valid_list, valid_r_empty],
                                   [labels_input], [valid_labels], num_relations)
        
        valid_summary, valid_predicts, valid_loss = sess.run([summary, predicts, loss], feed_dict=feed_dict)
        valid_writer.add_summary(valid_summary, step)
        calc_log_eval(valid_predicts, np.array(valid_labels), valid_writer, step)
        print('**Validation loss = %f' % valid_loss)

        if valid_loss < min_loss:
            min_loss = valid_loss
            valids_no_improve = 0
        else:
            valids_no_improve = valids_no_improve + 1
        
        if valids_no_improve >= max_valids_no_imporve:
            break

    if step % save_per_iter == 0:
        saver.save(sess, './tmp/save/ntn_model', global_step=step)
        print('***Saved model at iteration %d' % step)
print('Training done.')

### NTN Evaluation

In [7]:
# Restore model and evaluate
restore_step = 3900
saver.restore(sess, './tmp/save/ntn_model-%d' % restore_step)

print('####### Test Evaluation #######')
test_set, test_labels = make_corrupt(database.get_test_set(), database,
                                     num_entities, corrupt_size_eval)
test_list, test_labels, test_r_empty = make_split(batch, labels, num_relations)
feed_dict = fill_feed_dict([batch_input, r_empty_input], [test_list, test_r_empty],
                           [labels_input], [test_labels], num_relations)
test_predicts, test_loss = sess.run([predicts, loss], feed_dict=feed_dict)

print('Test loss: ', test_loss)
mrr, hit_at_10, auc_pr, ap, precision, num_pos = metrics_in_a_batch(test_predicts, np.array(test_labels))
print("mmr: %f, hit@10: %f, auc_pr: %f, ap: %f, positive predicts: %d, precision: %f" %
      (mrr, hit_at_10, auc_pr, ap, num_pos, precision))

INFO:tensorflow:Restoring parameters from ./tmp/save/ntn_model-3900
####### Test Evaluation #######
Test loss:  0.055506162
mmr: 0.984252, hit@10: 1.000000, auc_pr: 0.975398, ap: 0.975400, positive predicts: 5747, precision: 0.943971
