<a href="https://colab.research.google.com/github/LucasHyun/NTN/blob/main/ntn_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [68]:
import tensorflow as tf
import scipy.io as sio
import numpy as np
import random

# Hyperparameters and paths
data_number = 0
data_name = 'Wordnet' if data_number == 0 else 'Freebase'
data_path = f'/content/drive/MyDrive/data/{data_name}'
output_path = f'/content/drive/MyDrive/output/{data_name}/'
num_iter = 500
train_both = False
batch_size = 20000
corrupt_size = 10
embedding_size = 100
slice_size = 3
regularization = 0.0001
in_tensor_keep_normal = False
save_per_iter = 10
learning_rate = 0.01

entities_string = '/entities.txt'
relations_string = '/relations.txt'
embeds_string = '/initEmbed.mat'
training_string = '/train.txt'
test_string = '/test.txt'
dev_string = '/dev.txt'

# Load functions
def load_entities(data_path):
    with open(data_path + entities_string, 'r') as f:
        entities_list = f.read().strip().split('\n')
    return entities_list

def load_relations(data_path):
    with open(data_path + relations_string, 'r') as f:
        relations_list = f.read().strip().split('\n')
    return relations_list

def load_init_embeds(data_path):
    embeds_path = data_path + embeds_string
    return load_embeds(embeds_path)

def load_embeds(file_path):
    mat_contents = sio.loadmat(file_path)
    words = mat_contents['words'].squeeze()
    we = mat_contents['We']
    tree = mat_contents['tree'].squeeze()
    word_vecs = [we[:, i].tolist() for i in range(len(words))]
    entity_words = [tree[i][0][0][0][0][0].item() for i in range(len(tree))]
    return word_vecs, entity_words

def load_training_data(data_path):
    with open(data_path + training_string, 'r') as f:
        training_data = [line.split('\t') for line in f.read().strip().split('\n')]
    return np.array(training_data)

def load_dev_data(data_path):
    with open(data_path + dev_string, 'r') as f:
        dev_data = [line.split('\t') for line in f.read().strip().split('\n')]
    return np.array(dev_data)

def load_test_data(data_path):
    with open(data_path + test_string, 'r') as f:
        test_data = [line.split('\t') for line in f.read().strip().split('\n')]
    return np.array(test_data)

# Helper functions
def data_to_indexed(data, entities, relations):
    entity_to_index = {entities[i]: i for i in range(len(entities))}
    relation_to_index = {relations[i]: i for i in range(len(relations))}
    indexed_data = [(entity_to_index[data[i][0]], relation_to_index[data[i][1]], entity_to_index[data[i][2]]) for i in range(len(data))]
    return indexed_data

def get_batch(batch_size, data, num_entities, corrupt_size):
    random_indices = random.sample(range(len(data)), batch_size)
    batch = [(data[i][0], data[i][1], data[i][2], random.randint(0, num_entities - 1))
             for i in random_indices for _ in range(corrupt_size)]
    return batch

def split_batch(data_batch, num_relations):
    batches = [[] for _ in range(num_relations)]
    for e1, r, e2, e3 in data_batch:
        batches[r].append((e1, e2, e3))
    return batches

def fill_feed_dict(batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder):
    feed_dict = {corrupt_placeholder: [train_both and np.random.random() > 0.5]}
    for i in range(len(batch_placeholders)):
        feed_dict[batch_placeholders[i]] = batches[i]
        feed_dict[label_placeholders[i]] = [[0.0] for _ in range(len(batches[i]))]
    return feed_dict

# Model functions
def inference(batch_placeholders, corrupt_placeholder, init_word_embeds, entity_to_wordvec, num_entities, num_relations, slice_size, batch_size, is_eval, label_placeholders):
    print("Building inference model...")
    d = embedding_size
    k = slice_size
    E = tf.Variable(init_word_embeds, dtype=tf.float32)  # d = embed size
    W = [tf.Variable(tf.random.truncated_normal([d, d, k], stddev=0.1), name=f'W_{r}') for r in range(num_relations)]
    V = [tf.Variable(tf.zeros([k, 2 * d]), name=f'V_{r}') for r in range(num_relations)]
    b = [tf.Variable(tf.zeros([k, 1]), name=f'b_{r}') for r in range(num_relations)]
    U = [tf.Variable(tf.ones([1, k]), name=f'U_{r}') for r in range(num_relations)]

    ent2word = [tf.constant(entity_i.tolist(), dtype=tf.int32) - 1 for entity_i in entity_to_wordvec]
    entEmbed = tf.stack([tf.reduce_mean(tf.gather(E, entword), axis=0) for entword in ent2word])

    predictions = []
    for r in range(num_relations):
        e1, e2, e3 = tf.unstack(tf.cast(batch_placeholders[r], tf.int32), 3, axis=1)
        e1v_pos = tf.transpose(tf.gather(entEmbed, e1), perm=[1, 0])
        e2v_pos = tf.transpose(tf.gather(entEmbed, e2), perm=[1, 0])
        e1v_neg = e1v_pos
        e2v_neg = tf.transpose(tf.gather(entEmbed, e3), perm=[1, 0])
        num_rel_r = tf.shape(e1v_pos)[1]

        preactivation_pos = []
        preactivation_neg = []
        for slice in range(k):
            preactivation_pos.append(tf.reduce_sum(e1v_pos * tf.matmul(W[r][:, :, slice], e2v_pos), axis=0))
            preactivation_neg.append(tf.reduce_sum(e1v_neg * tf.matmul(W[r][:, :, slice], e2v_neg), axis=0))
        preactivation_pos = tf.stack(preactivation_pos, axis=1)
        preactivation_neg = tf.stack(preactivation_neg, axis=1)

        temp2_pos = tf.matmul(V[r], tf.concat([e1v_pos, e2v_pos], axis=0))
        temp2_neg = tf.matmul(V[r], tf.concat([e1v_neg, e2v_neg], axis=0))

        preactivation_pos = preactivation_pos + temp2_pos + b[r]
        preactivation_neg = preactivation_neg + temp2_neg + b[r]

        activation_pos = tf.nn.tanh(preactivation_pos)
        activation_neg = tf.nn.tanh(preactivation_neg)

        score_pos = tf.squeeze(tf.matmul(U[r], activation_pos))
        score_neg = tf.squeeze(tf.matmul(U[r], activation_neg))

        if not is_eval:
            predictions.append(tf.stack([score_pos, score_neg], axis=1))
        else:
            predictions.append(tf.stack([score_pos, tf.squeeze(label_placeholders[r])], axis=1))

    predictions = tf.concat(predictions, axis=0)
    return predictions

def loss(predictions, regularization):
    print("Building loss function...")
    margin = 1.0
    labels = tf.constant([1.0, -1.0])
    temp1 = tf.maximum(0.0, margin - (predictions[:, 0] - predictions[:, 1]))
    temp1 = tf.reduce_sum(temp1)
    temp2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    loss_val = temp1 + regularization * temp2
    return loss_val

def training(loss, learning_rate):
    print("Building training operation...")
    optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    return train_op

def evaluation(predictions):
    print("Building evaluation operation...")
    return tf.reduce_mean(tf.cast(tf.equal(tf.argmax(predictions, 1), tf.argmax(tf.constant([1.0, 0.0]), 1)), tf.float32))

# Training function
def run_training():
    print("Starting training process...")
    raw_training_data = load_training_data(data_path)
    raw_dev_data = load_dev_data(data_path)
    raw_test_data = load_test_data(data_path)

    entities_list = load_entities(data_path)
    relations_list = load_relations(data_path)
    init_word_embeds, entity_to_wordvec = load_init_embeds(data_path)

    training_data = data_to_indexed(raw_training_data, entities_list, relations_list)
    dev_data = data_to_indexed(raw_dev_data, entities_list, relations_list)
    test_data = data_to_indexed(raw_test_data, entities_list, relations_list)

    num_entities = len(entities_list)
    num_relations = len(relations_list)

    init_word_embeds = np.array(init_word_embeds, dtype=np.float32)
    entity_to_wordvec = [np.array(words, dtype=np.int32) for words in entity_to_wordvec]

    batch_placeholders = [tf.compat.v1.placeholder(tf.int32, shape=(None, 3)) for _ in range(num_relations)]
    label_placeholders = [tf.compat.v1.placeholder(tf.float32, shape=(None, 1)) for _ in range(num_relations)]
    corrupt_placeholder = tf.compat.v1.placeholder(tf.bool, shape=())

    is_eval = tf.compat.v1.placeholder(tf.bool)

    predictions = inference(batch_placeholders, corrupt_placeholder, init_word_embeds, entity_to_wordvec,
                            num_entities, num_relations, slice_size, batch_size, is_eval, label_placeholders)

    loss_op = loss(predictions, regularization)
    train_op = training(loss_op, learning_rate)
    eval_op = evaluation(predictions)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        for step in range(num_iter):
            batch = get_batch(batch_size, training_data, num_entities, corrupt_size)
            batches = split_batch(batch, num_relations)
            feed_dict = fill_feed_dict(batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder)

            _, loss_value = sess.run([train_op, loss_op], feed_dict=feed_dict)

            if step % save_per_iter == 0:
                dev_batch = get_batch(batch_size, dev_data, num_entities, corrupt_size)
                dev_batches = split_batch(dev_batch, num_relations)
                dev_feed_dict = fill_feed_dict(dev_batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder)
                dev_feed_dict[is_eval] = True
                accuracy = sess.run(eval_op, feed_dict=dev_feed_dict)
                print(f'Step {step}: loss = {loss_value:.2f}, dev accuracy = {accuracy:.2f}')

        test_batch = get_batch(batch_size, test_data, num_entities, corrupt_size)
        test_batches = split_batch(test_batch, num_relations)
        test_feed_dict = fill_feed_dict(test_batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder)
        test_feed_dict[is_eval] = True
        accuracy = sess.run(eval_op, feed_dict=test_feed_dict)
        print(f'Test accuracy = {accuracy:.2f}')

run_training()

Starting training process...


RuntimeError: tf.placeholder() is not compatible with eager execution.

In [77]:
import tensorflow as tf
import scipy.io as sio
import numpy as np
import random
import datetime

# Placeholder를 사용하기 위해 그래프 모드로 변경
tf.compat.v1.disable_eager_execution()

# Load data
data_path = './drive/MyDrive/data/Wordnet'
entities_string = '/entities.txt'
relations_string = '/relations.txt'
embeds_string = '/initEmbed.mat'
training_string = '/train.txt'
test_string = '/test.txt'
dev_string = '/dev.txt'

def load_entities(data_path):
    entities_file = open(data_path + entities_string)
    entities_list = entities_file.read().strip().split('\n')
    entities_file.close()
    return entities_list

def load_relations(data_path):
    relations_file = open(data_path + relations_string)
    relations_list = relations_file.read().strip().split('\n')
    relations_file.close()
    return relations_list

def load_init_embeds(data_path):
    embeds_path = data_path + embeds_string
    return load_embeds(embeds_path)

def load_embeds(file_path):
    mat_contents = sio.loadmat(file_path)
    words = mat_contents['words'].squeeze()
    we = mat_contents['We']
    tree = mat_contents['tree'].squeeze()
    word_vecs = [we[:, i].tolist() for i in range(len(words))]
    entity_words = [tree[i][0][0][0][0][0].item() for i in range(len(tree))]
    return word_vecs, entity_words

def load_data(data_path, file_string):
    file = open(data_path + file_string)
    data = [line.split('\t') for line in file.read().strip().split('\n')]
    return np.array(data)

def data_to_indexed(data, entities, relations):
    entity_to_index = {entities[i]: i for i in range(len(entities))}
    relation_to_index = {relations[i]: i for i in range(len(relations))}
    indexed_data = [(entity_to_index[data[i][0]], relation_to_index[data[i][1]], entity_to_index[data[i][2]]) for i in range(len(data))]
    return indexed_data

def get_batch(batch_size, data, num_entities, corrupt_size):
    random_indices = random.sample(range(len(data)), batch_size)
    batch = [(data[i][0], data[i][1], data[i][2], random.randint(0, num_entities - 1)) for i in random_indices for _ in range(corrupt_size)]
    return batch

def split_batch(data_batch, num_relations):
    batches = [[] for _ in range(num_relations)]
    for e1, r, e2, e3 in data_batch:
        batches[r].append((e1, e2, e3))
    return batches

def fill_feed_dict(batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder):
    feed_dict = {corrupt_placeholder: [train_both and np.random.random() > 0.5]}
    for i in range(len(batch_placeholders)):
        feed_dict[batch_placeholders[i]] = batches[i]
        feed_dict[label_placeholders[i]] = [[0.0] for _ in range(len(batches[i]))]
    return feed_dict

def run_training():
    print("Begin!")
    print("Load training data...")
    raw_training_data = load_data(data_path, training_string)
    print("Load entities and relations...")
    entities_list = load_entities(data_path)
    relations_list = load_relations(data_path)
    indexed_training_data = data_to_indexed(raw_training_data, entities_list, relations_list)
    print("Load embeddings...")
    init_word_embeds, entity_to_wordvec = load_init_embeds(data_path)

    num_entities = len(entities_list)
    num_relations = len(relations_list)
    num_iters = 500
    batch_size = 20000
    corrupt_size = 10
    slice_size = 3
    regularization = 0.0001
    learning_rate = 0.01
    save_per_iter = 10
    train_both = False

    with tf.Graph().as_default():
        print(f"Starting to build graph {datetime.datetime.now()}")
        batch_placeholders = [tf.compat.v1.placeholder(tf.int32, shape=(None, 3), name=f'batch_{i}') for i in range(num_relations)]
        label_placeholders = [tf.compat.v1.placeholder(tf.float32, shape=(None, 1), name=f'label_{i}') for i in range(num_relations)]
        corrupt_placeholder = tf.compat.v1.placeholder(tf.bool, shape=())

        # Build the computational graph
        predictions = inference(batch_placeholders, corrupt_placeholder, init_word_embeds, entity_to_wordvec,
                                num_entities, num_relations, slice_size, batch_size, False, label_placeholders)
        loss_op = loss(predictions, regularization)
        train_op = training(loss_op, learning_rate, tf.compat.v1.trainable_variables())

        # Create a session for running Ops on the Graph.
        sess = tf.compat.v1.Session()

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)
        saver = tf.compat.v1.train.Saver(tf.compat.v1.trainable_variables())

        for i in range(1, num_iters + 1):
            print(f"Starting iter {i} {datetime.datetime.now()}")
            data_batch = get_batch(batch_size, indexed_training_data, num_entities, corrupt_size)
            relation_batches = split_batch(data_batch, num_relations)

            if i % save_per_iter == 0:
                saver.save(sess, f"{output_path}/{data_name}{i}.sess")

            feed_dict = fill_feed_dict(relation_batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder)
            _, loss_value = sess.run([train_op, loss_op], feed_dict=feed_dict)

def inference(batch_placeholders, corrupt_placeholder, init_word_embeds, entity_to_wordvec, num_entities, num_relations, slice_size, batch_size, is_eval, label_placeholders):
    # Build the inference graph
    d = 100
    k = slice_size
    ten_k = tf.constant([k])
    num_words = len(init_word_embeds)
    E = tf.Variable(init_word_embeds)
    W = [tf.Variable(tf.random.truncated_normal([d, d, k])) for r in range(num_relations)]
    V = [tf.Variable(tf.zeros([k, 2 * d])) for r in range(num_relations)]
    b = [tf.Variable(tf.zeros([k, 1])) for r in range(num_relations)]
    U = [tf.Variable(tf.ones([1, k]), name=f'U_{r}') for r in range(num_relations)]

    ent2word = [tf.constant([entity_i], dtype=tf.int32) - 1 for entity_i in entity_to_wordvec]
    entEmbed = tf.stack([tf.reduce_mean(tf.gather(E, entword), axis=0) for entword in ent2word])

    predictions = []
    for r in range(num_relations):
        e1, e2, e3 = [tf.squeeze(t) for t in tf.unstack(tf.cast(batch_placeholders[r], tf.int32), 3, axis=1)]
        e1v = tf.transpose(tf.squeeze(tf.gather(entEmbed, e1), axis=0))
        e2v = tf.transpose(tf.squeeze(tf.gather(entEmbed, e2), axis=0))
        e3v = tf.transpose(tf.squeeze(tf.gather(entEmbed, e3), axis=1))
        e1v_pos = e1v
        e2v_pos = e2v
        e1v_neg = e1v
        e2v_neg = e3v
        num_rel_r = tf.expand_dims(tf.shape(e1v_pos)[0], 0)
        preactivation_pos = []
        preactivation_neg = []

        for slice in range(k):
            preactivation_pos.append(tf.reduce_sum(e1v_pos * tf.matmul(W[r][:, :, slice], e2v_pos), axis=1))
            preactivation_neg.append(tf.reduce_sum(e1v_neg * tf.matmul(W[r][:, :, slice], e2v_neg), axis=1))

        preactivation_pos = tf.stack(preactivation_pos, axis=1)
        preactivation_neg = tf.stack(preactivation_neg, axis=1)

        temp2_pos = tf.matmul(V[r], tf.concat([e1v_pos, e2v_pos], axis=1))
        temp2_neg = tf.matmul(V[r], tf.concat([e1v_neg, e2v_neg], axis=1))

        preactivation_pos = preactivation_pos + temp2_pos + b[r]
        preactivation_neg = preactivation_neg + temp2_neg + b[r]

        activation_pos = tf.math.tanh(preactivation_pos)
        activation_neg = tf.math.tanh(preactivation_neg)

        score_pos = tf.reshape(tf.matmul(U[r], activation_pos), num_rel_r)
        score_neg = tf.reshape(tf.matmul(U[r], activation_neg), num_rel_r)

        if not is_eval:
            predictions.append(tf.stack([score_pos, score_neg], axis=1))
        else:
            predictions.append(tf.stack([score_pos, tf.reshape(label_placeholders[r], num_rel_r)], axis=1))

    predictions = tf.concat(predictions, axis=1)
    return predictions

def loss(predictions, regularization):
    temp1 = tf.maximum(tf.subtract(predictions[:, 1], predictions[:, 0]) + 1, 0)
    temp1 = tf.reduce_sum(temp1)
    temp2 = tf.sqrt(sum([tf.reduce_sum(tf.square(var)) for var in tf.compat.v1.trainable_variables()]))
    temp = temp1 + (regularization * temp2)
    return temp

def training(loss, learning_rate, var_list):
    optimizer = tf.optimizers.Adagrad(learning_rate)
    return optimizer.minimize(loss, var_list=var_list)

run_training()


Begin!
Load training data...
Load entities and relations...
Load embeddings...
Starting to build graph 2024-05-18 01:01:12.702258


ValueError: `tape` is required when a `Tensor` loss is passed. Received: loss=Tensor("add_90:0", shape=(), dtype=float32), tape=None.