<a href="https://colab.research.google.com/github/LucasHyun/NTN_implementation/blob/master/NTN_updated_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import scipy.io as sio
import numpy as np
import random
import datetime


# Load data
# Directory holding the WordNet dataset, followed by the file names that are
# appended to it. Every name starts with '/' because callers build paths by
# plain string concatenation (data_path + file_string).
data_path = './drive/MyDrive/data/Wordnet'
# NOTE(review): the three names below duplicate literals that are hard-coded
# inside load_entities / load_relations / load_init_embeds; nothing visible in
# this file reads these variables.
entities_string = '/entities.txt'
relations_string = '/relations.txt'
embeds_string = '/initEmbed.mat'
# Triple files passed to load_data() by run_training().
training_string = '/train.txt'
test_string = '/test.txt'
dev_string = '/dev.txt'

# Data loading helpers
def load_entities(data_path):
    """Read the entity vocabulary, one entity name per line.

    Args:
        data_path: Directory that contains ``entities.txt``.

    Returns:
        List of entity names in file order. Leading/trailing whitespace of
        the file as a whole is stripped before splitting on newlines.
    """
    # The context manager closes the handle even when read() raises, which
    # the previous open()/close() pair did not guarantee.
    with open(data_path + '/entities.txt') as entities_file:
        return entities_file.read().strip().split('\n')

def load_relations(data_path):
    """Read the relation vocabulary, one relation name per line.

    Args:
        data_path: Directory that contains ``relations.txt``.

    Returns:
        List of relation names in file order. Leading/trailing whitespace of
        the file as a whole is stripped before splitting on newlines.
    """
    # The context manager closes the handle even when read() raises, which
    # the previous open()/close() pair did not guarantee.
    with open(data_path + '/relations.txt') as relations_file:
        return relations_file.read().strip().split('\n')

def load_init_embeds(data_path):
    """Load the initial embeddings stored as ``initEmbed.mat`` in *data_path*.

    Returns whatever ``load_embeds`` returns for that file.
    """
    return load_embeds(data_path + '/initEmbed.mat')

def load_embeds(file_path):
    """Read word vectors and per-entity entries from a MATLAB ``.mat`` file.

    The file must provide the variables ``words``, ``We`` and ``tree``.

    Args:
        file_path: Path of the ``.mat`` file.

    Returns:
        ``(word_vecs, entity_words)`` where ``word_vecs[i]`` is column ``i``
        of ``We`` as a Python list (one entry per element of ``words``) and
        ``entity_words[j]`` is the scalar found at ``tree[j][0][0][0][0][0]``.
    """
    contents = sio.loadmat(file_path)
    vocab = contents['words'].squeeze()
    embedding_matrix = contents['We']
    tree_cells = contents['tree'].squeeze()

    # One column of We per vocabulary entry.
    word_vecs = []
    for column in range(len(vocab)):
        word_vecs.append(embedding_matrix[:, column].tolist())

    # Each tree entry is a deeply nested MATLAB cell/struct; unwrap it down to
    # the first scalar (presumably the entity's word index -- TODO confirm).
    entity_words = []
    for position in range(len(tree_cells)):
        entity_words.append(tree_cells[position][0][0][0][0][0].item())

    return word_vecs, entity_words

def load_data(data_path, file_string):
    """Load a tab-separated file into a NumPy array of strings.

    Args:
        data_path: Directory containing the file.
        file_string: File name appended verbatim to ``data_path`` (callers in
            this file pass names with a leading ``'/'``).

    Returns:
        ``np.ndarray`` with one row per line and one column per tab-separated
        field.
    """
    # Previously the handle was opened and never closed; the context manager
    # releases it as soon as the content has been read.
    with open(data_path + file_string) as data_file:
        lines = data_file.read().strip().split('\n')
    return np.array([line.split('\t') for line in lines])

def data_to_indexed(data, entities, relations):
    """Translate string triples into integer-index triples.

    Args:
        data: Sequence of rows whose first three fields are
            ``(head entity, relation, tail entity)`` names; extra fields are
            ignored.
        entities: Sequence of entity names; a name's position is its index.
        relations: Sequence of relation names; a name's position is its index.

    Returns:
        List of ``(head_index, relation_index, tail_index)`` tuples, one per
        row of ``data``.

    Raises:
        KeyError: If a row mentions an unknown entity or relation.
    """
    entity_ids = {name: position for position, name in enumerate(entities)}
    relation_ids = {name: position for position, name in enumerate(relations)}

    indexed = []
    for row in data:
        indexed.append((entity_ids[row[0]], relation_ids[row[1]], entity_ids[row[2]]))
    return indexed

def get_batch(batch_size, data, num_entities, corrupt_size):
    """Sample triples and pair each with randomly drawn corrupting entities.

    Args:
        batch_size: Number of distinct rows sampled (without replacement)
            from ``data``.
        data: Indexable collection of ``(e1, r, e2)`` rows.
        num_entities: Corrupting entities are drawn uniformly from
            ``0 .. num_entities - 1``.
        corrupt_size: Number of corrupted copies emitted per sampled row.

    Returns:
        List of ``(e1, r, e2, e_corrupt)`` tuples of length
        ``batch_size * corrupt_size``; copies of the same row are adjacent.
    """
    chosen = random.sample(range(len(data)), batch_size)
    batch = []
    for index in chosen:
        row = data[index]
        for _ in range(corrupt_size):
            batch.append((row[0], row[1], row[2], random.randint(0, num_entities - 1)))
    return batch

# def split_batch(data_batch, num_relations):
#     batches = [[] for _ in range(num_relations)]
#     for e1, r, e2, e3 in data_batch:
#         batches[r].append((e1, e2, e3))
#     return batches
def split_batch(data_batch, num_relations):
    """Group a batch by relation index and drop relations with no rows.

    Args:
        data_batch: Iterable of ``(e1, r, e2, e3)`` tuples.
        num_relations: Number of relation buckets to allocate.

    Returns:
        List of non-empty buckets ordered by relation index; each bucket is a
        list of ``(e1, e2, e3)`` tuples. Because empty buckets are removed,
        a bucket's position in the result is NOT its relation index.
    """
    buckets = [list() for _ in range(num_relations)]
    for head, relation, tail, corrupt in data_batch:
        buckets[relation].append((head, tail, corrupt))
    # filter(None, ...) keeps only the truthy (non-empty) buckets.
    return list(filter(None, buckets))

def fill_feed_dict(batches, train_both, batch_placeholders, label_placeholders, corrupt_placeholder):
    """Build a placeholder -> value mapping for one step.

    Args:
        batches: Per-slot batches; ``batches[i]`` is fed to
            ``batch_placeholders[i]``.
        train_both: When truthy, the corrupt flag is set by a coin flip;
            otherwise the flag is ``train_both`` itself.
        batch_placeholders: Keys that receive the batches.
        label_placeholders: Keys that receive one ``[0.0]`` label per row of
            the matching batch.
        corrupt_placeholder: Key that receives the single-element flag list.

    Returns:
        Dict with the corrupt flag first, then batch/label entries per slot.
    """
    corrupt_flag = train_both and np.random.random() > 0.5
    feed_dict = {corrupt_placeholder: [corrupt_flag]}
    for slot, batch_key in enumerate(batch_placeholders):
        rows = batches[slot]
        feed_dict[batch_key] = rows
        feed_dict[label_placeholders[slot]] = [[0.0] for _ in range(len(rows))]
    return feed_dict

# Model definition
class CustomModel(tf.keras.Model):
    """Neural-tensor-network style scorer over entity triples.

    For every relation ``r`` the model owns a bilinear tensor ``W[r]``
    (d, d, k), a linear map ``V[r]`` (k, 2d), a bias ``b[r]`` (k, 1) and an
    output vector ``U[r]`` (1, k). An entity pair ``(a, b)`` is scored as
    ``U[r] @ tanh(a^T W[r] b + V[r] @ [a; b] + b[r])``.

    NOTE(review): ``call`` only receives entity indices, so each input row is
    scored under *every* relation; the relation a triple belongs to is not
    used to select parameters. Confirm whether that is intended.
    """

    def __init__(self, init_word_embeds, entity_to_wordvec, num_entities, num_relations, slice_size, batch_size):
        """Store the configuration; weights are created lazily in ``build``.

        Args:
            init_word_embeds: Initial word-embedding table; it is turned into
                a float32 ``tf.Variable`` in ``build`` (assumed to be
                ``(num_words, d)`` -- TODO confirm).
            entity_to_wordvec: One word index per entity. ``call`` subtracts
                1 before indexing the table (presumably 1-based MATLAB
                indices -- TODO confirm).
            num_entities: Stored; not read by this class.
            num_relations: Number of relation-specific parameter sets.
            slice_size: Number of tensor slices ``k``.
            batch_size: Stored; not read by this class.
        """
        super().__init__()
        self.init_word_embeds = init_word_embeds
        self.entity_to_wordvec = entity_to_wordvec
        self.num_entities = num_entities
        self.num_relations = num_relations
        self.slice_size = slice_size
        self.batch_size = batch_size

    def build(self, input_shape):
        """Create the embedding table and the per-relation parameters."""
        k = self.slice_size  # number of tensor slices
        self.E = tf.Variable(self.init_word_embeds, trainable=True, dtype=tf.float32)
        # Take the embedding width from the table itself instead of assuming
        # 100; any other width previously failed later inside the matmuls.
        d = int(self.E.shape[-1])
        self.W = [self.add_weight(shape=(d, d, k), initializer='random_normal', trainable=True, dtype=tf.float32) for _ in range(self.num_relations)]
        self.V = [self.add_weight(shape=(k, 2 * d), initializer='zeros', trainable=True, dtype=tf.float32) for _ in range(self.num_relations)]
        self.b = [self.add_weight(shape=(k, 1), initializer='zeros', trainable=True, dtype=tf.float32) for _ in range(self.num_relations)]
        self.U = [self.add_weight(shape=(1, k), initializer='ones', trainable=True, dtype=tf.float32, name=f'U_{r}') for r in range(self.num_relations)]
        super().build(input_shape)

    @tf.function
    def call(self, inputs, training=None):
        """Score ``(e1, e2)`` and ``(e1, e3)`` under every relation.

        Args:
            inputs: Tensor whose last axis holds ``(e1, e2, e3)`` entity
                indices. The shape comments below assume ``(batch, 3)``,
                which is what ``train_step`` passes.
            training: Unused.

        Returns:
            For ``(batch, 3)`` inputs, a tensor of shape
            ``(1, 2 * num_relations, batch)``: row ``2r`` holds the scores of
            ``(e1, e2)`` under relation ``r`` and row ``2r + 1`` the scores
            of ``(e1, e3)``.
        """
        e1 = tf.cast(inputs[..., 0], tf.int32)
        e2 = tf.cast(inputs[..., 1], tf.int32)
        e3 = tf.cast(inputs[..., 2], tf.int32)

        # One embedding row per entity. A single vectorised gather replaces
        # the former per-entity loop, which added one constant, one gather
        # and one (single-element) mean op to the graph for every entity.
        word_ids = tf.constant(list(self.entity_to_wordvec), dtype=tf.int32) - 1
        ent_embed = tf.gather(self.E, word_ids)  # (num_entities, d)

        # Entity vectors and their concatenations do not depend on the
        # relation, so they are computed once instead of once per relation.
        e1v = tf.transpose(tf.gather(ent_embed, e1))  # (d, batch)
        e2v = tf.transpose(tf.gather(ent_embed, e2))  # (d, batch)
        e3v = tf.transpose(tf.gather(ent_embed, e3))  # (d, batch)
        concat_pos = tf.concat([e1v, e2v], axis=0)  # (2d, batch)
        concat_neg = tf.concat([e1v, e3v], axis=0)  # (2d, batch)

        predictions = []
        for r in range(self.num_relations):
            # Bilinear term e1^T W[r][:, :, s] e_other, one row per slice.
            bilinear_pos = tf.stack(
                [tf.reduce_sum(e1v * tf.matmul(self.W[r][:, :, s], e2v), axis=0) for s in range(self.slice_size)],
                axis=0,
            )  # (k, batch)
            bilinear_neg = tf.stack(
                [tf.reduce_sum(e1v * tf.matmul(self.W[r][:, :, s], e3v), axis=0) for s in range(self.slice_size)],
                axis=0,
            )  # (k, batch)

            # b[r] is (k, 1) and broadcasts over the batch axis.
            preactivation_pos = bilinear_pos + tf.matmul(self.V[r], concat_pos) + self.b[r]
            preactivation_neg = bilinear_neg + tf.matmul(self.V[r], concat_neg) + self.b[r]

            score_pos = tf.matmul(self.U[r], tf.math.tanh(preactivation_pos))  # (1, batch)
            score_neg = tf.matmul(self.U[r], tf.math.tanh(preactivation_neg))  # (1, batch)

            predictions.append(tf.stack([score_pos, score_neg], axis=1))  # (1, 2, batch)

        return tf.concat(predictions, axis=1)  # (1, 2 * num_relations, batch)

    def compute_output_shape(self, input_shape):
        # NOTE(review): call() currently returns (1, 2 * num_relations, batch),
        # which does not match the shape reported here. Left unchanged because
        # callers may rely on either; reconcile the two.
        return tf.TensorShape([None, self.num_relations, 2])


# 손실 함수 정의
# def loss_fn(predictions, regularization):
#     temp1 = tf.maximum(tf.subtract(predictions[:, 1], predictions[:, 0]) + 1, 0)
#     temp1 = tf.reduce_sum(temp1)
#     temp2 = tf.sqrt(sum([tf.reduce_sum(tf.square(var)) for var in tf.compat.v1.trainable_variables()]))
#     temp = temp1 + (regularization * temp2)
#     return temp
# Loss function
def loss_fn(predictions, regularization, variables=None):
    """Margin ranking loss plus an L2-norm penalty on the parameters.

    Args:
        predictions: Score tensor; index 0 along axis 1 is treated as the
            positive score and index 1 as the corrupted score.
        regularization: Weight of the L2-norm penalty.
        variables: Optional iterable of variables to regularise. When omitted
            the function falls back to ``tf.compat.v1.trainable_variables()``
            as before. That call reads the TF1 global collection, which TF2
            eager / ``tf.function`` code does not populate, so the fallback
            penalty is expected to be 0; pass ``model.trainable_variables``
            to regularise a Keras model.

    Returns:
        Scalar tensor ``sum(max(neg - pos + 1, 0)) + regularization * ||vars||``.
    """
    # NOTE(review): only indices 0 and 1 of axis 1 are compared; confirm this
    # matches the layout of the predictions tensor the model produces.
    margin_terms = tf.maximum(tf.subtract(predictions[:, 1], predictions[:, 0]) + 1, 0)
    hinge = tf.reduce_sum(margin_terms)

    if variables is None:
        variables = tf.compat.v1.trainable_variables()
    squared_norm = sum([tf.reduce_sum(tf.square(tf.cast(var, tf.float32))) for var in variables])
    # The cast also covers the empty case, where sum([]) is the Python int 0.
    l2_norm = tf.sqrt(tf.cast(squared_norm, tf.float32))

    return hinge + (regularization * l2_norm)


# Training step
def train_step(model, optimizer, loss_fn, data, batch_size, num_entities, corrupt_size, num_relations, train_both, regularization=0.0001):
    """Sample one mini-batch, compute the loss and apply one optimizer update.

    This function deliberately runs eagerly. It used to be wrapped in
    ``@tf.function``, but the batch is drawn with Python's ``random`` module,
    and Python side effects inside a ``tf.function`` execute only while the
    function is traced. The first sampled batch was therefore frozen into the
    graph and reused on every step.

    Args:
        model: Callable Keras model exposing ``trainable_variables``.
        optimizer: Optimizer exposing ``apply_gradients``.
        loss_fn: Callable ``loss_fn(predictions, regularization)``.
        data: Indexed ``(e1, r, e2)`` triples to sample from.
        batch_size: Number of triples sampled per step.
        num_entities: Upper bound (exclusive) for corrupting entity indices.
        corrupt_size: Corrupted copies generated per sampled triple.
        num_relations: Number of relations, used to group the batch.
        train_both: Unused.
        regularization: Penalty weight forwarded to ``loss_fn``; the default
            equals the value that was previously hard-coded.

    Returns:
        The loss tensor for this step.
    """
    # Sampling is plain Python/NumPy work, so it stays outside the tape.
    data_batch = get_batch(batch_size, data, num_entities, corrupt_size)
    relation_batches = split_batch(data_batch, num_relations)
    inputs = tf.constant(np.vstack(relation_batches))

    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss_value = loss_fn(predictions, regularization)

    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss_value

def evaluate(model, data, batch_size, num_entities, num_relations, threshold=0.5):
    """Return the fraction of triples whose positive score exceeds a threshold.

    Differences from the previous implementation, which could not run:

    * It indexed ``relation_batches[r]`` for every relation although
      ``split_batch`` drops empty relations, and indexed ``preds[r]`` although
      the model output has a leading axis of size 1.
    * It was wrapped in ``@tf.function`` while sampling with Python's
      ``random``, which only executes at trace time.
    * It divided by zero for empty ``data``.

    Assumes the model output layout ``(1, 2 * num_relations, batch)`` where
    row ``2r`` holds the positive scores under relation ``r`` (this is what
    ``CustomModel.call`` produces) -- adjust the indexing if that changes.

    Args:
        model: Callable Keras model.
        data: Indexed ``(e1, r, e2)`` triples.
        batch_size: Number of triples evaluated per model call.
        num_entities: Upper bound (exclusive) for the random entity that
            fills the third input column; its score is not used here.
        num_relations: Unused; kept for interface compatibility.
        threshold: A triple counts as a hit when its positive score is
            strictly greater than this value (default 0.5, as before).

    Returns:
        ``hits / total`` as a Python float, or ``0.0`` when ``data`` is empty.
    """
    hits = 0
    total = 0
    for start in range(0, len(data), batch_size):
        batch_data = data[start:start + batch_size]
        # corrupt_size=1: every triple appears exactly once (in random order).
        data_batch = get_batch(len(batch_data), batch_data, num_entities, 1)

        rel_ids = np.array([rel for _, rel, _, _ in data_batch], dtype=np.int64)
        inputs = tf.constant(np.array([(e1, e2, e3) for e1, _, e2, e3 in data_batch]))
        preds = model(inputs, training=False).numpy()

        # Pick, for each row j, the positive score under that row's relation.
        pos_scores = preds[0, 2 * rel_ids, np.arange(len(rel_ids))]
        hits += int(np.count_nonzero(pos_scores > threshold))
        total += len(rel_ids)

    return hits / total if total else 0.0



def train_epoch(model, optimizer, loss_fn, data, batch_size, num_entities, corrupt_size, num_relations, train_both):
    """Run ``len(data) // batch_size`` training steps and average their loss.

    All arguments are forwarded unchanged to ``train_step``.

    Returns:
        Mean of the per-step loss values.

    Raises:
        ZeroDivisionError: If ``data`` holds fewer than ``batch_size`` items.
    """
    steps = len(data) // batch_size
    step_args = (model, optimizer, loss_fn, data, batch_size, num_entities, corrupt_size, num_relations, train_both)

    running_loss = 0.0
    for _step in range(steps):
        running_loss += train_step(*step_args).numpy()
    return running_loss / steps

# 전체 훈련 및 평가 함수
# def run_training():
#     print("Begin!")
#     print("Load training data...")
#     raw_training_data = load_data(data_path, training_string)
#     print("Load validation data...")
#     raw_dev_data = load_data(data_path, dev_string)
#     print("Load test data...")
#     raw_test_data = load_data(data_path, test_string)

#     print("Load entities and relations...")
#     entities_list = load_entities(data_path)
#     relations_list = load_relations(data_path)
#     indexed_training_data = data_to_indexed(raw_training_data, entities_list, relations_list)
#     indexed_dev_data = data_to_indexed(raw_dev_data, entities_list, relations_list)
#     indexed_test_data = data_to_indexed(raw_test_data, entities_list, relations_list)

#     print("Load embeddings...")
#     init_word_embeds, entity_to_wordvec = load_init_embeds(data_path)

#     num_entities = len(entities_list)
#     num_relations = len(relations_list)
#     num_iters = 500
#     batch_size = 100
#     corrupt_size = 10
#     slice_size = 3
#     regularization = 0.0001
#     learning_rate = 0.01
#     save_per_iter = 10
#     train_both = False

#     print(f"Starting to build model {datetime.datetime.now()}")

#     model = CustomModel(init_word_embeds, entity_to_wordvec, num_entities, num_relations, slice_size, batch_size)

#     optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

#     for epoch in range(1, num_iters + 1):
#         print(f"Starting epoch {epoch} {datetime.datetime.now()}")
#         train_loss = train_epoch(model, optimizer, loss_fn, indexed_training_data, batch_size, num_entities, corrupt_size, num_relations, train_both)
#         print(f"Epoch {epoch} loss: {train_loss}")

#         if epoch % save_per_iter == 0:
#             model.save(f"{data_path}/model_epoch_{epoch}")

#         if epoch % save_per_iter == 0:
#             print(f"Evaluating on dev data at epoch {epoch}")
#             dev_accuracy = evaluate(model, indexed_dev_data, batch_size, num_entities, num_relations)
#             print(f"Dev accuracy at epoch {epoch}: {dev_accuracy}")

#     print("Training finished!")
#     print("Evaluating on test data")
#     test_accuracy = evaluate(model, indexed_test_data, batch_size, num_entities, num_relations)
#     print(f"Test accuracy: {test_accuracy}")

# run_training()
def run_training():
    """Load the dataset, train ``CustomModel`` and report dev/test accuracy.

    Reads the module-level ``data_path`` and ``*_string`` file names, prints
    progress to stdout, saves the model every ``save_per_iter`` epochs and
    evaluates on the dev split at the same epochs.
    """
    print("Begin!")
    print("Load training data...")
    train_raw = load_data(data_path, training_string)
    print("Load validation data...")
    dev_raw = load_data(data_path, dev_string)
    print("Load test data...")
    test_raw = load_data(data_path, test_string)

    print("Load entities and relations...")
    entities_list = load_entities(data_path)
    relations_list = load_relations(data_path)
    # Convert the three splits, in the same order as before.
    indexed_training_data, indexed_dev_data, indexed_test_data = (
        data_to_indexed(split, entities_list, relations_list)
        for split in (train_raw, dev_raw, test_raw)
    )

    print("Load embeddings...")
    init_word_embeds, entity_to_wordvec = load_init_embeds(data_path)

    # Hyperparameters.
    num_entities = len(entities_list)
    num_relations = len(relations_list)
    num_iters = 10
    batch_size = 64
    corrupt_size = 10
    slice_size = 3
    regularization = 0.0001  # not forwarded to train_epoch
    learning_rate = 0.001
    save_per_iter = 10
    train_both = False

    print(f"Starting to build model {datetime.datetime.now()}")

    model = CustomModel(init_word_embeds, entity_to_wordvec, num_entities, num_relations, slice_size, batch_size)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    for epoch in range(1, num_iters + 1):
        print(f"Starting epoch {epoch} {datetime.datetime.now()}")
        train_loss = train_epoch(model, optimizer, loss_fn, indexed_training_data, batch_size, num_entities, corrupt_size, num_relations, train_both)
        print(f"Epoch {epoch} loss: {train_loss}")

        # Checkpoint and dev evaluation share the same schedule.
        if epoch % save_per_iter == 0:
            model.save(f"{data_path}/model_epoch_{epoch}")
            print(f"Evaluating on dev data at epoch {epoch}")
            dev_accuracy = evaluate(model, indexed_dev_data, batch_size, num_entities, num_relations)
            print(f"Dev accuracy at epoch {epoch}: {dev_accuracy}")

    print("Training finished!")
    print("Evaluating on test data")
    test_accuracy = evaluate(model, indexed_test_data, batch_size, num_entities, num_relations)
    print(f"Test accuracy: {test_accuracy}")


run_training()


Begin!
Load training data...
Load validation data...
Load test data...
Load entities and relations...
Load embeddings...
Starting to build model 2024-05-19 06:23:53.730515
Starting epoch 1 2024-05-19 06:24:35.786323
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
final predictions shape: (1, 22, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (1, 640)
score_neg shape: (1, 640)
score_pos shape: (