In [1]:
import math
import pickle
import numpy as np
from numpy.core.numeric import indices
import scipy.optimize
import scipy.sparse as sp
from tensorflow.python.keras.utils.data_utils import Sequence

In [None]:
class NeuralTensorNetwork(object):
    """Neural Tensor Network that scores (entity1, relation, entity2) triples.

    All parameters are kept in one flat vector ``self.theta`` so that the
    model can be handed to ``scipy.optimize.minimize``.  ``S2P`` packs the
    structured parameters into that vector and ``P2S`` unpacks them again
    using the shapes recorded in ``self.decode_info``.

    Per relation ``i`` the parameters are:
      * ``W[i]`` -- (embedding_size, embedding_size, slice_size) tensor
      * ``V[i]`` -- (2 * embedding_size, slice_size) matrix
      * ``b[i]`` -- (1, slice_size) bias
      * ``U[i]`` -- (slice_size, 1) output weights
    plus one shared (embedding_size, num_words) word-vector matrix.  An
    entity vector is the mean of the word vectors listed for that entity in
    ``word_indices``.
    """

    def __init__(self, program_parameters):
        """Read the hyper-parameters and randomly initialise ``theta``.

        Args:
            program_parameters: dict providing 'num_words', 'embedding_size',
                'num_entities', 'num_relations', 'batch_size', 'slice_size',
                'word_indices', 'activation_function' (0 = tanh,
                1 = sigmoid) and 'lamda' (L2 regularisation weight).
        """
        self.num_words           = program_parameters['num_words']
        self.embedding_size      = program_parameters['embedding_size']
        self.num_entities        = program_parameters['num_entities']
        self.num_relations       = program_parameters['num_relations']
        self.batch_size          = program_parameters['batch_size']
        self.slice_size          = program_parameters['slice_size']
        self.word_indices        = program_parameters['word_indices']
        self.activation_function = program_parameters['activation_function']
        self.lamda               = program_parameters['lamda']

        # Word vectors are drawn uniformly from [-1e-4, 1e-4).
        word_range   = 0.0001
        word_vectors = np.random.random((self.embedding_size, self.num_words)) * 2 * word_range - word_range

        # Tensor slices are drawn uniformly from [-r, r), r = 1 / sqrt(2 * d).
        tensor_range = 1 / math.sqrt(2 * self.embedding_size)

        W = {}
        V = {}
        b = {}
        U = {}

        for i in range(self.num_relations):
            W[i] = np.random.random((self.embedding_size, self.embedding_size, self.slice_size)) \
                * 2 * tensor_range - tensor_range
            V[i] = np.zeros((2 * self.embedding_size, self.slice_size))
            b[i] = np.zeros((1, self.slice_size))
            U[i] = np.ones((self.slice_size, 1))

        self.theta, self.decode_info = self.S2P(W, V, b, U, word_vectors)

    def S2P(self, W, V, b, U, word_vectors):
        """Pack structured parameters into one flat vector ("stack to params").

        Dict arguments must be keyed ``0 .. len - 1``; their entries are
        flattened in key order.  Non-dict arguments are flattened directly.

        Returns:
            (theta, decode_info): the flat float vector, and a dict mapping
            the argument position (0..4) to either a ``{key: shape}`` dict or
            a single shape tuple, as required by ``P2S``.
        """
        pieces      = []
        decode_info = {}

        for i, argument in enumerate((W, V, b, U, word_vectors)):
            if isinstance(argument, dict):
                decode_cell = {}
                for j in range(len(argument)):
                    decode_cell[j] = argument[j].shape
                    pieces.append(argument[j].flatten())
                decode_info[i] = decode_cell
            else:
                # Record the shape of the argument actually being packed.
                decode_info[i] = argument.shape
                pieces.append(argument.flatten())

        # Single concatenation; the leading empty float array keeps the
        # result floating point, as when growing from an empty list.
        theta = np.concatenate([np.zeros(0)] + pieces)

        return theta, decode_info

    def P2S(self, theta):
        """Unpack a flat vector into ``[W, V, b, U, word_vectors]``.

        The layout is taken from ``self.decode_info``.  The returned arrays
        are reshaped slices of ``theta``.
        """
        stack = []
        index = 0

        for i in range(len(self.decode_info)):
            decode_cell = self.decode_info[i]

            if isinstance(decode_cell, dict):
                param_dict = {}
                for j in range(len(decode_cell)):
                    size = int(np.prod(decode_cell[j]))
                    param_dict[j] = theta[index : index + size].reshape(decode_cell[j])
                    index += size
                stack.append(param_dict)
            else:
                size = int(np.prod(decode_cell))
                stack.append(theta[index : index + size].reshape(decode_cell))
                index += size

        return stack

    def act_F(self, x):
        """Apply the configured activation (0 = tanh, 1 = sigmoid) to ``x``.

        Raises:
            ValueError: if ``activation_function`` is neither 0 nor 1.
        """
        if self.activation_function == 0:
            return np.tanh(x)
        elif self.activation_function == 1:
            return (1 / (1 + np.exp(-x)))
        raise ValueError("unsupported activation_function: %r" % (self.activation_function,))

    def ActDifferential(self, x):
        """Derivative of the activation, expressed via its *output* ``x``.

        ``costF`` passes already-activated values, hence ``1 - x**2`` for
        tanh and ``x * (1 - x)`` for the sigmoid.

        Raises:
            ValueError: if ``activation_function`` is neither 0 nor 1.
        """
        if self.activation_function == 0:
            return (1 - np.power(x, 2))
        elif self.activation_function == 1:
            return (x * (1 - x))
        raise ValueError("unsupported activation_function: %r" % (self.activation_function,))

    def _entity_vectors(self, word_vectors):
        """Return the (embedding_size, num_entities) matrix of mean word vectors."""
        entity_vectors = np.zeros((self.embedding_size, self.num_entities))

        for entity in range(self.num_entities):
            entity_vectors[:, entity] = np.mean(word_vectors[:, self.word_indices[entity]], axis = 1)

        return entity_vectors

    def _score(self, W, V, b, U, entity_vectors, e1, rel, e2):
        """Return the evaluation score of one triple as a Python float.

        NOTE(review): unlike ``costF``, this score does not pass the
        pre-activation through ``act_F``; that matches the code this helper
        was factored out of -- confirm it is intended.
        """
        vector_e1 = entity_vectors[:, e1]
        vector_e2 = entity_vectors[:, e2]
        stacked   = np.concatenate((vector_e1, vector_e2))

        score = 0.0
        for k in range(self.slice_size):
            score += U[rel][k, 0] * \
                (np.dot(vector_e1, np.dot(W[rel][:, :, k], vector_e2)) +
                 np.dot(V[rel][:, k], stacked) + b[rel][0, k])

        return float(score)

    def costF(self, theta, data_batch, flip):
        """Return the regularised margin cost and its gradient at ``theta``.

        Args:
            theta: flat parameter vector laid out as produced by ``S2P``.
            data_batch: dict with equally shaped arrays 'rel', 'e1', 'e2'
                and 'e3'; 'e3' holds the corrupting entity.  Entity ids may
                be stored as floats; they are cast to int before indexing.
            flip: if truthy, the negative example replaces e2 by e3,
                otherwise it replaces e1 by e3.

        Returns:
            (cost, theta_grad), both normalised by ``self.batch_size`` and
            including the ``lamda`` L2 term.
        """
        W, V, b, U, word_vectors = self.P2S(theta)

        entity_vectors     = self._entity_vectors(word_vectors)
        entity_vector_grad = np.zeros((self.embedding_size, self.num_entities))

        cost = 0

        W_grad = {}; V_grad = {}; b_grad = {}; U_grad = {}

        for i in range(self.num_relations):

            # Select the examples of relation i.
            rel_i_list = (data_batch['rel'] == i)
            num_rel_i  = np.sum(rel_i_list)

            # NumPy only accepts integer indices, but the ids may have been
            # loaded into float arrays.
            e1 = np.asarray(data_batch['e1'][rel_i_list]).astype(int)
            e2 = np.asarray(data_batch['e2'][rel_i_list]).astype(int)
            e3 = np.asarray(data_batch['e3'][rel_i_list]).astype(int)

            entity_vectors_e1 = entity_vectors[:, e1]
            entity_vectors_e2 = entity_vectors[:, e2]
            entity_vectors_e3 = entity_vectors[:, e3]

            # Build the corrupted (negative) triple.
            if flip:
                entity_vectors_e1_neg = entity_vectors_e1
                entity_vectors_e2_neg = entity_vectors_e3
                e1_neg = e1
                e2_neg = e3
            else:
                entity_vectors_e1_neg = entity_vectors_e3
                entity_vectors_e2_neg = entity_vectors_e2
                e1_neg = e3
                e2_neg = e2

            preactivation_pos = np.zeros((self.slice_size, num_rel_i))
            preactivation_neg = np.zeros((self.slice_size, num_rel_i))

            # Bilinear term e1^T W_k e2 for every example, one slice at a time.
            for k in range(self.slice_size):
                preactivation_pos[k, :] = np.sum(entity_vectors_e1 *
                    np.dot(W[i][:, :, k], entity_vectors_e2), axis = 0)
                preactivation_neg[k, :] = np.sum(entity_vectors_e1_neg *
                    np.dot(W[i][:, :, k], entity_vectors_e2_neg), axis = 0)

            preactivation_pos += b[i].T + np.dot(V[i].T, np.vstack((entity_vectors_e1, entity_vectors_e2)))
            preactivation_neg += b[i].T + np.dot(V[i].T, np.vstack((entity_vectors_e1_neg, entity_vectors_e2_neg)))

            activation_pos = self.act_F(preactivation_pos)
            activation_neg = self.act_F(preactivation_neg)

            score_pos = np.dot(U[i].T, activation_pos)
            score_neg = np.dot(U[i].T, activation_neg)

            # Only examples violating the margin contribute cost and gradient.
            wrong_filter = (score_pos + 1 > score_neg)[0]

            cost += np.sum(wrong_filter * (score_pos - score_neg + 1)[0])

            W_grad[i] = np.zeros(W[i].shape)
            V_grad[i] = np.zeros(V[i].shape)

            num_wrong = np.sum(wrong_filter)

            activation_pos            = activation_pos[:, wrong_filter]
            activation_neg            = activation_neg[:, wrong_filter]
            entity_vectors_e1_rel     = entity_vectors_e1[:, wrong_filter]
            entity_vectors_e2_rel     = entity_vectors_e2[:, wrong_filter]
            entity_vectors_e1_rel_neg = entity_vectors_e1_neg[:, wrong_filter]
            entity_vectors_e2_rel_neg = entity_vectors_e2_neg[:, wrong_filter]

            e1     = e1[wrong_filter]
            e2     = e2[wrong_filter]
            e1_neg = e1_neg[wrong_filter]
            e2_neg = e2_neg[wrong_filter]

            U_grad[i] = np.sum(activation_pos - activation_neg, axis = 1).reshape(self.slice_size, 1)

            # Error signal at the pre-activations (negative side has sign -1).
            temp_pos_all = U[i] * self.ActDifferential(activation_pos)
            temp_neg_all = - U[i] * self.ActDifferential(activation_neg)

            b_grad[i] = np.sum(temp_pos_all + temp_neg_all, axis = 1).reshape(1, self.slice_size)

            # One-hot (example -> entity) matrices in CSR form: row j has a
            # single 1 in the column of the entity used by example j.
            values = np.ones(num_wrong)
            rows   = np.arange(num_wrong + 1)

            e1_sparse     = sp.csr_matrix((values, e1, rows), shape = (num_wrong, self.num_entities))
            e2_sparse     = sp.csr_matrix((values, e2, rows), shape = (num_wrong, self.num_entities))
            e1_neg_sparse = sp.csr_matrix((values, e1_neg, rows), shape = (num_wrong, self.num_entities))
            e2_neg_sparse = sp.csr_matrix((values, e2_neg, rows), shape = (num_wrong, self.num_entities))

            for k in range(self.slice_size):

                temp_pos = temp_pos_all[k, :].reshape(1, num_wrong)
                temp_neg = temp_neg_all[k, :].reshape(1, num_wrong)

                W_grad[i][:, :, k] = np.dot(entity_vectors_e1_rel * temp_pos, entity_vectors_e2_rel.T) \
                    + np.dot(entity_vectors_e1_rel_neg * temp_neg, entity_vectors_e2_rel_neg.T)

                V_grad[i][:, k] = np.sum(np.vstack((entity_vectors_e1_rel, entity_vectors_e2_rel)) * temp_pos
                    + np.vstack((entity_vectors_e1_rel_neg, entity_vectors_e2_rel_neg)) * temp_neg, axis = 1)

                V_pos = V[i][:, k].reshape(2*self.embedding_size, 1) * temp_pos
                V_neg = V[i][:, k].reshape(2*self.embedding_size, 1) * temp_neg

                # `dense * csr_matrix` is a matrix product here: it sums the
                # per-example gradients into the columns of their entities.
                entity_vector_grad += V_pos[:self.embedding_size, :] * e1_sparse + V_pos[self.embedding_size:, :] * e2_sparse \
                    + V_neg[:self.embedding_size, :] * e1_neg_sparse + V_neg[self.embedding_size:, :] * e2_neg_sparse

                entity_vector_grad += (np.dot(W[i][:, :, k], entity_vectors_e2_rel) * temp_pos) * e1_sparse \
                    + (np.dot(W[i][:, :, k].T, entity_vectors_e1_rel) * temp_pos) * e2_sparse \
                    + (np.dot(W[i][:, :, k], entity_vectors_e2_rel_neg) * temp_neg) * e1_neg_sparse \
                    + (np.dot(W[i][:, :, k].T, entity_vectors_e1_rel_neg) * temp_neg) * e2_neg_sparse

            W_grad[i] /= self.batch_size
            V_grad[i] /= self.batch_size
            b_grad[i] /= self.batch_size
            U_grad[i] /= self.batch_size

        word_vector_grad = np.zeros(word_vectors.shape)

        # Back-propagate through the word averaging.  Accumulate word by word
        # so that a word index repeated inside one entity receives its share
        # once per occurrence (a fancy-indexed `+=` would add it only once).
        for entity in range(self.num_entities):
            entity_words = self.word_indices[entity]
            entity_len   = len(entity_words)
            if entity_len == 0:
                continue
            share = entity_vector_grad[:, entity] / entity_len
            for word in entity_words:
                word_vector_grad[:, word] += share

        word_vector_grad /= self.batch_size
        cost             /= self.batch_size

        theta_grad, d_t = self.S2P(W_grad, V_grad, b_grad, U_grad, word_vector_grad)

        # L2 regularisation over every parameter.
        cost       += 0.5 * self.lamda * np.sum(theta * theta)
        theta_grad += self.lamda * theta

        return cost, theta_grad

    def MaxThresholds(self, dev_data, dev_labels):
        """Pick, per relation, the score threshold maximising dev accuracy.

        Args:
            dev_data: (n, 3) array of [entity1, relation, entity2] ids
                (floats are accepted and cast to int).
            dev_labels: (n, 1) array of labels, +1 or -1.

        Candidate thresholds run from the minimum to the maximum dev score
        in steps of 0.01; a triple is predicted +1 when its score is at or
        below the threshold.  The result is stored in
        ``self.best_thresholds`` with shape (num_relations, 1).
        """
        W, V, b, U, word_vectors = self.P2S(self.theta)

        entity_vectors = self._entity_vectors(word_vectors)

        dev_scores = np.zeros(dev_labels.shape)

        for i in range(dev_data.shape[0]):
            dev_scores[i, 0] = self._score(W, V, b, U, entity_vectors,
                int(dev_data[i, 0]), int(dev_data[i, 1]), int(dev_data[i, 2]))

        score_min = np.min(dev_scores)
        score_max = np.max(dev_scores)

        best_thresholds = np.full((self.num_relations, 1), score_min, dtype = float)
        best_accuracies = np.full((self.num_relations, 1), -1.0)

        # The per-relation selections do not depend on the threshold, so
        # compute them once instead of once per candidate.
        per_relation = []
        for i in range(self.num_relations):
            rel_i_list = (dev_data[:, 1] == i)
            per_relation.append((dev_scores[rel_i_list, 0], dev_labels[rel_i_list, 0]))

        score_temp = score_min
        interval   = 0.01

        while(score_temp <= score_max):

            for i, (scores_i, labels_i) in enumerate(per_relation):

                # A relation without dev examples keeps the initial threshold.
                if scores_i.size == 0:
                    continue

                predictions   = (scores_i <= score_temp) * 2 - 1
                temp_accuracy = np.mean((predictions == labels_i))

                if(temp_accuracy > best_accuracies[i, 0]):
                    best_accuracies[i, 0] = temp_accuracy
                    best_thresholds[i, 0] = score_temp

            score_temp += interval

        self.best_thresholds = best_thresholds

    def getPrediction(self, test_data):
        """Classify triples with the thresholds found by ``MaxThresholds``.

        Args:
            test_data: (n, 3) array of [entity1, relation, entity2] ids
                (floats are accepted and cast to int).

        Returns:
            (n, 1) float array holding +1 where the score is at or below the
            relation's threshold and -1 otherwise.
        """
        W, V, b, U, word_vectors = self.P2S(self.theta)

        entity_vectors = self._entity_vectors(word_vectors)

        predictions = np.empty((test_data.shape[0], 1))

        for i in range(test_data.shape[0]):

            rel        = int(test_data[i, 1])
            test_score = self._score(W, V, b, U, entity_vectors,
                int(test_data[i, 0]), rel, int(test_data[i, 2]))

            if(test_score <= self.best_thresholds[rel, 0]):
                predictions[i, 0] = 1
            else:
                predictions[i, 0] = -1

        return predictions

def getTest(file_name, entity_dictionary, relation_dictionary):
    """Load labelled triples from a whitespace-separated text file.

    Each non-blank line must read ``entity1 relation entity2 label``.

    Args:
        file_name: path of the file to read.
        entity_dictionary: maps entity name -> integer id.
        relation_dictionary: maps relation name -> integer id.

    Returns:
        (test_data, labels): ``test_data`` is an (n, 3) integer array of
        [entity1 id, relation id, entity2 id]; ``labels`` is an (n, 1) float
        array holding 1 where the label field is '1' and -1 otherwise.

    Raises:
        KeyError: if a name is missing from its dictionary.
        ValueError: if a non-blank line does not have exactly four fields.
    """
    # Close the file deterministically; blank lines are skipped.
    with open(file_name, 'r') as file_object:
        data = [line for line in file_object.read().splitlines() if line.strip()]

    num_entries = len(data)
    # Integer dtype: these ids are later used as array indices.
    test_data   = np.empty((num_entries, 3), dtype = int)
    labels      = np.empty((num_entries, 1))

    for index, line in enumerate(data):

        entity1, relation, entity2, label = line.split()

        test_data[index, 0] = entity_dictionary[entity1]
        test_data[index, 1] = relation_dictionary[relation]
        test_data[index, 2] = entity_dictionary[entity2]

        labels[index, 0] = 1 if label == '1' else -1

    return test_data, labels

def WordIndices(file_name):
    """Load the per-entity word indices and the word count from a pickle.

    The pickle must hold a mapping with the keys 'num_words' (an indexable
    sequence whose last element is returned as the word count) and
    'word_indices' (a sequence; a copy made with ``[0:]`` is returned).

    Args:
        file_name: path of the pickle file.

    Returns:
        (word_indices, num_words)
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(file_name, 'rb') as file_object:
        word_dictionary = pickle.load(file_object)

    num_words    = word_dictionary['num_words']
    num_words    = num_words[len(num_words) - 1]
    word_indices = word_dictionary['word_indices'][0:]

    return word_indices, num_words

def TrainingData(file_name, entity_dictionary, relation_dictionary):
    """Load training triples from a whitespace-separated text file.

    Each non-blank line must read ``entity1 relation entity2``.

    Args:
        file_name: path of the file to read.
        entity_dictionary: maps entity name -> integer id.
        relation_dictionary: maps relation name -> integer id.

    Returns:
        (training_data, num_examples): an (n, 3) integer array of
        [entity1 id, relation id, entity2 id] and the number of rows n.

    Raises:
        KeyError: if a name is missing from its dictionary.
        ValueError: if a non-blank line does not have exactly three fields.
    """
    # Close the file deterministically; blank lines are skipped.
    with open(file_name, 'r') as file_object:
        data = [line for line in file_object.read().splitlines() if line.strip()]

    num_examples  = len(data)
    # Integer dtype: these ids are later used as array indices.
    training_data = np.empty((num_examples, 3), dtype = int)

    for index, line in enumerate(data):

        entity1, relation, entity2 = line.split()

        training_data[index, 0] = entity_dictionary[entity1]
        training_data[index, 1] = relation_dictionary[relation]
        training_data[index, 2] = entity_dictionary[entity2]

    return training_data, num_examples

def Dictionary(file_name):
    """Map every line of a text file to its zero-based line number.

    Args:
        file_name: path of a file listing one name per line.

    Returns:
        (dictionary, num_entries): ``dictionary`` maps line text -> line
        index (a repeated line keeps the index of its last occurrence);
        ``num_entries`` is the number of lines read.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_name, 'r') as file_object:
        data = file_object.read().splitlines()

    dictionary  = {entity: index for index, entity in enumerate(data)}
    num_entries = len(data)

    return dictionary, num_entries

def ProgramParameters():
    """Return a fresh dict holding the default hyper-parameters of a run."""
    return {
        'embedding_size':      100,
        'slice_size':          3,
        'num_iterations':      500,
        'batch_size':          20000,
        'corrupt_size':        10,
        'activation_function': 0,
        'lamda':               0.0001,
        'batch_iterations':    5,
    }


def NTN():
    """Train the network on the training triples and print test accuracy.

    Reads the entity/relation lists, the training triples and the word
    indices from ``datasets/``, runs ``num_iterations`` mini-batch rounds of
    L-BFGS-B (``batch_iterations`` steps each), tunes the per-relation
    thresholds on the dev set and prints the accuracy on the test set.
    """
    program_parameters = ProgramParameters()
    num_iterations   = program_parameters['num_iterations']
    batch_size       = program_parameters['batch_size']
    corrupt_size     = program_parameters['corrupt_size']
    batch_iterations = program_parameters['batch_iterations']

    entity_dictionary, num_entities    = Dictionary('datasets/entities.txt')
    relation_dictionary, num_relations = Dictionary('datasets/relations.txt')

    training_data, num_examples = TrainingData('datasets/train_triplet.txt', entity_dictionary, relation_dictionary)

    word_indices, num_words = WordIndices('datasets/word_indices.pickle')
    program_parameters['num_entities']  = num_entities
    program_parameters['num_relations'] = num_relations
    program_parameters['num_examples']  = num_examples
    program_parameters['num_words']     = num_words
    program_parameters['word_indices']  = word_indices

    network = NeuralTensorNetwork(program_parameters)

    for i in range(num_iterations):

        # Sample a batch and repeat every triple `corrupt_size` times; 'e3'
        # holds one random corrupting entity per repeated triple.
        batch_indices = np.random.randint(num_examples, size = batch_size)
        data = {
            'rel': np.tile(training_data[batch_indices, 1], (1, corrupt_size)).T,
            'e1':  np.tile(training_data[batch_indices, 0], (1, corrupt_size)).T,
            'e2':  np.tile(training_data[batch_indices, 2], (1, corrupt_size)).T,
            'e3':  np.random.randint(num_entities, size = (batch_size * corrupt_size, 1)),
        }

        # Pick at random which side of the triple gets corrupted this round.
        flip = 0 if np.random.random() < 0.5 else 1

        opt_solution = scipy.optimize.minimize(network.costF, network.theta,
            args = (data, flip,), method = 'L-BFGS-B', jac = True, options = {'maxiter': batch_iterations})

        network.theta = opt_solution.x
        # Report progress compactly instead of printing the parameter vector.
        print("Iteration %d/%d - cost: %f" % (i + 1, num_iterations, opt_solution.fun))

    # NOTE(review): these two files are read from the working directory, not
    # from datasets/ like the other inputs -- confirm the paths.
    dev_data, dev_labels   = getTest('dev.txt', entity_dictionary, relation_dictionary)
    test_data, test_labels = getTest('test.txt', entity_dictionary, relation_dictionary)

    network.MaxThresholds(dev_data, dev_labels)
    predictions = network.getPrediction(test_data)

    print("Accuracy:", np.mean((predictions == test_labels)))

# Run the experiment only when executed directly (notebook cells and scripts
# both run with __name__ == "__main__"), not when this code is imported.
if __name__ == "__main__":
    NTN()