In [1]:
# This is an implementation of the End-to-End Memory Network as defined by Sukhbaatar, et al. 
# We use k=1, i.e. have only one computational step in the network

import numpy as np
import theano
import theano.tensor as T
from triple_reader import triple_reader
from question_reader import question_reader

In [2]:
# read in file as tensors
text_file = ("/Users/SaahilM/Documents/Princeton/Academics/Thesis/"
    "Senior Thesis Code/ModifiedEntityGraph/prod/MCTest/production/MCTest/OCR_text/1/Triples/1-medium.txt")
    
tr = triple_reader(text_file)
# print tr.tripleList
tensor = tr.tensor

enMap = tr.enMap
relMap = tr.relMap

R = len(tensor)
N = len(tensor[0])
# dimension for encoding is arbitrary, we pick 20 here
d = 20

In [3]:
tensor_stack = np.hstack(tuple(tensor))
print(tensor_stack.shape)
print(N, N*R)

X = T.lmatrix('X')
q = T.dmatrix('q')
y = T.lmatrix('y')

(81, 3726)
(81, 3726)


In [4]:
q_file = ("/Users/SaahilM/Documents/Princeton/Academics/Thesis/Senior Thesis Code/"
"ModifiedEntityGraph/prod/MCTest/production/MCTest/OCR_text/1/1-medium1-q.txt")

qr = question_reader(q_file)

# print qr.numQuestions

numTrainQ = int(qr.numQuestions*(float(2)/3))
numTestQ = qr.numQuestions - numTrainQ
# print numTrainQ
# print numTestQ

CHOICES_PER_Q = 5

In [5]:
# Create a weight matrix of given size. 
# The matrix is initialized randomly with Gaussian distribution 
# with mean=0 and \sigma=0.1
def initializeWeightMatrix(in_size, out_size):
    return theano.shared(0.1 * np.random.randn(in_size, out_size))

# Create a bias vector of all zeros of given size
def initializeBiasVector(size):
    return theano.shared(np.zeros(size))

In [6]:
# Initialize all our parameters, given our dimensions.
# Input matrix has shape Nx(N*R)
# Query matrix has shape 5xnumQ
# A is the first matrix used to embed our input. It has size dxN
# B is the matrix used to embed the query. It has size dx5
# C is the next matrix used to embed our input. It has size dxN
# W is the final matrix. Takes output O and produces result w_embedded. It has size 5xd

def initializeParams(d, N):
    A = initializeWeightMatrix(d,N)
    B = initializeWeightMatrix(d,CHOICES_PER_Q)
    C = initializeWeightMatrix(d,N)
    W = initializeWeightMatrix(CHOICES_PER_Q,d)
    
#     A = theano.shared(initializeWeightMatrix(d, V))
#     B = theano.shared(initializeWeightMatrix(d, V))
#     C = theano.shared(initializeWeightMatrix(d, V))
#     W = theano.shared(initializeWeightMatrix(V, d))
    return A, B, C, W

A, B, C, W = initializeParams(d, N)
weightMatrices = [A, B, C, W]
print(W.shape.eval())

[ 5 20]


In [7]:
# Define the computational step
# Given input matrix X, query q, and weight matrices, we perform a computational step,
# also known as a "hop". Let M be the number of sentences
def hopComputation(X, q, A, B, C, W):
    #m_i = Ax_i
    mem_matrix = A.dot(X) #dimension (dxN) x (Nx(NxR)) = dx(N*R)
    #u = Bq
    u = B.dot(q) #dimension (dx5) x (5xnumQ) = dxnumQ
    #p_i = softmax(u^T m_i)
    probs = T.nnet.softmax(u.T.dot(mem_matrix)) #dimension(numQxd)x(dx(N*R)) = numQx(N*R)
    #C_i = Cx_i
    c = C.dot(X) #dimension (dxN) x (Nx(NxR)) = dx(N*R)
    o = c.dot(probs.T) #dimension (dx(N*R))x((N*R)xnumQ) = dxnumQ
    
    #w_embedded = Wo
    w_embedded = W.dot(o).T #dimension (5xd)x(dxnumQ) = 5xnumQ.T = numQx5
    
    result = T.nnet.softmax(w_embedded)
    return result
    
    #output = sum of c_matrix * probs
#     o = (probs * c_embedded).sum(axis = 0)
    #result = 

In [8]:
y_hat = hopComputation(X, q, A, B, C, W)
loss = T.nnet.categorical_crossentropy(y_hat, y).mean()

In [9]:
from __future__ import print_function

def inspect_inputs(i, node, fn):
    print(i, node, "input(s) value(s):", fn.inputs, end='')

def inspect_outputs(i, node, fn):
    print(" output(s) value(s):", fn.outputs)
    
def detect_nan(i, node, fn):
    for output in fn.outputs:
        if (not isinstance(output[0], np.random.RandomState) and
            np.isnan(output[0]).any()):
            print('*** NaN detected ***')
            theano.printing.debugprint(node)
            print('Inputs : %s' % [input[0] for input in fn.inputs])
            print('Outputs: %s' % [output[0] for output in fn.outputs])
            break

In [12]:
# Learning rate (chosen to be 0.01)
epsilon = 0.4

# This function trains our neural net, using stochastic gradient descent.
def train_MemNN(loss, X, q, y, y_hat):
    update_weights = []
    for weightMatrix in weightMatrices:
        update = T.grad(loss, weightMatrix)
        update_weights.append((weightMatrix, weightMatrix - update * epsilon))
    train_MemNN_func = theano.function(inputs=[X,q,y], outputs=[loss,y_hat], updates=update_weights, 
                        mode=theano.compile.MonitorMode(
#                             pre_func=inspect_inputs,
                            post_func=detect_nan))
    return train_MemNN_func

train_MemNN_func = train_MemNN(loss, X, q, y, y_hat)

In [14]:
def train_model(in_vect, question, answers, epochs=100):
    train_errors = []
    y_hats = []
    for i in xrange(epochs):
        error = 0
        [cur_loss, cur_yhat] = train_MemNN_func(in_vect, question, answers)
        error += cur_loss
#         print(error)
        train_errors.append(error)
        y_hats.append(cur_yhat)
    return [train_errors, y_hats]

In [16]:
in_vect = tensor_stack.astype(int)
question = np.random.randn(5,numTrainQ).astype(int)
answers = np.array([np.array([0,0,0,0,1]),np.array([0,0,0,1,0]),np.array([0,1,0,0,0]),np.array([1,0,0,0,0])]).astype(int)

# print(in_vect)
# print(question)
# print(answers)
# print(type(in_vect[0][0]))
[train_errors, y_hats] = train_model(in_vect, question, answers, 1000)
print(train_errors)
print(y_hats[-1])
print(answers)

[1.6091778248987971, 1.6091753225475822, 1.6091728173995135, 1.6091703094290393, 1.6091677986105728, 1.6091652849184899, 1.6091627683271315, 1.6091602488108001, 1.6091577263437629, 1.6091552009002472, 1.6091526724544447, 1.6091501409805076, 1.6091476064525498, 1.6091450688446469, 1.6091425281308349, 1.60913998428511, 1.6091374372814284, 1.6091348870937074, 1.6091323336958219, 1.6091297770616069, 1.6091272171648554, 1.6091246539793198, 1.6091220874787096, 1.6091195176366919, 1.6091169444268922, 1.6091143678228912, 1.6091117877982279, 1.6091092043263964, 1.6091066173808468, 1.6091040269349857, 1.6091014329621731, 1.6090988354357254, 1.6090962343289128, 1.6090936296149587, 1.6090910212670422, 1.6090884092582936, 1.6090857935617973, 1.6090831741505907, 1.6090805509976618, 1.6090779240759523, 1.6090752933583539, 1.6090726588177104, 1.6090700204268158, 1.6090673781584146, 1.6090647319852014, 1.6090620818798198, 1.609059427814864, 1.6090567697628755, 1.6090541076963447, 1.6090514415877109, 1.