In [1]:
import os
import sys
import numpy as np
import theano.tensor as T
import codecs
import theano.sandbox.cuda
theano.sandbox.cuda.use("gpu0")

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, CuDNN 4007)


In [3]:
sys.path.append('/usr1/home/ssandeep/UltraDeep/')

In [4]:
from network import LSTM
from layer import HiddenLayer, EmbeddingLayer
from learning_method import LearningMethod

In [5]:
path_to_oracle = '/usr1/home/ssandeep/IncrementalMT/oracle.tsv'

In [6]:
lines = [line.strip().split(' ||| ') for line in codecs.open(path_to_oracle, 'r', encoding='utf8')]

In [7]:
sentences = []
curr_sentence_pointer = 0
for ind, line in enumerate(lines):
    if len(line) == 1 and line[0] == '':
        sentences.append(lines[curr_sentence_pointer:ind])
        curr_sentence_pointer = ind + 1

In [8]:
soure_words = set()
for sentence in sentences:
    source_sentence = sentence[0][0]
    for word in source_sentence.split():
        soure_words.add(word)

In [9]:
target_words = set()
for sentence in sentences:
    target_sentence = sentence[1][0]
    for word in target_sentence.split():
        target_words.add(word)

In [10]:
for sentence in sentences:
    for state in sentence[2:]:
        assert len(state) == 5

In [11]:
source_word2ind = {word:ind for ind, word in enumerate(soure_words)}
source_ind2word = {ind:word for ind, word in enumerate(soure_words)}
target_word2ind = {word:ind for ind, word in enumerate(target_words)}
target_ind2word = {ind:word for ind, word in enumerate(target_words)}

In [12]:
source_word2ind['<s>'] = len(source_word2ind)
source_word2ind['</s>'] = len(source_word2ind) + 1
source_word2ind['$NONE$'] = len(source_word2ind) + 2
target_word2ind['<s>'] = len(target_word2ind)
target_word2ind['</s>'] = len(target_word2ind) + 1
target_word2ind['$NONE$'] = len(target_word2ind) + 2
source_ind2word[len(source_word2ind)] = '<s>'
source_ind2word[len(source_word2ind) + 1] = '</s>'
source_ind2word[len(source_word2ind) + 2] = '$NONE$'
target_ind2word[len(target_word2ind)] = '<s>' 
target_ind2word[len(target_word2ind) + 1] = '</s>' 
target_ind2word[len(target_word2ind) + 2] = '$NONE$'

In [13]:
stack_input = T.ivector()
forward_context_input = T.ivector()
target_input = T.ivector()
action_prediction = T.scalar()
target_output = T.ivector()

In [14]:
# Generate synthetic data to test dimensions
syn_stack_input = np.random.randint(low=0, high=len(source_word2ind), size=(100,)).astype(np.int32)
syn_forward_context_input = np.random.randint(low=0, high=len(source_word2ind), size=(100,)).astype(np.int32)
syn_action_prediction = np.random.randint(low=0, high=2, size=(1,)).astype(np.int32)
syn_target_input = np.random.randint(low=0, high=len(target_word2ind), size=(100,)).astype(np.int32)
syn_target_output = np.random.randint(low=0, high=len(target_word2ind), size=(101,)).astype(np.int32)

In [20]:
# Neural Inventory
source_embedding = EmbeddingLayer(input_dim=len(soure_words), output_dim=128)
stack_source_lstm = LSTM(input_dim=source_embedding.output_dim, hidden_dim=256, name='source_stack_lstm', with_batch=False)
forward_context_lstm = LSTM(input_dim=source_embedding.output_dim, hidden_dim=256, name='source_forward_context_lstm', with_batch=False)
target_lstm = LSTM(input_dim=stack_source_lstm.hidden_dim, hidden_dim=256, name='target_lstm', with_batch=False)
target_embedding = EmbeddingLayer(input_dim=len(target_words), output_dim=target_lstm.input_dim)
action_prediction_weights = HiddenLayer(input_dim=stack_source_lstm.hidden_dim + forward_context_lstm.hidden_dim, output_dim=1)
target_word_decoding_weights = HiddenLayer(input_dim=target_lstm.hidden_dim, output_dim=len(target_word2ind), activation='softmax')

In [31]:
xx = np.random.rand(3,)

In [34]:
np.repeat(xx, 5, axis=1)

ValueError: axis(=1) out of bounds

In [22]:
# =====================================================
# The computational graph for this method
# =====================================================

# Get the embedding matrices seq_len x embdding_dim
stack_embedding_matrix = source_embedding.link(stack_input[::-1])
forward_contenxt_embedding_matrix = source_embedding.link(forward_context_input)
# target_embedding_matrix = target_embedding.fprop(target_input)

# Get LSTM representations of the stack, forward context
stack_lstm_representation = stack_source_lstm.link(stack_embedding_matrix)
forward_context_lstm_representation = forward_context_lstm.link(forward_contenxt_embedding_matrix)

# Concatenate representations and make a prediction about what action to take
concatenated_representation = T.concatenate((stack_lstm_representation, forward_context_lstm_representation))
prediction = action_prediction_weights.link(concatenated_representation)

# Compute squared-error loss between predicted action and gold action
action_prediction_loss = ((action_prediction - prediction) ** 2).mean()

# Reshape the concatenated representation from the encoder and stack it with the target
target_embeddings = T.vertical_stack((stack_lstm_representation.reshape((1, -1))), target_embedding.link(target_input))

# Get the decoded sentence
target_lstm.link(target_embeddings)

# Decode words
decoded_words = target_word_decoding_weights.link(target_lstm.h)

# Compute seq-seq loss
decoding_loss = T.nnet.categorical_crossentropy(decoded_words, target_output).mean()

In [23]:
# Sanity check of Output dimensions
print stack_embedding_matrix.eval(
    {
        stack_input:syn_stack_input
    }
).shape


print forward_contenxt_embedding_matrix.eval(
    {
        forward_context_input:syn_forward_context_input
    }
).shape

print stack_lstm_representation.eval(
    {
        stack_input:syn_stack_input, 
    }
).shape
print forward_context_lstm_representation.eval(
    {
        forward_context_input:syn_forward_context_input,
    }
).shape
print concatenated_representation.eval(
    {
        stack_input:syn_stack_input,
        forward_context_input:syn_forward_context_input,
    }
).shape
print prediction.eval(
    {
        stack_input:syn_stack_input,
        forward_context_input:syn_forward_context_input,
    }
).shape
print target_embeddings.eval(
    {
        stack_input:syn_stack_input,
        target_input:syn_target_input
    }
).shape
print target_lstm_representations.eval(
    {
        stack_input:syn_stack_input,
        target_input:syn_target_input
    }
).shape
print decoded_words.eval(
    {
        stack_input:syn_stack_input,
        target_input:syn_target_input
    }
).shape
print decoding_loss.eval(
    {
        stack_input:syn_stack_input,
        target_input:syn_target_input,
        target_output:syn_target_output
    }
)

(100, 128)
(100, 128)
(256,)
(256,)
(512,)
(1,)
(101, 256)
(256,)
(101, 71180)
11.1729745865


In [24]:
action_params = source_embedding.params + stack_source_lstm.params + forward_context_lstm.params + action_prediction_weights.params
seq_seq_params = source_embedding.params + stack_source_lstm.params + target_embedding.params + target_lstm.params

In [27]:
'''
Update parameters using ADAM
'''
updates_action=LearningMethod(clip=5.0).get_updates('adam', action_prediction_loss, action_params)

#updates_action = Optimizer().adam(
#                action_prediction_loss,
#                action_params,
#        )

In [28]:
'''
Update parameters using ADAM
'''
updates_seq_seq = LearningMethod(clip=5.0).get_updates('adam', decoding_loss, seq_seq_params)
#updates_seq_eq = Optimizer().adam(
#                decoding_loss,
#                seq_seq_params,
#        )

In [29]:
f_train_action = theano.function(
    inputs=[stack_input, forward_context_input, action_prediction],
    outputs=[action_prediction_loss],
    updates=updates_action
)

In [30]:
f_train_seq_seq = theano.function(
    inputs=[stack_input, target_input, target_output],
    outputs=[decoding_loss],
    updates=updates_seq_seq
)

In [None]:
for sentence in sentences:
    
    for state in sentence[2:]:
        word = state[0]
        action = state[1]
        stack_state = state[2]
        forward_context = state[3]
        translation = state[4]
        stack_words = [english_word2ind[word] for word in stack_state]
        forward_context_words = [english_word2ind[word] for word in reversed(forward_context)]
        action = 

In [None]:
f_train_action()

In [82]:
x = [1,1,2,2,3,4,4,4,4,4,4,4]

In [86]:
from collections import Counter
Counter(x).most_common()

[(4, 7), (1, 2), (2, 2), (3, 1)]