In [1]:
import random, math
from collections import Counter
import numpy as np

# Read the bAbI task-1 training file. The context manager guarantees the
# handle is closed even if reading raises part-way through.
with open('qa1_single-supporting-fact_train.txt', 'r') as f:
    raw = f.readlines()

# Tokenise the first 1000 lines: lower-case, strip the newline, split on single
# spaces and drop the first field of each line (presumably the per-line index
# of the bAbI format -- verify against the data file).
tokens = [line.lower().replace("\n", "").split(" ")[1:] for line in raw[0:1000]]

In [2]:
# Collect every distinct token that occurs in the corpus.
vocab = set()
for sent in tokens:
    vocab.update(sent)
# Sort before freezing into a list: iterating a set of strings follows Python's
# per-process hash randomisation, so an unsorted list would assign different
# indices to the same word on every kernel restart and make the seeded weight
# initialisation (and every printed result) non-reproducible.
vocab = sorted(vocab)

# Reverse lookup: word -> its position in `vocab`.
word2index = {word: i for i, word in enumerate(vocab)}
    
def words2indices(sentence):
    """Translate a sequence of words into their vocabulary indices.

    Each word is looked up in the module-level ``word2index`` mapping and the
    resulting indices are returned as a new list, in the same order. A word
    that is missing from the mapping raises ``KeyError``.
    """
    return [word2index[token] for token in sentence]

def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so large
    scores do not overflow ``np.exp``; a constant shift does not change the
    normalised result.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    total = exps.sum(axis = 0)
    return exps / total

In [3]:
# Seed NumPy's global RNG before any random draw below.
np.random.seed(1)

# Width of the hidden state and of each word embedding.
embed_size = 10

# Word embeddings, one row per vocabulary entry, drawn uniformly from
# [-0.05, 0.05). This must stay the first random draw after seeding.
embed = 0.1 * (np.random.rand(len(vocab), embed_size) - 0.5)
# Hidden-to-hidden weights start as the identity matrix.
recurrent = np.eye(embed_size)
# Initial hidden state fed to the first prediction.
start = np.zeros(embed_size)
# Hidden-to-vocabulary projection, same initial range as `embed`
# (second random draw).
decoder = 0.1 * (np.random.rand(embed_size, len(vocab)) - 0.5)
# Row i is the one-hot vector for vocabulary index i.
one_hot = np.eye(len(vocab))

In [4]:
def predict(sent):
    """Run the recurrent language model forward over one sentence.

    Parameters
    ----------
    sent : sequence of int
        Vocabulary indices of the sentence's tokens.

    Returns
    -------
    layers : list of dict
        ``layers[0]`` holds only ``'hidden'`` (the module-level ``start``
        state). Every following entry ``layers[k]`` holds ``'pred'``, the
        softmax distribution computed from ``layers[k - 1]['hidden']``, and
        ``'hidden'``, the state after adding the embedding of ``sent[k - 1]``.
    loss : number
        Sum over the sentence of ``-log(pred[token])``; ``0`` when ``sent`` is
        empty.
    """
    layers = [{'hidden': start}]
    loss = 0

    for target in sent:
        prev_hidden = layers[-1]['hidden']
        layer = {}
        # Score the current token from the state *before* it is consumed.
        layer['pred'] = softmax(prev_hidden.dot(decoder))
        loss += -np.log(layer['pred'][target])
        # Advance the state: recurrent transition plus the token's embedding.
        layer['hidden'] = prev_hidden.dot(recurrent) + embed[target]
        layers.append(layer)
    return layers, loss

In [6]:
# Training hyper-parameters, hoisted out of the loop because they never change.
alpha = 0.001          # learning rate
n_iterations = 30000   # number of single-sentence updates
report_every = 1000    # print perplexity every this many iterations

for j in range(n_iterations):
    # Cycle through the corpus; the first token of each sentence is dropped.
    sent = words2indices(tokens[j % len(tokens)][1:])
    if not sent:
        # Nothing to predict. Skipping also avoids the divisions by
        # len(sent) == 0 in the updates and in the perplexity report below.
        continue
    layers, loss = predict(sent)

    # ---- backward pass, last layer first ----
    # NOTE(review): layers[k]['hidden_delta'] is built from layers[k]['pred'],
    # which predict() computes from layers[k - 1]['hidden'], yet the updates
    # below pair layers[i]['hidden_delta'] with embed[sent[i]] although sent[i]
    # only enters layers[i + 1]['hidden']. This looks like an off-by-one in the
    # gradient bookkeeping -- TODO confirm with a numerical gradient check
    # before changing it; the arithmetic here is intentionally left as-is.
    last_idx = len(layers) - 1
    for layer_idx in reversed(range(len(layers))):
        layer = layers[layer_idx]
        if layer_idx > 0:
            # layers[k]['pred'] was scored against sent[k - 1] in predict().
            target = sent[layer_idx - 1]
            layer['output_delta'] = layer['pred'] - one_hot[target]
            new_hidden_delta = layer['output_delta'].dot(decoder.transpose())

            if layer_idx == last_idx:
                # No later layer exists to propagate a delta back from.
                layer['hidden_delta'] = new_hidden_delta
            else:
                layer['hidden_delta'] = new_hidden_delta + layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())
        else:
            # Layer 0 has no prediction of its own; it only receives the delta
            # propagated back from layer 1.
            layer['hidden_delta'] = layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())

    # ---- weight updates (in place), each scaled by alpha / sentence length ----
    n_tokens = float(len(sent))
    start -= layers[0]['hidden_delta'] * alpha / n_tokens
    # `layer_idx` counts from 0 while `layer` walks layers[1:], so
    # layers[layer_idx] is always the layer *before* `layer`.
    for layer_idx, layer in enumerate(layers[1:]):
        decoder -= np.outer(layers[layer_idx]['hidden'], layer['output_delta']) * alpha / n_tokens

        embed_idx = sent[layer_idx]
        embed[embed_idx] -= layers[layer_idx]['hidden_delta'] * alpha / n_tokens
        recurrent -= np.outer(layers[layer_idx]['hidden'], layer['hidden_delta']) * alpha / n_tokens

    if j % report_every == 0:
        # Perplexity = exp(mean negative log-likelihood per token).
        print("Perplexity:" + str(np.exp(loss/len(sent))))

Perplexity:82.00554981177322
Perplexity:81.78040534788417
Perplexity:81.48655208505679
Perplexity:80.98975525597028
Perplexity:80.0296590060746
Perplexity:77.95790093323222
Perplexity:72.55037784432018
Perplexity:50.78663524924477
Perplexity:28.58804944997589
Perplexity:20.101694933551666
Perplexity:18.40256722433454
Perplexity:16.876154325976618
Perplexity:14.576792597386866
Perplexity:11.31826944856396
Perplexity:8.373895970108817
Perplexity:6.8705332472118075
Perplexity:5.990560821516737
Perplexity:5.389418994233242
Perplexity:5.034031267499967
Perplexity:4.829984142503316
Perplexity:4.70212051340197
Perplexity:4.622164756734335
Perplexity:4.563231676464281
Perplexity:4.499675311220026
Perplexity:4.419979381178922
Perplexity:4.326086193757514
Perplexity:4.2222888776249645
Perplexity:4.109409343633068
Perplexity:3.983243870937619
Perplexity:3.872262743654662


In [8]:
# Which tokenised sentence to inspect.
sent_index = 5

# Forward pass only; the returned loss is not needed here.
l, _ = predict(words2indices(tokens[sent_index]))

print(tokens[sent_index])

# The loop variables are named `prev_word` / `true_word` rather than
# `input` / `true` so the built-in input() is not shadowed for the rest of the
# kernel session.
# NOTE(review): l[k]['pred'] is computed in predict() before token k - 1 is
# consumed -- confirm the "Prev Input" / "True" labels line up as intended.
for i, each_layer in enumerate(l[1:-1]):
    prev_word = tokens[sent_index][i]
    true_word = tokens[sent_index][i + 1]
    pred = vocab[each_layer['pred'].argmax()]
    # ljust pads to the same fixed column widths (12 and 15) as manual spacing.
    print("Prev Input:" + prev_word.ljust(12) +
          "True:" + true_word.ljust(15) + "Pred:" + pred)

['where', 'is', 'daniel?', '\thallway\t4']
Prev Input:where       True:is             Pred:is
Prev Input:is          True:daniel?        Pred:to
Prev Input:daniel?     True:	hallway	4     Pred:the
