In [1]:
# Read both corpus files fully into memory: one list entry per line, with the
# trailing newline still attached (readlines() keeps it).
# The `with` blocks guarantee each file is closed even if reading raises.
# NOTE(review): presumably line i of labels.txt labels line i of reviews.txt —
# confirm against the data files.
with open('reviews.txt') as reviews_file:
    raw_reviews = reviews_file.readlines()

with open('labels.txt') as labels_file:
    raw_labels = labels_file.readlines()

# Tokenize: each review becomes the SET of its space-separated tokens, so a
# word repeated inside one review is kept only once. Splitting on a single
# space yields empty strings for consecutive spaces and leaves a newline
# attached to whichever token it touches.
tokens = [set(review.split(" ")) for review in raw_reviews]

# Vocabulary: every non-empty token seen in any review. Sorting gives the
# words a stable order, so word indices are identical after a kernel restart
# (iteration order of a set of strings is not stable across processes).
vocab = sorted({word for sentence in tokens for word in sentence if len(word) > 0})

# Lookup table: word -> its position in `vocab`.
word_to_index = {word: i for i, word in enumerate(vocab)}

# Encode every review as a list of unique vocabulary indices. The only tokens
# missing from `word_to_index` are empty strings (filtered out of the
# vocabulary above); they are skipped explicitly instead of by catching
# every exception, so genuine errors are no longer silently swallowed.
input_dataset = [
    sorted({word_to_index[word] for word in sentence if word in word_to_index})
    for sentence in tokens
]

# Binary targets: 1 for a 'positive' label, 0 for anything else.
# The label is stripped before comparing so that a final line without a
# trailing newline (or with stray surrounding whitespace) is still recognised.
target_dataset = [1 if label.strip() == 'positive' else 0 for label in raw_labels]

In [2]:
# forward propagation
import numpy as np

def softmax(x_):
    """Row-wise softmax.

    Args:
        x_: array-like of scores. Anything with fewer than two dimensions is
            promoted with ``np.atleast_2d`` (a 1-D vector becomes one row).

    Returns:
        An array with the same shape as the promoted input in which the
        values along axis 1 are non-negative and sum to 1.
    """
    x = np.atleast_2d(x_)
    if x.size:
        # Softmax is unchanged by subtracting a per-row constant; using the
        # row maximum keeps exp() from overflowing to inf (and the result
        # from becoming NaN) when the scores are large.
        x = x - np.max(x, axis=1, keepdims=True)
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

# Word embeddings: every word gets its own independent 1x3 row vector of zeros.
word_vectors = {
    word: np.zeros((1, 3))
    for word in (
        'yankees', 'bears', 'braves',
        'red', 'socks',
        'lose', 'defeat', 'beat', 'tie',
    )
}

# Weights mapping the 3-dimensional sentence embedding to one score per word.
sentence_to_output_weights = np.random.rand(3, len(word_vectors))

# Transition weights applied between consecutive words; starts as the identity.
identity = np.identity(3)

# Forward pass over "red socks defeat": transform the running sentence vector
# with the transition weights, then add the next word's embedding.
layer_0 = word_vectors['red']
layer_1 = word_vectors['socks'] + layer_0.dot(identity)
layer_2 = word_vectors['defeat'] + layer_1.dot(identity)

# Turn the final sentence vector into a probability for each word.
prediction = softmax(layer_2.dot(sentence_to_output_weights))
print(prediction)

[[0.11111111 0.11111111 0.11111111 0.11111111 0.11111111 0.11111111
  0.11111111 0.11111111 0.11111111]]


In [3]:
# backpropagation
y = np.array([1,0,0,0,0,0,0,0,0]) # target one-hot vector for "yankees"

# Error at the output (softmax prediction minus one-hot target), then pushed
# back through the network. Each word vector was simply added to its layer,
# so it receives that layer's delta unchanged.
pred_delta = prediction - y
layer_2_delta = pred_delta.dot(sentence_to_output_weights.T)
defeat_delta = layer_2_delta
layer_1_delta = layer_2_delta.dot(identity.T)
socks_delta = layer_1_delta
layer_0_delta = layer_1_delta.dot(identity.T)

# Compute every weight gradient BEFORE applying any update. layer_0 is the
# very same array object as word_vectors['red'], so updating that word vector
# in place first would change layer_0 and corrupt the gradient for `identity`.
identity_grad_from_layer_0 = np.outer(layer_0, layer_1_delta)
identity_grad_from_layer_1 = np.outer(layer_1, layer_2_delta)
output_weights_grad = np.outer(layer_2, pred_delta)

alpha = 0.01  # learning rate

# Gradient-descent step on the three word vectors used in the sentence.
word_vectors['red'] -= layer_0_delta * alpha
word_vectors['socks'] -= socks_delta * alpha
word_vectors['defeat'] -= defeat_delta * alpha

# The same `identity` matrix is shared by every transition, so it receives one
# gradient contribution per transition. Once training moves it away from the
# true identity, combining word vectors is no longer plain (commutative)
# addition, which is what lets word order matter.
identity -= identity_grad_from_layer_0 * alpha
identity -= identity_grad_from_layer_1 * alpha

sentence_to_output_weights -= output_weights_grad * alpha