In [1]:
import sys

# Load the raw corpus as lists of lines (trailing newlines are kept by
# readlines()). Reviews and labels are paired by position further down.
# Context managers guarantee each file is closed even if reading fails.
with open('Grokking-Deep-Learning-master/reviews.txt') as reviews_file:
    raw_reviews = reviews_file.readlines()

with open('Grokking-Deep-Learning-master/labels.txt') as labels_file:
    raw_labels = labels_file.readlines()

# Tokenise each review on single spaces. Every review is stored as a *set*
# of tokens (so `tokens` is a list of sets), which collapses words repeated
# within the same review.
tokens = [set(review.split(" ")) for review in raw_reviews]

# The vocabulary is the union of all non-empty tokens over every review.
vocab = {word for sent in tokens for word in sent if len(word) > 0}

# Freeze the vocabulary into a list so each word gets a fixed position, then
# build the lookup from a word to its position in that list.
vocab = list(vocab)
word2index = {word: position for position, word in enumerate(vocab)}

# Convert every review into a list of vocabulary indices using word2index.
# Tokens that are not in the vocabulary (the empty string produced by
# splitting on " " is never added to it) are skipped with an explicit
# membership test, so unrelated errors are no longer silently swallowed.
input_dataset = list()
for sent in tokens:
    sent_indices = [word2index[word] for word in sent if word in word2index]
    # De-duplicate the indices before storing them, as the training loop uses
    # them for fancy-index updates on the embedding matrix.
    input_dataset.append(list(set(sent_indices)))

# Convert the textual labels to binary targets: 1 for 'positive', 0 otherwise.
# Surrounding whitespace is stripped before comparing so that a final line
# without a trailing newline, or a file with '\r\n' line endings, is still
# recognised as positive.
target_dataset = [1 if label.strip() == 'positive' else 0 for label in raw_labels]

In [2]:
import numpy as np

# Fix the seed of NumPy's global random generator so that the random weight
# initialisation below is repeatable from run to run.
np.random.seed(seed=1)

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of x.

    x may be a scalar or a NumPy array; for an array the function is applied
    element-wise and the result has the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Hyperparameters and model state.
#   alpha       -- learning rate applied to both weight matrices
#   iterations  -- number of full passes over the training portion
#   hidden_size -- width of the hidden (embedding) layer
alpha, iterations = (0.01, 2)
hidden_size = 100
# Both weight matrices start uniformly distributed in [-0.1, 0.1).
weights_0_1 = 0.2*np.random.random((len(vocab), hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size, 1)) - 0.1
# Running counters; they are NOT reset between passes, so the reported
# accuracy is cumulative over all passes so far.
correct, total = (0, 0)

# The last 1000 examples are kept back for testing; train on the rest.
train_size = len(input_dataset) - 1000

# The loop variable is named `epoch` (not `iter`) so that the builtin iter()
# is not shadowed in the notebook's global namespace.
for epoch in range(iterations):
    for i in range(train_size):

        # x holds the vocabulary indices of the words in the review rather
        # than a one-hot encoded vector; y is the 0/1 target.
        x, y = (input_dataset[i], target_dataset[i])

        # Forward propagation. Summing the selected rows of weights_0_1 acts
        # as the embedding layer.
        layer_1 = sigmoid(np.sum(weights_0_1[x], axis=0))
        layer_2 = sigmoid(np.dot(layer_1, weights_1_2))

        # Backward propagation.
        layer_2_delta = layer_2 - y
        # NOTE(review): the derivative of the hidden-layer sigmoid is not
        # applied here -- confirm this simplification is intended.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)

        # Weight update: only the embedding rows used by this review change.
        weights_0_1[x] -= layer_1_delta * alpha
        weights_1_2 -= np.outer(layer_1, layer_2_delta) * alpha

        # The prediction counts as correct when the sigmoid output lies
        # within 0.5 of the target, i.e. on the right side of the threshold.
        if(np.abs(layer_2_delta) < 0.5):
            correct += 1

        total += 1
        if(i % 10 == 9):
            # Progress as a truncated percentage with two decimals, computed
            # with integer arithmetic instead of slicing str(float), which
            # breaks on short or scientific-notation representations.
            # The denominator is the full dataset size, so the displayed value
            # tops out below 100% because of the held-out examples.
            whole, hundredths = divmod((i * 10000) // len(input_dataset), 100)
            progress = '%02d.%02d' % (whole, hundredths)
            # NOTE(review): the accuracy printed below is a fraction in
            # [0, 1] even though it is followed by a '%' sign.
            sys.stdout.write('\rIter:' + str(epoch)
                             + ' Progress:' + progress
                             + '% Training Accuracy:'
                             + str(correct/float(total)) + '%')
    print()


Iter:0 Progress:95.99% Training Accuracy:0.82875%
Iter:1 Progress:95.99% Training Accuracy:0.864375%


In [3]:

# Evaluation: run a forward pass over the final 1000 examples, which the
# training loop never touched, and report the fraction classified correctly.
correct, total = 0, 0
holdout_start = len(input_dataset) - 1000

for i in range(holdout_start, len(input_dataset)):
    x, y = input_dataset[i], target_dataset[i]

    # Forward pass only -- the weights are left unchanged here.
    hidden_sum = np.sum(weights_0_1[x], axis=0)
    layer_1 = sigmoid(hidden_sum)
    layer_2 = sigmoid(np.dot(layer_1, weights_1_2))

    # Correct when the output is within 0.5 of the 0/1 target.
    prediction_error = np.abs(layer_2 - y)
    if prediction_error < 0.5:
        correct += 1
    total += 1

print("Test Accuracy:" + str(correct / float(total)))

Test Accuracy:0.848
