# The Journey from Mathematics to Machine Learning

## Series 1: Linear algebra

### Episode 7: Understand language

- NLP (Jupyter Notebook)
    1. [Predicting IMDB Movie reviews](#1.-Predicting-IMDB-Movie-reviews)


    
    

## 1. Predicting IMDB Movie reviews

In [2]:
import numpy as np

### Data

In [3]:
with open('dataset/reviews.txt') as f:
    raw_reviews = f.readlines()
with open('dataset/labels.txt') as f:
    raw_labels = f.readlines()

In [4]:
raw_reviews[0]

'bromwell high is a cartoon comedy . it ran at the same time as some other programs about school life  such as  teachers  . my   years in the teaching profession lead me to believe that bromwell high  s satire is much closer to reality than is  teachers  . the scramble to survive financially  the insightful students who can see right through their pathetic teachers  pomp  the pettiness of the whole situation  all remind me of the schools i knew and their students . when i saw the episode in which a student repeatedly tried to burn down the school  i immediately recalled . . . . . . . . . at . . . . . . . . . . high . a classic line inspector i  m here to sack one of your teachers . student welcome to bromwell high . i expect that many adults of my age think that bromwell high is far fetched . what a pity that it isn  t   \n'

#### Creating an input vector

In [5]:
onehots = {}
onehots['goy'] = np.array([1, 0, 0, 0])
onehots['muuhai'] = np.array([0, 1, 0, 0])
onehots['kino'] = np.array([0, 0, 1, 0])
onehots['baina'] = np.array([0, 0, 0, 1])

In [6]:
sentence = ['goy', 'kino', 'baina']

In [7]:
x = np.array([0, 0, 0, 0])
for i in range(len(sentence)):
    x += onehots[sentence[i]]
x

array([1, 0, 1, 1])

In [18]:
tokens = []
for review in raw_reviews:
    review = set(review.split(' '))
    review.remove('')
    tokens.append(list(review))
len(tokens[0])

93

In [9]:
words = set()
for review in tokens:
    for word in review:
        words.add(word)
words = list(words)

In [10]:
len(words)

74074

In [11]:
word_to_index = {}
for i, word in enumerate(words):
    word_to_index[word] = i

In [12]:
len(word_to_index)

74074

In [13]:
len(tokens)

25000

In [14]:
input_dataset = np.zeros((len(tokens), len(words)))

In [15]:
for i, review in enumerate(tokens):
    for word in review:
        input_dataset[i, word_to_index[word]] = 1

In [17]:
input_dataset[0].sum()

93.0

In [19]:
target_dataset = np.array([])
for label in raw_labels:
    if label == 'positive\n':
        target_dataset = np.append(target_dataset, 1)
    else:
        target_dataset = np.append(target_dataset, 0)

In [20]:
target_dataset.shape

(25000,)

In [21]:
target_dataset = target_dataset.reshape(25000, 1)

In [22]:
train_dataset = input_dataset[:24000]
train_labels = target_dataset[:24000]

test_dataset = input_dataset[24000:]
test_labels = target_dataset[24000:]

In [23]:
train_dataset.shape

(24000, 74074)

### Network

#### Linear Layer

In [25]:
class Layer_Linear:
    """Representing a neural network layer"""
    
    def __init__(self, n_inputs, n_outputs):
        """Initlize weights and bias"""
        self.weights = 0.01 * np.random.randn(n_inputs, n_outputs)
        self.biases = np.zeros((1, n_outputs))
    
    def forward(self, inputs):
        """
        It multiplies the inputs by the weights 
        and then sums them, and then sums bias.
        """
        #To calculate gradient, remembering input values
        self.inputs = inputs
        #Calculate outputs' values
        self.output = np.dot(inputs, self.weights) + self.biases
    
    def backward(self, dvalues):
        """Gradient with respect to parameters and input"""
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dresults = np.dot(dvalues, self.weights.T)

#### Activation functions

In [26]:
class Activation_ReLU:
    """ReLU activation"""
    
    def forward(self, inputs):
        """Forward pass"""
        
        #To calculate gradient, remembering input values
        self.inputs = inputs
        
        #Calculate outputs' values
        self.output = np.maximum(0, inputs)
        
    def backward(self, dvalues):
        """Backward pass"""
        
        self.dresults = self.inputs > 0
        self.dresults = self.dresults * dvalues

In [69]:
class Activation_Sigmoid:
    """Sigmoid activation"""
    
    def forward(self, inputs):
        """Forward pass"""
        
        #Calculate outputs' values
        self.output = 1 / (1 + np.exp(-inputs))
    
    def backward(self, dvalues):
        """Backward pass"""
        
        self.dresults = dvalues * (1 - self.output) * self.output

#### Loss function

In [28]:
class Loss_MSE():
    """MSE Loss function"""
    
    def forward(self, y_pred, y_true):
        """Forward pass"""     
        error = np.mean((y_pred - y_true) ** 2)
        return error
    
    def backward(self, y_pred, y_true):
        """Derivative of MSE with respect to preds"""
        
        #Number of samples
        samples = len(y_pred)
        
        #Number of output nodes
        outputs = len(y_pred[0])
        
        #Derivative of MSE
        self.dresults = 2 * (y_pred - y_true) / (outputs * samples)

#### Optimizer

In [29]:
class Optimizer_GD:
    """Gradient descent optimizer"""
    
    def __init__(self, alpha=1.):
        """Initialize hyperparameters"""
        self.alpha = alpha

    def update_parameters(self, layer):
        """Update parameters"""
        
        weights_delta = layer.dweights * self.alpha
        biases_delta = layer.dbiases * self.alpha
        
        #Update parameters
        layer.weights -= weights_delta
        layer.biases -= biases_delta

### Hyperparameter

In [73]:
max_epoch = 10
alpha = 0.1
batch_size = 128

### Initialize the model

In [74]:
layer1 = Layer_Linear(len(words), 100)
activation1 = Activation_ReLU()

layer2 = Layer_Linear(100, 1)
activation2 = Activation_Sigmoid()

#### Initlize optimizer and loss function

In [75]:
loss = Loss_MSE()
optimizer = Optimizer_GD(alpha)

### Training the model

In [76]:
train_steps = len(train_dataset) // batch_size
if train_steps * batch_size < len(train_dataset):
    train_steps += 1

In [77]:
for epoch in range(max_epoch):
    train_error = 0
    train_accuracy = 0
    
    for i in range(train_steps):
        batch_start = i * batch_size
        batch_end = (i+1) * batch_size
        
        input = train_dataset[batch_start:batch_end]
        true = train_labels[batch_start:batch_end]
        
        #Forward pass
        layer1.forward(input)
        activation1.forward(layer1.output)
        layer2.forward(activation1.output)
        activation2.forward(layer2.output)
        train_error += loss.forward(activation2.output, true) / train_steps
        train_accuracy += np.mean((np.abs(activation2.output - true) < 0.5)) / train_steps
        
        #Backward pass
        loss.backward(activation2.output, true)
        activation2.backward(loss.dresults)
        layer2.backward(activation2.dresults)
        activation1.backward(layer2.dresults)
        layer1.backward(activation1.dresults)
        
        #Update parameters
        optimizer.update_parameters(layer2)
        optimizer.update_parameters(layer1)

    print(f'epoch: {epoch},',
          f'Train error: {train_error:.3f},',
          f'Train accuracy: {train_accuracy:.3f}')

epoch: 0, Train error: 0.249, Train accuracy: 0.586
epoch: 1, Train error: 0.212, Train accuracy: 0.762
epoch: 2, Train error: 0.137, Train accuracy: 0.832
epoch: 3, Train error: 0.108, Train accuracy: 0.862
epoch: 4, Train error: 0.094, Train accuracy: 0.878
epoch: 5, Train error: 0.085, Train accuracy: 0.893
epoch: 6, Train error: 0.078, Train accuracy: 0.902
epoch: 7, Train error: 0.073, Train accuracy: 0.911
epoch: 8, Train error: 0.068, Train accuracy: 0.917
epoch: 9, Train error: 0.064, Train accuracy: 0.923


#### Testing the model

In [81]:
test_steps = len(test_dataset) // batch_size
if test_steps * batch_size < len(test_dataset):
    test_steps += 1

In [83]:
test_error = 0
test_accuracy = 0

for i in range(test_steps):
    batch_start = i * batch_size
    batch_end = (i+1) * batch_size
    
    input = test_dataset[batch_start:batch_end]
    true = test_labels[batch_start:batch_end]
    
    activation1.forward(layer1.output)
    layer2.forward(activation1.output)
    activation2.forward(layer2.output)
    test_error += loss.forward(activation2.output, true) / test_steps
    print(test_error)
    test_accuracy += np.mean((np.abs(activation2.output - true) < 0.5)) / test_steps

ValueError: operands could not be broadcast together with shapes (104,1) (128,1) 

In [80]:
print(f'Test error: {test_error:.3f},',
      f'Test accuracy: {test_accuracy:.3f}')

Test error: 0.105, Test accuracy: 0.854
