By default, Jupyter notebooks do not offer IntelliSense-style code completion. If you would like to enable it, add the following code.

In [2]:
# Turn on IPython's "greedy" tab completion to get IntelliSense-like suggestions.
# NOTE(review): recent IPython releases reportedly deprecate this option in favour
# of `IPCompleter.evaluation` — confirm against the installed IPython version.
%config IPCompleter.greedy=True

# Binary addition
_What exactly will the RNN learn?_

**The RNN is going to learn the carry bit on its own!**


| input1 | input2 | carry-in | sum | carry-out |
|:---:|:---:|:---:|:---:|:---:|
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 1 | 0 |
| 0 | 1 | 0 | 1 | 0 |
| 0 | 1 | 1 | 0 | 1 |
| 1 | 0 | 0 | 1 | 0 |
| 1 | 0 | 1 | 0 | 1 |
| 1 | 1 | 0 | 0 | 1 |
| 1 | 1 | 1 | 1 | 1 |

## Samples
As a first step, sample data is needed.
A lookup table is used to help us convert an int to its binary representation and vice versa.

int2binary (__lookup table__)

| int | binary array |
| :--- | :---: |
| 0 | [0, 0, 0, 0, 0, 0, 0, 0] |
| 1 | [0, 0, 0, 0, 0, 0, 0, 1] |
| 2 | [0, 0, 0, 0, 0, 0, 1, 0] |
| ... | ... |
| 255 | [1, 1, 1, 1, 1, 1, 1, 1] |

In [3]:
import copy, numpy as np
from abc import ABC, abstractmethod

In [4]:
# Seed NumPy's global RNG so the randomly initialised weights and the randomly
# sampled addition problems are identical on every Restart & Run All.
np.random.seed(0)

In [5]:
class dataset:
    """Source of random binary-addition training samples.

    Holds a lookup table ``int2binary`` mapping every integer in
    ``[0, 2**binary_dim)`` to its bit array (``np.uint8``, most significant bit
    first, ``binary_dim`` entries long) and draws random ``a + b = c`` problems
    from it.
    """

    def __init__(self, binary_dim):
        """Build the int -> binary lookup table.

        Args:
            binary_dim: number of bits used to encode each number.
        """
        # One past the largest encodable number, i.e. 2**binary_dim.
        self.largest_number = pow(2, binary_dim)

        # Extract every bit with shifts instead of np.unpackbits on uint8:
        # unpackbits always yields exactly 8 bits per number, which is only
        # correct when binary_dim == 8. Shifting yields binary_dim bits per row,
        # e.g. for binary_dim == 8: binary[2] == [0, 0, 0, 0, 0, 0, 1, 0].
        numbers = np.arange(self.largest_number)
        shifts = np.arange(binary_dim - 1, -1, -1)  # MSB first
        binary = ((numbers[:, None] >> shifts) & 1).astype(np.uint8)

        # Lookup table for converting int to binary.
        self.int2binary = {i: binary[i] for i in range(self.largest_number)}

    def get_sample_addition_problem(self):
        """Generate one random addition problem ``a + b = c``.

        Both operands are drawn uniformly from ``[0, largest_number // 2)`` so
        that their sum is always smaller than ``largest_number`` and therefore
        present in the lookup table.

        Returns:
            Tuple ``(a, b, c, a_int, b_int, c_int)``: the binary encodings of the
            two operands and of their sum, followed by the same three values as
            plain integers.
        """
        # Integer division: randint expects an integer upper bound.
        operand_limit = self.largest_number // 2

        a_int = np.random.randint(operand_limit)
        b_int = np.random.randint(operand_limit)

        # True answer => summation
        c_int = a_int + b_int

        a = self.int2binary[a_int]
        b = self.int2binary[b_int]
        c = self.int2binary[c_int]

        return a, b, c, a_int, b_int, c_int


In [6]:
class activation(ABC):
    """Interface every activation function has to implement.

    Both operations are stateless, so they are declared as abstract *static*
    methods: implementations can then be called on the class as well as on an
    instance.
    """

    @staticmethod
    @abstractmethod
    def forward(net):
        """Apply the activation function to the net input ``net``."""
        pass

    @staticmethod
    @abstractmethod
    def backward(output):
        """Return the derivative of the activation, given its ``output``."""
        pass

**sigmoid activation function**

forward

$$ \sigma(x) = \frac{1}{1+e^{-x}}$$

backward
$$ \frac{\partial \sigma(x)}{\partial x} =  \sigma(x)(1- \sigma(x))$$

In [7]:
class sigmoid_activation(activation):
    """Logistic sigmoid activation.

    The methods take no ``self``; marking them as static methods makes them
    callable both on the class and on an instance.
    """

    @staticmethod
    def forward(net):
        """Return ``1 / (1 + exp(-net))``, element-wise for array input."""
        return 1/(1 + np.exp(-net))

    @staticmethod
    def backward(output):
        """Return the sigmoid derivative ``output * (1 - output)``.

        ``output`` must already be the sigmoid's output (i.e. the value
        returned by ``forward``), not the net input.
        """
        return output*(1 - output)

For every layer, the number of neurons and the activation function should be defined.

In [19]:
class network_layer(ABC):
    """Common base of all layers: a neuron count plus an optional activation."""

    def __init__(self, neuron_count, activation_function = None):
        """Remember the layer configuration.

        Args:
            neuron_count: number of neurons in the layer.
            activation_function: activation used by the layer; ``None`` when
                the layer has no activation.
        """
        self.activation_function = activation_function
        self.neuron_count = neuron_count

In [23]:
class input_layer(network_layer):
    """Input layer: projects the raw input onto the next layer's neurons."""

    # forward takes no ``self``; declaring it static keeps the class-level call
    # ``input_layer.forward(X, W)`` working and also allows calling it on an
    # instance, which previously failed with a TypeError.
    @staticmethod
    def forward(X, W_input):
        """Return the dot product of the input ``X`` and the weights ``W_input``."""
        return np.dot(X,W_input)

In [None]:
class weight:
    """Factory for randomly initialised weight matrices."""

    @staticmethod
    def GetWeightMatrix(first_dimension, second_dimension):
        """Return a ``(first_dimension, second_dimension)`` matrix of random weights.

        Values are drawn from NumPy's global RNG, uniformly in ``[0, 1)``, and
        then rescaled to ``[-1, 1)``.
        """
        shape = (first_dimension, second_dimension)
        unit_samples = np.random.random(shape)
        return unit_samples * 2 - 1

In [10]:
class loss_function(ABC):
    """Interface every loss function has to implement.

    ``compute`` is stateless, so it is declared as an abstract *static* method:
    implementations can then be called on the class as well as on an instance.
    """

    @staticmethod
    @abstractmethod
    def compute(target_value, predicted_value):
        """Return the loss between ``target_value`` and ``predicted_value``."""
        pass
    

**Mean squared error function**

https://en.wikipedia.org/wiki/Mean_squared_error

In [None]:
class mse_loss_function(loss_function):
    """Mean squared error loss."""

    # compute takes no ``self``; declaring it static keeps the class-level call
    # working and also allows calling it on an instance.
    @staticmethod
    def compute(target_value, predicted_value):
        """Return the mean of the squared differences between target and prediction."""
        return np.mean((target_value - predicted_value)**2)

In [None]:
class utility:
    """Helpers for reporting training progress."""

    @staticmethod
    def print_result(overallError, a_int, b_int, c, d):
        """Print the error, the predicted and true bit arrays, and the decoded sum.

        Args:
            overallError: accumulated error of the sample.
            a_int: first operand as an integer.
            b_int: second operand as an integer.
            c: true answer as a bit sequence.
            d: predicted answer as a bit sequence; its last element is treated
                as the least significant bit when it is decoded to a number.
        """
        print("Error:", overallError, sep="")
        print("Pred:", d, sep="")
        print("True:", c, sep="")

        # Walk the prediction from its last element, weighting each bit by the
        # matching power of two.
        decoded = sum(bit * 2 ** power for power, bit in enumerate(reversed(d)))

        print(a_int, " + ", b_int, " = ", decoded, sep="")
        print("------------")

In [None]:
class simple_binary_addition_rnn:
    """Minimal recurrent network that learns to add two binary numbers.

    The numbers are fed one bit position at a time, starting with the least
    significant bit (the last element of each bit array). At every step the
    network sees the pair ``(a_bit, b_bit)`` and predicts the matching bit of
    the sum; the recurrent hidden state is what lets it carry information from
    one bit position to the next.

    The step input is always the pair ``[a_bit, b_bit]`` and a single output
    value is read per step, so the network is expected to be built with
    ``input_dimension == 2`` and ``output_dimension == 1``.
    """

    def __init__(self,binary_dim, input_dimension, output_dimension, hidden_dimension, learning_rate):
        """Create the network with randomly initialised weights.

        Args:
            binary_dim: number of bit positions processed per sample.
            input_dimension: size of the input presented at each step.
            output_dimension: size of the output produced at each step.
            hidden_dimension: number of neurons in the recurrent hidden layer.
            learning_rate: factor applied to the accumulated weight updates.
        """
        self.binary_dim = binary_dim
        self.hidden_dimension = hidden_dimension
        self.learning_rate = learning_rate

        # initialize weights
        self.W_input = weight.GetWeightMatrix(input_dimension, hidden_dimension)
        self.W_hidden = weight.GetWeightMatrix(hidden_dimension, hidden_dimension)
        self.W_output = weight.GetWeightMatrix(hidden_dimension, output_dimension)

        self._reset_state()

    def _reset_state(self):
        """Clear everything that is recorded while processing a single sample."""
        self.overallError = 0

        # Hidden layer values of every step. Initially there is no previous
        # hidden state, so the list starts with an all-zero state.
        self.hidden_layer_values = [np.zeros(self.hidden_dimension)]

        # Output error deltas of every step; they are propagated back later.
        self.output_layer_deltas = []

    def feed_forward(self, a, b, c):
        """Run the network over one sample and record what backpropagation needs.

        Args:
            a: bit array of the first operand.
            b: bit array of the second operand.
            c: bit array of the true sum, used as the per-step target.

        Returns:
            Array shaped like ``c`` holding the predicted bits (the network
            output rounded to 0 or 1).
        """
        # Start every sample from a clean state; otherwise the hidden state and
        # the deltas of earlier samples would leak into this one and the lists
        # would grow without bound.
        self._reset_state()

        # Where the best guess is stored (binary encoded).
        d = np.zeros_like(c)

        for position in range(self.binary_dim):
            # With increasing position the bit location moves from the last
            # element towards the first, i.e. from the least to the most
            # significant bit.
            location = self.binary_dim - position - 1
            X = np.array([[a[location], b[location]]])

            # True value of the sum at this bit location.
            target = np.array([[c[location]]]).T

            # Hidden net input: [input . W_input] + [previous hidden state . W_hidden]
            net_1 = np.dot(X, self.W_input) + np.dot(self.hidden_layer_values[-1], self.W_hidden)
            # The activation is called through the class so the call works
            # whether or not its methods are declared static.
            layer_1_output = sigmoid_activation.forward(net_1)

            # Keep the hidden state: it is the recurrent input of the next step
            # and is needed again during backpropagation.
            self.hidden_layer_values.append(layer_1_output.copy())

            # Prediction for this bit from the new hidden state.
            net_2 = np.dot(layer_1_output, self.W_output)
            layer_2_output = sigmoid_activation.forward(net_2)

            # Delta rule for the cost C = 1/2 * (y_true - y_pred)^2.
            output_error = target - layer_2_output
            self.output_layer_deltas.append(output_error * sigmoid_activation.backward(layer_2_output))

            # Sum of the absolute errors over all bit positions.
            self.overallError += np.abs(output_error[0])

            # Round the output to the nearest of 0 and 1.
            d[location] = np.round(layer_2_output[0][0])

        return d

    def back_propagating(self, a, b):
        """Backpropagate through time and update the weights.

        Must be called right after ``feed_forward`` for the same sample, because
        it consumes the hidden states and output deltas recorded there.

        Args:
            a: bit array of the first operand.
            b: bit array of the second operand.
        """
        W_input_update = np.zeros_like(self.W_input)
        W_hidden_update = np.zeros_like(self.W_hidden)
        W_output_update = np.zeros_like(self.W_output)

        # Nothing flows back into the last processed step from the future.
        future_hidden_delta = np.zeros(self.hidden_dimension)

        # Walk the steps in reverse processing order. The forward pass ended at
        # bit location 0, so location ``position`` pairs with the entry
        # ``-position - 1`` of the recorded lists.
        for position in range(self.binary_dim):
            X = np.array([[a[position], b[position]]])
            hidden = self.hidden_layer_values[-position - 1]
            previous_hidden = self.hidden_layer_values[-position - 2]
            output_delta = self.output_layer_deltas[-position - 1]

            # Error at the hidden layer: the share coming from the following
            # step plus the share coming from this step's output.
            hidden_delta = (
                future_hidden_delta.dot(self.W_hidden.T) + output_delta.dot(self.W_output.T)
            ) * sigmoid_activation.backward(hidden)

            # Accumulate first and apply after the loop, so that every step is
            # evaluated against the same weights.
            W_output_update += np.atleast_2d(hidden).T.dot(output_delta)
            W_hidden_update += np.atleast_2d(previous_hidden).T.dot(hidden_delta)
            W_input_update += X.T.dot(hidden_delta)

            future_hidden_delta = hidden_delta

        # The deltas are built from (target - prediction), hence the updates
        # are added.
        self.W_input += W_input_update * self.learning_rate
        self.W_hidden += W_hidden_update * self.learning_rate
        self.W_output += W_output_update * self.learning_rate

    def __train__(self, epochs_count, report_every=1000):
        """Train on ``epochs_count`` randomly generated addition problems.

        Args:
            epochs_count: number of samples to train on (one sample per epoch).
            report_every: print the progress every this many epochs; a falsy
                value disables the progress output.
        """
        samples = dataset(self.binary_dim)

        for epoch in range(epochs_count):
            # sample a + b = c
            # for example: 2 + 3 = 5 => (a) 00000010 + (b) 00000011 = (c) 00000101
            # Called through the class with the instance passed explicitly, so
            # the call works whether or not the method is declared static.
            a, b, c, a_int, b_int, c_int = dataset.get_sample_addition_problem(samples)

            # d: the network's best guess (binary encoded)
            d = self.feed_forward(a, b, c)

            self.back_propagating(a, b)

            # Print out the progress of the RNN
            if report_every and epoch % report_every == 0:
                utility.print_result(self.overallError, a_int, b_int, c, d)
                
    
    
        