By default, Jupyter notebooks do not have intellisense. If you like to enable it, add following code.

In [1]:
# enable intellisense
%config IPCompleter.greedy=True

# Binary addition
_What exactly will the RNN learn ?_

**RNN is going to learn the carry bit on its own!**


| input1 | input2 | carry-in | sum | carry-out |
|:---:|:---:|:---:|:---:|:---:|
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 1 | 0 |
| 0 | 1 | 0 | 1 | 0 |
| 0 | 1 | 1 | 0 | 1 |
| 1 | 0 | 0 | 1 | 0 |
| 1 | 0 | 1 | 0 | 1 |
| 1 | 1 | 0 | 0 | 1 |
| 1 | 1 | 1 | 1 | 1 |

## Samples
The first step, sample data is needed.
One looup table is used to help us converting int to binary and vice versa

int2binary (__lookup table__)

| int | binary array |
| :--- | :---: |
| 0 | [0, 0, 0, 0, 0, 0, 0, 0] |
| 1 | [0, 0, 0, 0, 0, 0, 0, 1] |
| 2 | [0, 0, 0, 0, 0, 0, 1, 0] |
...
| 255 | [1, 1, 1, 1, 1, 1, 1, 1] |

In [2]:
import copy, numpy as np
from abc import ABC, abstractmethod

In [3]:
np.random.seed(0)

In [4]:
class dataset:
    def __init__(self, binary_dim):
        # creating lookup table for converting int to binary
        self.int2binary = {}
        
        self.largest_number = pow(2,binary_dim)
        range_numbers = range(self.largest_number)
        
        # genrating corresponding binary array
        # for example binary[0] = array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
        binary = np.unpackbits(np.array([range_numbers],dtype=np.uint8).T,axis=1)
        
        # adding binary array to int2binary (lookup table)
        for i in range_numbers:
            self.int2binary[i] = binary[i]
    
    # generate a sample addition problem (a + b = c)
    @staticmethod
    def get_sample_addition_problem(self):
        a_int = np.random.randint(self.largest_number/2) # int version # generate random int between [1,largest_number/2)
        a = self.int2binary[a_int] # binary encoding

        b_int = np.random.randint(self.largest_number/2) # int version
        b = self.int2binary[b_int] # binary encoding

        # true answer => summation
        c_int = a_int + b_int
        c = self.int2binary[c_int]

        return a, b, c, a_int, b_int, c_int


In [5]:
class activation(ABC):
    
    @abstractmethod
    def forward(net):
        pass
    
    @abstractmethod
    def backward(output):
        pass

**sigmoid activation function**

forward

$$ \sigma(x) = \frac{1}{1+e^{-x}}$$

backward
$$ \frac{\partial \sigma(x)}{\partial x} =  \sigma(x)(1- \sigma(x))$$

In [6]:
class sigmoid_activation(activation):
        
    def forward(net):
        return 1/(1 + np.exp(-net))
    
    def backward(output):
        return output*(1 - output)

In every layer, number of neurons along with activation function should be defined.

In [7]:
class network_layer(ABC):
    
    def __init__(self, neuron_count):
        self.neuron_count = neuron_count

In [8]:
class input_layer(network_layer): 
    
    def forward(X, W_input):
        return np.dot(X,W_input)        

In [20]:
class hidden_layer(network_layer):
    
    def __init__(self):
        
        # Save the values obtained at Hidden Layer of current state in a list to keep track
        self.hidden_layer_values  = list()
        
        # Initially, there is no previous hidden state. So append "0" for that
        self.hidden_layer_values.append(np.zeros(self.neuron_count))
        
    def save_previous_hidden_layer_value(previous_hidden_layer_value):
        self.hidden_layer_values.append(copy.deepcopy(previous_hidden_layer_value))
    
    def forward(self, input, W_hidden):
        prev_hidden = self.hidden_layer_values[-1]
        net_hidden = input + np.dot(prev_hidden, W_hidden)
        return self.sigmoid_activation.forward(net_hidden)

In [17]:
class output_layer(network_layer):
    
    def forward(hidden_layer_output, W_output):
        net_output = np.dot(hidden_layer_output, W_output)
        return self.sigmoid_activation.forward(net_output)

In [11]:
class weight:
    
    @staticmethod
    def GetWeightMatrix(first_dimension, second_dimension):
        return 2*np.random.random((first_dimension,second_dimension)) - 1

In [12]:
class loss_function(ABC):
    
    @abstractmethod
    def compute(target_value, predicted_value):
        pass
    

**Mean squared error function**

https://en.wikipedia.org/wiki/Mean_squared_error

In [18]:
class loss_function(loss_function):
    
    @staticmethod
    def mse(target_value, predicted_value):
        return np.mean((target_value - predicted_value)**2)

In [19]:
class utility:
    
    @staticmethod
    def print_result(overallError, a_int, b_int, c, d):    
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

In [15]:
class simple_binary_addition_rnn:
    
    def __init__(self,binary_dim, input_dimension, output_dimension, hidden_dimension, learning_rate):
        
        self.binary_dim = binary_dim
        
        # layers
        self.input_layer = new input_layer(input_dimension)
        self.hidden_layer = new hidden_layer(hidden_dimension)
        self.output_layer = new output_layer(output_dimension)
        
        # initialize weights
        self.W_input = weight.GetWeightMatrix(input_dimension, hidden_dimension)
        self.W_hidden = weight.GetWeightMatrix(hidden_dimension, hidden_dimension)
        self.W_output = weight.GetWeightMatrix(hidden_dimension, output_dimension)
        
        self.learning_rate = learning_rate
        self.overallError = 0
        
        # update values for weights
        self.output_layer_deltas = list()
        
    def feed_forward(a, b, c):   
    
        # position: location of the bit amongst binary_dim-1 bits; for example, starting point "0"; "0 - 7"
        for position in range(binary_dim):

            location = binary_dim - position - 1
            X = np.array([[a[location], b[location]]])

            # Actual value for (a+b) = c, c is an array of 8 bits, so take transpose to compare bit by bit with X value.        
            target = np.array([[c[location]]]).T            
            
            # ----------- forward ---------------
            # input_layer forward
            input_layer_output = self.input_layer.forward(X,W_in)
            
            # hidden_layer forward
            hidden_layer_output = self.hidden_layer.forward(input_layer_output, self.W_hidden)
            #net_1 = np.dot(X,W_in) + np.dot(hidden_layer_values[-1],W_h) 
            
            # self.output_layer.forward
            predicated_value = self.output_layer.forward(hidden_layer_output, W_output)            
        
            # Save the hidden layer to be used later            
            #hidden_layer_values.append(copy.deepcopy(layer_1_output))             
            # ToDo
            self.hidden_layer.save_previous_hidden_layer_value(predicated_value)
            
            # Delta rule
            # Calculate the error
            # Cost function
            # C = 1/2 x (y_true - y_pred)^2
            output_error = target - layer_2_output

            # Save the error deltas at each step as it will be propagated back
            output_layer_deltas.append((output_error)*sigmoid_output_to_derivative(layer_2_output))

            # Save the sum of error at each binary position
            overallError += np.abs(output_error[0])

            # Round off the values to nearest "0" or "1" and save it to a list
            d[location] = np.round(layer_2_output[0][0])
    
    
    
    def __train__(self, epochs_count):
        
        for epoch in range(epochs_count):
            
            # sample a + b = c
            # for example: 2 + 3 = 5 => (a) 00000010 + (b) 00000011 = (c) 00000101
            a, b, c, a_int, b_int, c_int = dataset.get_sample_addition_problem()
            
            # where we'll store our best guess (binary encoded)
            # desired predictions => d
            d = np.zeros_like(c)  
            
            feed_forward(a, b ,c)
            
            back_propagating(a, b)
    
            # Print out the Progress of the RNN
            if (epoch % 1000 == 0):
                print_result(overallError, a_int, b_int, c, d)
                
    
    
        

SyntaxError: invalid syntax (<ipython-input-15-271c5511a9c6>, line 8)