# Learning Binary Addition with a Recurrent Neural Network (RNN)

This RNN example is adapted almost directly from @iamtrask's [blog post](https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/). The code below wraps his code in a class so that you can parameterize your own RNN.

It also adds a `test` method that can be used to perform a custom addition operation after the net has been trained.

This RNN learns how to do binary addition.

In [159]:
import numpy as np, copy

class RNN:
    """Small recurrent network that learns fixed-width binary addition.

    The network reads two operands one bit at a time, least-significant bit
    first, and emits one bit of the sum per time step.  Weights are created
    by :meth:`train`; :meth:`test` runs a forward pass with those weights.

    Args:
        binary_dim: Number of bits per operand (and per result).
        alpha: Learning rate applied to the accumulated weight updates.
        input_dim: Width of the input layer (one unit per operand bit).
        hidden_dim: Width of the recurrent hidden layer.
        output_dim: Width of the output layer.
        iterations: Number of training examples drawn by :meth:`train`.
        seed: Value passed to ``np.random.seed``.  ``None`` (the default)
            seeds from OS entropy, exactly like a bare ``np.random.seed()``.
            Note that seeding happens here, in the constructor, and affects
            NumPy's global random state.
    """

    def __init__(self, binary_dim=8, alpha=0.1, input_dim=2, hidden_dim=16,
                 output_dim=1, iterations=10000, seed=None):
        self.binary_dim = binary_dim
        self.alpha = alpha
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.iterations = iterations
        self.largest_number = 0
        self.int2binary = {}

        np.random.seed(seed)

        self.prepare_int2binary_map()

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        return 1/(1+np.exp(-x))

    def sigmoid_output_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid's *output* ``x``."""
        return x*(1-x)

    def prepare_int2binary_map(self):
        """Fill ``int2binary`` with the bit pattern of every representable int.

        Sets ``largest_number`` to ``2 ** binary_dim`` and maps each integer
        below it to a ``uint8`` array of exactly ``binary_dim`` bits, most
        significant bit first.
        """
        self.largest_number = pow(2, self.binary_dim)
        # Shift-and-mask instead of np.unpackbits: unpackbits always yields
        # 8 bits per uint8, which is only correct when binary_dim == 8.
        shifts = np.arange(self.binary_dim - 1, -1, -1)
        values = np.arange(self.largest_number)
        binary = ((values[:, None] >> shifts) & 1).astype(np.uint8)
        for i, bits in enumerate(binary):
            self.int2binary[i] = bits

    @staticmethod
    def _bits_to_int(bits):
        """Decode a most-significant-bit-first sequence of 0/1 values."""
        value = 0
        for bit in bits:
            value = (value << 1) | int(bit)
        return value

    def _forward_step(self, X, prev_hidden):
        """Run one time step and return ``(hidden, output)`` activations."""
        hidden = self.sigmoid(np.dot(X, self.W0) + np.dot(prev_hidden, self.Wh))
        output = self.sigmoid(np.dot(hidden, self.W1))
        return hidden, output

    def test(self, a_int, b_int):
        """Add two integers with the trained network.

        Args:
            a_int: First operand; must be a key of ``int2binary``.
            b_int: Second operand; must be a key of ``int2binary``.

        Returns:
            A one-element integer array holding the network's guess for
            ``a_int + b_int``, decoded from ``binary_dim`` output bits.

        Raises:
            KeyError: If an operand is not in ``int2binary``.
            AttributeError: If :meth:`train` has not been called yet, since
                the weights do not exist until then.
        """
        a = self.int2binary[a_int]
        b = self.int2binary[b_int]

        prev_hidden = np.zeros(self.hidden_dim)
        output_bits = np.zeros(self.binary_dim)

        for position in range(self.binary_dim):
            # Walk from the least-significant bit so carries flow forward
            # through the hidden state.
            bit_index = self.binary_dim - position - 1
            X = np.array([[a[bit_index], b[bit_index]]])

            prev_hidden, output = self._forward_step(X, prev_hidden)

            # Take the scalar explicitly; storing a (1, 1) array into a
            # scalar slot relies on a conversion NumPy has deprecated.
            output_bits[bit_index] = output[0, 0]

        # Decode by hand rather than with np.packbits, which pads to a full
        # byte on the right and is therefore wrong unless binary_dim == 8.
        guess = self._bits_to_int(np.round(output_bits))
        return np.array([guess])

    def train(self):
        """Initialise the weights and train on random addition problems.

        Creates ``W0`` (input to hidden), ``W1`` (hidden to output) and
        ``Wh`` (hidden to hidden) with uniform values in ``[-1, 1)``, then
        runs ``iterations`` examples.  Each example is a full forward pass
        over all bit positions followed by backpropagation through time and
        a single weight update scaled by ``alpha``.  Progress is printed
        every 2000 iterations.
        """
        # initialize weights
        self.W0 = 2*np.random.random((self.input_dim, self.hidden_dim)) - 1
        self.W1 = 2*np.random.random((self.hidden_dim, self.output_dim)) - 1
        self.Wh = 2*np.random.random((self.hidden_dim, self.hidden_dim)) - 1

        W0_update = np.zeros_like(self.W0)
        W1_update = np.zeros_like(self.W1)
        Wh_update = np.zeros_like(self.Wh)

        # Operands come from the lower half of the range so that their sum
        # is still representable.  Integer division keeps the bound an int.
        operand_limit = self.largest_number // 2

        for j in range(self.iterations):
            # create a random binary addition problem
            a_int = np.random.randint(operand_limit)
            a = self.int2binary[a_int]

            b_int = np.random.randint(operand_limit)
            b = self.int2binary[b_int]

            # true answer
            c_int = a_int + b_int
            c = self.int2binary[c_int]

            # place to store our best guess
            d = np.zeros_like(c)

            overall_error = 0.0

            layer2_deltas = []
            layer1_values = [np.zeros(self.hidden_dim)]

            # forward pass, least-significant bit first
            for position in range(self.binary_dim):
                bit_index = self.binary_dim - position - 1

                X = np.array([[a[bit_index], b[bit_index]]])
                y = np.array([[c[bit_index]]]).T

                layer1, layer2 = self._forward_step(X, layer1_values[-1])

                # measure error
                layer2_error = y - layer2
                layer2_deltas.append(
                    layer2_error * self.sigmoid_output_derivative(layer2))
                # Keep the running error a plain float so it can be
                # formatted with "%.5f" without array-to-scalar conversion.
                overall_error += float(np.abs(layer2_error[0]).sum())

                # decode guessed solution
                d[bit_index] = np.round(layer2[0][0])

                # store hidden layer for use in next time step
                layer1_values.append(layer1)

            future_layer1_delta = np.zeros(self.hidden_dim)

            # backward pass: step `position` here revisits the time step
            # that consumed bit index `position` in the forward pass
            for position in range(self.binary_dim):
                X = np.array([[a[position], b[position]]])
                layer1 = layer1_values[-position - 1]
                prev_layer1 = layer1_values[-position - 2]

                # error at output layer
                layer2_delta = layer2_deltas[-position - 1]

                # error at hidden layer
                layer1_delta = (
                    future_layer1_delta.dot(self.Wh.T)
                    + layer2_delta.dot(self.W1.T)
                ) * self.sigmoid_output_derivative(layer1)

                # accumulate updates; weights stay fixed within an example
                W1_update += np.atleast_2d(layer1).T.dot(layer2_delta)
                Wh_update += np.atleast_2d(prev_layer1).T.dot(layer1_delta)
                W0_update += X.T.dot(layer1_delta)

                future_layer1_delta = layer1_delta

            self.W0 += W0_update * self.alpha
            self.W1 += W1_update * self.alpha
            self.Wh += Wh_update * self.alpha

            W0_update.fill(0)
            W1_update.fill(0)
            Wh_update.fill(0)

            # print progress
            if j % 2000 == 0:
                print("Error: %.5f" % overall_error)
                out = self._bits_to_int(d)
                print("%d + %d = %d" % (a_int, b_int, out))
                print("---------")
                


# Train

Next, let's train the model.

In [157]:
# Build the network with its default architecture and an explicit iteration
# count, then train it. train() prints a progress report every 2000 iterations.
rnn = RNN(iterations=10000)
rnn.train()

Error: 3.81032
57 + 1 = 254
---------
Error: 4.23229
42 + 89 = 118
---------
Error: 3.58486
53 + 46 = 91
---------
Error: 1.00315
85 + 20 = 105
---------
Error: 0.63371
60 + 45 = 105
---------


# Test

Now, let's see how our model does if we give it a whole bunch of random addition problems.

In [160]:
# Evaluate the trained network on random addition problems.
num_attempts = 10000
# Column 0 holds the network's guess, column 1 the true sum.
answers = np.zeros((num_attempts, 2))
num_correct = 0

for i in range(num_attempts):
    # Integer division keeps the randint bound an int under Python 3.
    a_int = np.random.randint(rnn.largest_number // 2)
    b_int = np.random.randint(rnn.largest_number // 2)
    # rnn.test hands back the guess wrapped in an array; pull the scalar out
    # explicitly instead of relying on implicit array-to-scalar conversion,
    # which NumPy has deprecated.
    answers[i][0] = np.ravel(rnn.test(a_int, b_int))[0]
    answers[i][1] = a_int + b_int
    if answers[i][0] == answers[i][1]:
        num_correct += 1

print("Correct: %d/%d" % (num_correct, num_attempts))
print("Accuracy: %.3f" % (float(num_correct)/num_attempts))


Correct: 10000/10000
Accuracy: 1.000


# Results

100% accuracy on 10000 examples. Nice work, RNN!