# Recurrent Neural Network in NumPy

In [519]:
import numpy as np
import sklearn as sk
import copy

In [1224]:
def relu(x):
    return x * (x > 0)

def relu_derivative(x):
    return np.array(x * (x > 0) != 0, dtype='int')

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 4/(np.exp(2*x) + 2 + np.exp(-2*x))

_ACTIVATION_1 = sigmoid
_ACTIVATION_1_DERIVATIVE = sigmoid_derivative

_ACTIVATION_2 = sigmoid
_ACTIVATION_2_DERIVATIVE = sigmoid_derivative

In [1182]:
int2binary = {}

num_range = 2**8
    
_DIGITS = 8

binary = np.unpackbits(np.array([range(num_range)],dtype=np.uint8).T,axis=1)
for i in range(num_range):
    int2binary[i] = binary[i]
    
def generate_row(num1, num2):
    row = np.zeros((1, _DIGITS*3))
    row[0,: _DIGITS*2:2] = int2binary[num1][::-1]
    row[0,1:_DIGITS*2:2] = int2binary[num2][::-1]
    row[0,_DIGITS*2:] = int2binary[num1+num2][::-1]
    return row

def data_generator(data_size):
    data = np.zeros((data_size, _DIGITS*3))
    for row_index in range(data_size):
        a = np.random.randint(int(num_range/4))
        b = np.random.randint(int(num_range/4))
        data[row_index,:] = generate_row(a,b)[0,:]
    return data

def logit2binary(x):
    ou = np.array(copy.deepcopy(x))
    return np.array(ou * (ou > 0.5) != 0, dtype='float')

def binary2int(x):
    return np.array([((2**logit)*bit) for logit, bit in enumerate(x[-1::-1])]).sum()

def input_feed(data, batch_size):
    for index in range(0, len(data), batch_size):
        if index + batch_size < len(data):
            yield {'train_data': data[index:index + batch_size, :_DIGITS*2],
                   'target_data': data[index:index + batch_size, _DIGITS*2:]}

## Model

In [1258]:
class Rnn:
    
    def __init__(self, input_dim):
        self.h_size = 16
        self.o_size = 1
        self.batch_size = None
        self.lrate = None
        
        self.layers = {
            'input': [],
            'hidden': [],
            'output': []
        }
        
        self.synapse_ih = {
            'weights': np.random.rand(input_dim, self.h_size)*2 - 1,
            'biases': np.zeros((1, self.h_size))
        } 
        self.synapse_hh = {
            'weights': np.random.rand(self.h_size, self.h_size)*2 - 1
        }
        self.synapse_ho = {
            'weights': np.random.rand(self.h_size, self.o_size)*2 - 1,
            'biases': np.zeros((1, self.o_size))
        }
    
    def train(self, train_data, batch_generator, batch_size=1, epochs=10000, lrate=0.1):
        self.batch_size = batch_size
        self.lrate = lrate
        data = copy.deepcopy(train_data)
        for epoch in range(epochs):
            epoch_error = 0
            data = sk.utils.shuffle(data)
            
            # ORIGINAL: for batch in batch_generator(data, self.batch_size):
            
            # TEMPORARY:
            for _ in range(1):
                
                # TEMPORARY:
                ##################################################
                num1 = np.random.randint(num_range/2)
                num2 = np.random.randint(num_range/2)
                a = int2binary[num1]
                b = int2binary[num2]
                c = int2binary[num1+num2]
#                 X = np.array([[a[_DIGITS - position - 1],b[binary_dim - position - 1]]])
#                 y = np.array([[c[_DIGITS - position - 1]]]).T
                ##################################################
    
                # ORIGINAL: prediction = np.array(self._forward_prop(batch['train_data'])).reshape(_DIGITS,self.batch_size).T
               
                # TEMPORARY:
                prediction = np.array(self._forward_prop(a, b)).reshape(_DIGITS,1).T
    
                # ORIGINAL: error = -(batch['target_data']-prediction)
                
                # TEMPORARY:
                error = -(c - prediction)
                
                self._back_prop(c, prediction)
                epoch_error += (error**2).sum()
                
            if epoch % 50 == 0:
                print("Epoch: {0} Train Error: {1}".format(epoch, epoch_error/len(train_data)))
                
            if epoch % 100 == 0:
                # TEMPORARY:
                print("---------------------------------------")
                print("{0} + {1} = {2} ({3}), Prediction: {4}".format(num1, num2, c[::-1], num1+num2, prediction[::-1]))
                print("---------------------------------------")
                
                # ORIGINAL:
#                 t1 = np.array(self._predict(generate_row(5,5))).reshape(_DIGITS)
#                 t2 = np.array(self._predict(generate_row(10,2))).reshape(_DIGITS)
#                 t3 = np.array(self._predict(generate_row(50,3))).reshape(_DIGITS)
#                 t4 = np.array(self._predict(generate_row(4,9))).reshape(_DIGITS)
                
#                 print("---------------------------------------")
#                 print("Task: {0} + {1} Target: {2} ({3}) Prediciton: {4} ({5})".format(5, 5, int2binary[10], 10, t1, binary2int(logit2binary(t1))))
#                 print("Task: {0} + {1} Target: {2} ({3}) Prediciton: {4} ({5})".format(10, 2, int2binary[12], 12, t2, binary2int(logit2binary(t2))))
#                 print("Task: {0} + {1} Target: {2} ({3}) Prediciton: {4} ({5})".format(50, 3, int2binary[53], 53, t3, binary2int(logit2binary(t3))))
#                 print("Task: {0} + {1} Target: {2} ({3}) Prediciton: {4} ({5})".format(4, 9, int2binary[13], 13, t4, binary2int(logit2binary(t4))))
#                 print("---------------------------------------")
                
    def _predict(self, x):
        temp_s = self.batch_size
        self.batch_size = 1
        prediction = self._forward_prop(x)
        self.batch_size = temp_s
        return prediction
    
    def _forward_prop(self, a, b):
        """assumes x is a sequence input and first dim is seq length"""
        self.layers['input'] = []
        self.layers['hidden'] = [np.zeros((self.batch_size, self.h_size))]
        self.layers['output'] = []
        # TODO: different iterator
        for index in range(_DIGITS):
            # ORIGINAL: self.layers['input'].append(np.atleast_2d(x[:,index*2:index*2+2]))

            # TEMPORARY:
            ##################################################
            X = np.array([[a[-index-1],b[-index-1]]])
            self.layers['input'].append(np.atleast_2d(X))
            ##################################################
            
            self.layers['hidden'].append(_ACTIVATION_1(np.dot(self.layers['input'][-1], self.synapse_ih['weights']) + 
                                                      np.dot(self.layers['hidden'][-1], self.synapse_hh['weights']) +
                                                      self.synapse_ih['biases']))
            self.layers['output'].append(_ACTIVATION_2(np.dot(self.layers['hidden'][-1], self.synapse_ho['weights']) +
                                                       self.synapse_ho['biases']))
    
        return self.layers['output']
        
    def _back_prop(self, c, prediction):
        synapse_update_ih = {
            'weights': np.zeros_like(self.synapse_ih['weights']),
            'biases': np.zeros_like(self.synapse_ih['biases'])
        }
        synapse_update_hh = {
            'weights': np.zeros_like(self.synapse_hh['weights'])
        }
        synapse_update_ho = {
            'weights': np.zeros_like(self.synapse_ho['weights']),
            'biases': np.zeros_like(self.synapse_ho['biases'])
        }
        
        delta_hh = None
        delta_next_hh = np.zeros((self.batch_size, self.h_size))
        # delta_ho = [np.multiply(_ACTIVATION_2_DERIVATIVE(self.layers['output'][index]), errors[:,index].reshape(self.batch_size, self.o_size)) for index in range(_DIGITS)]
        
        delta_ho = []
        for index in range(_DIGITS):
            error = -(c[-index-1]-prediction[0,index])
            delta_ho.append(_ACTIVATION_2_DERIVATIVE(self.layers['output'][index])*error)
        
        for index in range(_DIGITS):
            synapse_update_ho['weights'] +=  np.dot(self.layers['output'][-index-1].T, np.atleast_2d(delta_ho[-index-1]))
            # synapse_update_ho['biases'] +=  np.atleast_2d(delta_ho[-index].sum(axis=0))
            
            delta_hh = np.multiply((np.dot(delta_next_hh, self.synapse_hh['weights'].T) + 
                         np.dot(delta_ho[-index-1], self.synapse_ho['weights'].T)), _ACTIVATION_1_DERIVATIVE(self.layers['hidden'][-index-1]))
            
            synapse_update_hh['weights'] += np.dot(self.layers['hidden'][-index-2].T, np.atleast_2d(delta_hh))
            
            synapse_update_ih['weights'] += np.dot(self.layers['input'][-index-1].T, np.atleast_2d(delta_hh))
            # synapse_update_ih['biases'] += np.atleast_2d(delta_hh.sum(axis=0))
            delta_next_hh = delta_hh
        
        self.synapse_ih['weights'] +=  -self.lrate * (synapse_update_ih['weights']/self.batch_size)
        self.synapse_hh['weights'] +=  -self.lrate * (synapse_update_hh['weights']/self.batch_size)
        self.synapse_ho['weights'] +=  -self.lrate * (synapse_update_ho['weights']/self.batch_size)
        
#         self.synapse_ih['biases'] +=  -self.lrate * (synapse_update_ih['biases']/self.batch_size)
#         self.synapse_ho['biases'] +=  -self.lrate * (synapse_update_ho['biases']/self.batch_size)
    

In [1256]:
np.random.seed(0)
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})
data = data_generator(10)
rnn_object = Rnn(input_dim=2)
rnn_object.train(train_data=data, batch_generator=input_feed)

# test1 = generate_row(1,1)
# test2 = generate_row(100,100)
# test3 = generate_row(2,20)
# test4 = generate_row(100,2)

# print("Expected: {0} Predicted: {1}".format(binary2int(test1[0,_DIGITS*2:][::-1]), binary2int(logit2binary(rnn_object._predict(test1))[::-1])))
# print("Expected: {0} Predicted: {1}".format(binary2int(test2[0,_DIGITS*2:][::-1]), binary2int(logit2binary(rnn_object._predict(test2))[::-1])))
# print("Expected: {0} Predicted: {1}".format(binary2int(test3[0,_DIGITS*2:][::-1]), binary2int(logit2binary(rnn_object._predict(test3))[::-1])))
# print("Expected: {0} Predicted: {1}".format(binary2int(test4[0,_DIGITS*2:][::-1]), binary2int(logit2binary(rnn_object._predict(test4))[::-1])))


Epoch: 0 Train Error: 0.38625409931722154
---------------------------------------
60 + 33 = [1 0 1 1 1 0 1 0] (93), Prediction: [[0.24 0.21 0.11 0.13 0.12 0.13 0.11 0.07]]
---------------------------------------
Epoch: 50 Train Error: 0.18937244377062087
Epoch: 100 Train Error: 0.20024369202122916
---------------------------------------
16 + 59 = [1 1 0 1 0 0 1 0] (75), Prediction: [[0.54 0.70 0.50 0.49 0.65 0.61 0.44 0.44]]
---------------------------------------
Epoch: 150 Train Error: 0.17354829124679755
Epoch: 200 Train Error: 0.22008143757949244
---------------------------------------
31 + 30 = [1 0 1 1 1 1 0 0] (61), Prediction: [[0.44 0.61 0.52 0.50 0.51 0.40 0.27 0.30]]
---------------------------------------
Epoch: 250 Train Error: 0.1607866884919064
Epoch: 300 Train Error: 0.19972871123414157
---------------------------------------
21 + 23 = [0 0 1 1 0 1 0 0] (44), Prediction: [[0.53 0.67 0.49 0.43 0.37 0.48 0.27 0.27]]
---------------------------------------
Epoch: 350 Train



Epoch: 3250 Train Error: nan
Epoch: 3300 Train Error: nan
---------------------------------------
59 + 5 = [0 0 0 0 0 0 1 0] (64), Prediction: [[nan nan nan nan nan nan nan nan]]
---------------------------------------
Epoch: 3350 Train Error: nan
Epoch: 3400 Train Error: nan
---------------------------------------
43 + 39 = [0 1 0 0 1 0 1 0] (82), Prediction: [[nan nan nan nan nan nan nan nan]]
---------------------------------------
Epoch: 3450 Train Error: nan
Epoch: 3500 Train Error: nan
---------------------------------------
32 + 2 = [0 1 0 0 0 1 0 0] (34), Prediction: [[nan nan nan nan nan nan nan nan]]
---------------------------------------
Epoch: 3550 Train Error: nan
Epoch: 3600 Train Error: nan
---------------------------------------
46 + 33 = [1 1 1 1 0 0 1 0] (79), Prediction: [[nan nan nan nan nan nan nan nan]]
---------------------------------------
Epoch: 3650 Train Error: nan
Epoch: 3700 Train Error: nan
---------------------------------------
1 + 33 = [0 1 0 0 0 1 0 

In [1245]:
np.random.seed(1)
a = np.random.randint(int(num_range/2))
b = np.random.randint(int(num_range/2))

row = generate_row(a, b)
print(row)
print(row[0,:_DIGITS*2:2], row[0,1:_DIGITS*2:2], row[0,_DIGITS*2:])
print(binary2int(row[0,:_DIGITS*2:2][::-1]), binary2int(row[0,1:_DIGITS*2:2][::-1]), binary2int(row[0,_DIGITS*2:][::-1]))

print(int2binary[a],  binary2int(int2binary[b]), binary2int(int2binary[a+b]))
print(a, b , a+b)

[[1.00 1.00 0.00 1.00 1.00 0.00 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00
  0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 1.00]]
[1.00 0.00 1.00 0.00 0.00 1.00 0.00 0.00] [1.00 1.00 0.00 1.00 0.00 1.00 1.00 0.00] [0.00 0.00 0.00 0.00 1.00 0.00 0.00 1.00]
37.0 107.0 144.0
[0 0 1 0 0 1 0 1] 107 144
37 107 144


In [1246]:
print(np.array([1,2])[-2])

1
