# Recurrent Neural Network in NumPy

In [295]:
import numpy as np
import sklearn as sk
import copy

In [52]:
def relu(x):
    return x * (x > 0)

def relu_derivative(x):
    return np.array(x * (x > 0) != 0, dtype='int')

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivate(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [414]:
int2binary = {}

num_range = 2**8

_DIGITS = 8

binary = np.unpackbits(np.array([range(num_range)],dtype=np.uint8).T,axis=1)
for i in range(num_range):
    int2binary[i] = binary[i]

def data_generator(data_size):
    data = np.zeros((data_size, _DIGITS*3))
    for row_index in range(data_size):
        a = np.random.randint(int(num_range/2))
        b = np.random.randint(int(num_range/2))
        for col_index in range(_DIGITS):
            data[row_index, col_index*2] = int2binary[a][col_index]
            data[row_index, col_index*2+1] = int2binary[b][col_index]
        data[row_index, _DIGITS*2:] = int2binary[a + b]
    return data

def input_feed(data, batch_size):
    for index in range(0, len(data), batch_size):
        yield {'train_data': data[index:index + batch_size, :_DIGITS*2],
               'target_data': data[index:index + batch_size, _DIGITS*2:]}

## Model

In [466]:
class Rnn:
    
    def __init__(self, input_dim):
        self.h_size = 10
        self.o_size = 1
        self.batch_size = None
        self.lrate = None
        
        self.layers = {
            'input': [],
            'hidden': [],
            'output': []
        }
        
        self.synapse_ih = {
            'weights': np.random.rand(input_dim, self.h_size),
            'biases': np.random.rand(1, self.h_size)
        } 
        self.synapse_hh = {
            'weights': np.random.rand(self.h_size, self.h_size)
        }
        self.synapse_ho = {
            'weights': np.random.rand(self.h_size, self.o_size),
            'biases': np.random.rand(1, self.o_size)
        }
    
    def train(self, train_data, batch_generator, batch_size=1, epochs=10, lrate=0.01):
        self.batch_size = batch_size
        self.lrate = lrate
        data = copy.deepcopy(train_data)
        for epoch in range(epochs):
            epoch_error = 0
            data = sk.utils.shuffle(data)
            for batch in batch_generator(data, self.batch_size):
                prediction = np.array(self._forward_prop(batch['train_data'])).reshape(_DIGITS,self.batch_size).T
                error = batch['target_data']-prediction
                self._back_prop(error)
            
    def test(self):
        pass
    
    def _predict(self):
        pass
    
    def _forward_prop(self, x):
        """assumes x is a sequence input and first dim is seq length"""
        self.layers['input'] = []
        self.layers['hidden'] = [np.zeros((self.batch_size, self.h_size))]
        self.layers['output'] = []
        # TODO: different iterator
        for index in range(_DIGITS):
            self.layers['input'].append(np.atleast_2d(x[:,index*2:index*2+2]))
            self.layers['hidden'].append(sigmoid(np.dot(self.layers['input'][-1], self.synapse_ih['weights']) + 
                                                      np.dot(self.layers['hidden'][-1], self.synapse_hh['weights']) +
                                                      self.synapse_ih['biases']))
            self.layers['output'].append(sigmoid(np.dot(self.layers['hidden'][-1], self.synapse_ho['weights']) +
                                                       self.synapse_ho['biases']))
        
        return self.layers['output']
        
    def _back_prop(self, errors):
        synapse_update_ih = {
            'weights': np.zeros_like(self.synapse_ih['weights']),
            'biases': np.zeros_like(self.synapse_ih['biases'])
        }
        synapse_update_hh = {
            'weights': np.zeros_like(self.synapse_hh['weights'])
        }
        synapse_update_ho = {
            'weights': np.zeros_like(self.synapse_ho['weights']),
            'biases': np.zeros_like(self.synapse_ho['biases'])
        }
        
        delta_hh = np.zeros((1, self.h_size))
        delta_next_hh = np.zeros((1, self.h_size))
        delta_ho = [np.multiply(sigmoid_derivate(self.layers['output'][index]), errors[:,index].reshape(self.batch_size, self.o_size)) for index in range(_DIGITS)]
        
        for index in range(len(errors)-1,-1,-1):
            synapse_update_ho['weights'] +=  np.dot(self.layers['output'][index].T, delta_ho[index])
            synapse_update_ho['biases'] +=  np.atleast_2d(delta_ho[index].sum(axis=0))

            delta_hh = (np.dot(delta_next_hh, self.synapse_hh['weights'].T) + 
                         np.multiply(np.dot(delta_ho[index], self.synapse_ho['weights'].T), sigmoid_derivate(self.layers['hidden'][index+1])))

            synapse_update_hh['weights'] += np.dot(self.layers['hidden'][index+1].T, np.atleast_2d(delta_hh))
            
            synapse_update_ih['weights'] += np.dot(self.layers['input'][index].T, np.atleast_2d(delta_hh))
            synapse_update_ih['biases'] += np.atleast_2d(delta_hh.sum(axis=0))

            delta_next_hh = delta_hh
            
        self.synapse_ih['weights'] += - self.lrate * (synapse_update_ih['weights']/self.batch_size)
        self.synapse_hh['weights'] += - self.lrate * (synapse_update_hh['weights']/self.batch_size)
        self.synapse_ho['weights'] += - self.lrate * (synapse_update_ho['weights']/self.batch_size)
        
        self.synapse_ih['biases'] += - self.lrate * (synapse_update_ih['biases']/self.batch_size)
        self.synapse_ho['biases'] += - self.lrate * (synapse_update_ho['biases']/self.batch_size)
    

In [467]:
np.random.seed(1)
data = data_generator(10)
rnn_object = Rnn(input_dim=2, )
rnn_object.train(train_data=data, batch_generator=input_feed, batch_size=3, epochs=1)

ValueError: shapes (2,1) and (3,10) not aligned: 1 (dim 1) != 3 (dim 0)