In [1]:
import numpy as np
from copy import deepcopy

In [2]:
def sigmoid(x, derivation=False):
    if not derivation:
        return 1/(1+np.exp(-x))
    else:
        # (input need to be x = sigmoid(y))
        return x * (1 - x)

In [3]:
int2binary = {}
binary_dim = 8

largest_number = pow(2,binary_dim)
binary = np.unpackbits(np.array([range(largest_number)],dtype=np.uint8).T,axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

In [4]:
class network:
    def __init__(self):
        self.layers = None
        self.layers_list = []
        self.root_layer = None
        self.last_layer = None
        
        self.reste_weights = []
        
    def add_layer(self, layer_shape):
        if(self.layers == None):
            self.layers = layer(layer_shape)
            self.root_layer = self.layers
            self.layers.root = True
        else:
            new_layer = layer(layer_shape)
            assert (self.layers.weight.shape[-1] == new_layer.weight.shape[0])
            new_layer.back = self.layers
            
            self.layers.next = new_layer
            self.layers = self.layers.next
            
        self.layers_list.append(self.layers)
        self.reste_weights.append(deepcopy(self.layers))
        self.last_layer = self.layers
        
    def forward(self, X, y):
        current_layer = self.root_layer
        prev_layer = None

        last_activation = X   
        while current_layer != None:
            prev_layer = current_layer
            
            if not current_layer.not_reset:
                current_layer.history = np.zeros(current_layer.weight.shape[1])
                current_layer.activation_history = [np.zeros(current_layer.weight.shape[1])]

                current_layer.future = np.zeros(current_layer.weight.shape[1])
                current_layer.not_reset = True

            accc = np.dot(last_activation, current_layer.weight)
            current_layer.plain = accc
            
            if not current_layer.root and current_layer.next != None:
                accc = current_layer.back.plain + np.dot(current_layer.history, current_layer.weight)
                current_layer.plain = accc
                current_layer.activation = sigmoid(accc)
                current_layer.history = current_layer.activation
            else:
                current_layer.activation = sigmoid(accc)
            
            current_layer.activation_history.append(deepcopy(current_layer.activation))
                            
            last_activation = current_layer.activation
            current_layer = current_layer.next    
    
        error = y - prev_layer.activation
        
        delta = (error) * sigmoid(prev_layer.activation, derivation=True)
        
        prev_layer.delta_history.append(delta)
        
        return error, delta 
       
    def backprop(self, index, hint):
        current_layer = self.last_layer
        current_layer = current_layer.back
        
        current_layer.next.update += np.atleast_2d(current_layer.activation_history[-index - 1]).T.dot(current_layer.next.delta_history[-index-1])
        while current_layer.back != None:
            if(current_layer.next.next == None):
                current_layer.delta = (current_layer.future.dot(current_layer.weight.T) + current_layer.next.delta_history[-index - 1].dot(current_layer.next.weight.T))  * \
                sigmoid(current_layer.activation_history[-index - 1], derivation=True)
            else:
                current_layer.delta = (current_layer.future.dot(current_layer.weight.T) + current_layer.next.delta.dot(current_layer.next.weight.T))  * \
                sigmoid(current_layer.activation_history[-index - 1], derivation=True)
              
            current_layer.future = current_layer.delta
            current_layer.update += np.atleast_2d(current_layer.activation_history[-index - 2]).T.dot(current_layer.delta)                        
            
            current_layer.not_reset = False
            current_layer = current_layer.back
            
        current_layer.update += hint.T.dot(current_layer.next.delta)

    def update_weights(self, lr=0.1):
        current_layer = self.root_layer
        while current_layer != None:
            current_layer.weight += current_layer.update * lr
            current_layer.update = np.zeros_like(current_layer.weight)
            current_layer = current_layer.next

In [5]:
class layer:
    def __init__(self, shape):
        self.weight = 2 * np.random.random(shape) - 1
        self.activation = None
        self.next = None
        self.back = None
        
        self.activation_history = []
        self.delta_history = []
        self.future = None
        
        self.update = np.zeros_like(self.weight)
        self.not_reset = False
        
        self.root = False

In [6]:
model = network()
model.add_layer((2, 16))
model.add_layer((16, 16))
model.add_layer((16, 16))
model.add_layer((16, 1))

In [7]:
def get_random_number(max_number):
    global int2binary
    index = np.random.randint(max_number//2)
    return index, int2binary[index]

In [8]:
np.random.seed(0)

for j in range(50000):
    number_a, binary_a = get_random_number(largest_number)
    number_b, binary_b = get_random_number(largest_number)
    number_c = number_a + number_b
    binary_c = int2binary[number_c]
    
    number_a, binary_a = get_random_number(largest_number)
    number_b, binary_b = get_random_number(largest_number)
    number_c = number_a + number_b
    binary_c = int2binary[number_c]
    
    predicted_d = np.zeros_like(binary_c)
    
    model.error = 0   
    for position in range(binary_dim):   
        X = np.array([[binary_a[binary_dim - position - 1], binary_b[binary_dim - position - 1]]])
        y = np.array([[binary_c[binary_dim - position - 1]]]).T

        error, delta = model.forward(X, y)
        model.error += np.abs(error[0])    
      
        predicted_d[binary_dim - position - 1] = np.round(model.layers_list[-2].activation[0][0])
           
    for position in range(binary_dim):
        X = np.array([[binary_a[position], binary_b[position]]])        
        model.backprop(position, X)
    
    model.update_weights()

    if(j % 1000 == 0):
        print("Error:" + str(model.error))

Error:[3.91222995]
Error:[3.9807459]
Error:[3.7766781]
Error:[3.8955472]
Error:[3.93633543]
Error:[3.9391484]
Error:[3.76891634]
Error:[3.15148586]
Error:[2.89824718]
Error:[1.60611771]
Error:[2.4416812]
Error:[2.04229347]
Error:[2.8324819]
Error:[0.87739575]
Error:[1.20450642]
Error:[0.97796444]
Error:[0.99277245]
Error:[0.28491034]
Error:[0.72561832]
Error:[0.54280085]
Error:[0.56233072]
Error:[0.30364332]
Error:[0.41061162]
Error:[0.29634781]
Error:[0.10989977]
Error:[0.74120663]
Error:[0.26088008]
Error:[0.07895541]
Error:[0.62199828]
Error:[0.09463485]
Error:[0.33909516]
Error:[0.17349216]
Error:[1.38725325]
Error:[0.20859493]
Error:[0.24593607]
Error:[0.14610022]
Error:[0.23899025]
Error:[0.52393117]
Error:[0.28595834]
Error:[0.5736941]
Error:[0.20088822]
Error:[0.29609806]
Error:[0.09263957]
Error:[0.07650281]
Error:[0.80191915]
Error:[0.13564556]
Error:[0.17952487]
Error:[0.15397945]
Error:[0.15549367]
Error:[0.41394396]
