In [177]:
import numpy as np 

class NeuralNetwork():
    def __init__(self):
        self.weights = {}  # weights
        self.bias = {} # bias
        self.num_layers = 1  # Set initial number of layer to one (input layer)
        self.adjustments_w = {}  # adjustements
        self.adjustments_b = {}  # adjustements
    
    def add_layer(self, shape):
        # inital weights in range(-1,1) 
        self.weights[self.num_layers] = 2 * np.random.random(shape) - 1
        # inital bias in range(-1,1)
        self.bias[self.num_layers] = 2 * np.random.random(shape[1]) - 1
        # inital adjustements i
        self.adjustments_w[self.num_layers] = np.zeros(shape)
        self.adjustments_b[self.num_layers] = np.zeros(shape[1])
        # plus num_layer 
        self.num_layers += 1
    
    # sigmoid
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    # derivative of sigmoid  
    def sigmoid_derivative(self, x):
        return x * (1 - x)
    
    def forward_propagate(self,data):       
        # Progapagate through network and hold values for use in back-propagation
        activation_values = {}
        activation_values[1] = data
        
        for layer in range(2,self.num_layers+1):
            # h = data.T (batch_size, input_size) * weight(input_size, output_size) + bias(output_size) 
            data = np.dot(data.T, self.weights[layer-1])+ self.bias[layer-1].T
            # a = f(h)
            data = self.sigmoid(data).T
            activation_values[layer] = data

        return activation_values
    
    def back_propagate(self, outputs, targets):
        deltas_w = {}
        deltas_b = {}
        # Delta of output Layer ( MSE derivative is 2(output-target))
        deltas_w[self.num_layers] = 2 * (outputs[self.num_layers] - targets)
        deltas_b[self.num_layers] = 2 * (outputs[self.num_layers] - targets)
        
        # Delta of hidden Layers
        for layer in reversed(range(2, self.num_layers)):  # All layers except input/output
            a_val = outputs[layer]
            weights = self.weights[layer]
            bias = self.bias[layer]
            prev_deltas_w = deltas_w[layer+1]
            prev_deltas_b = deltas_b[layer+1]
            
            deltas_w[layer] = np.multiply(np.dot(weights, prev_deltas_w), self.sigmoid_derivative(a_val))
            deltas_b[layer] = np.multiply(np.dot(bias, prev_deltas_b), self.sigmoid_derivative(a_val))
            
        # Caclculate total adjustements based on deltas
        for layer in range(1, self.num_layers):
          
         
            self.adjustments_w[layer] += np.dot(deltas_w[layer+1],outputs[layer].T).T
        
            self.adjustments_b[layer] += np.dot(deltas_b[layer+1],1).reshape(-1)

    def gradient_descente(self, batch_size, learning_rate):
        # Calculate partial derivative and take a step in that direction
        for layer in range(1, self.num_layers):
            
            partial_w = (1/batch_size) * self.adjustments_w[layer]
            partial_b = (1/batch_size) * self.adjustments_b[layer]
            

            self.weights[layer]+= learning_rate * -partial_w
            self.bias[layer]+= learning_rate*1e-3 * -partial_b
  
    
    def loss_func(self, outputs, targets):
        # MSE error
        return 0.5 * np.mean(np.sum(np.power(outputs - targets, 2), axis=1))
        
        
    def train(self, inputs, targets, num_epochs, learning_rate=1, stop_accuracy=1e-5):
        error = []
        for iteration in range(num_epochs):
            for i in range(len(inputs)):
                x = inputs[i]
                y = targets[i]
                # Pass the training set through our neural network
                output = self.forward_propagate(x)
                
                # Calculate the error
                loss = self.loss_func(output[self.num_layers - 1], y)
                error.append(loss)

                # Calculate Adjustements
                self.back_propagate(output, y)
   
            self.gradient_descente(i, learning_rate)

            # Check if accuarcy criterion is satisfied
            if np.mean(error[-(i+1):]) < stop_accuracy and iteration > 0:
                break
                

        return(np.asarray(error), iteration+1)
    
    def predict(self,data):
        # pass data through pre-trained network
        for layer in range(1,self.num_layers):
            # h = data (batch_size, input_size) * weight(input_size, output_size) + bias(output_size) 
            data = np.dot(data, self.weights[layer]) + self.bias[layer]
            # a = f(h)
            data = self.sigmoid(data)
        return data
    
if __name__ == "__main__":

    # ----------- XOR Function -----------------

    # Create instance of a neural network
    nn = NeuralNetwork()

    # Add Layers (Input layer is created by default)
    nn.add_layer((2, 1))

#     nn.add_layer((4, 1))

    # XOR function
    training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)
    training_labels = np.asarray([[0], [1], [1], [0]])
    
    error, iteration = nn.train(training_data, training_labels, 500)
    print('Error = ', np.mean(error[-4:]))
    print('Epoches needed to train = ', iteration)

    

    training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2)
    label = nn.predict(training_data)
    print(label)


Error =  0.191371694566
Epoches needed to train =  500
[[  9.66343547e-72]
 [  1.00000000e+00]
 [  1.00000000e+00]
 [  3.94279565e-91]]


In [89]:
training_data = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)
print(training_data[0].T)

[[0 0]]
