In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt

# Sigmoid (logistic) function.
# We use SciPy's expit rather than hand-coding the logistic formula, which is slightly more costly.
from scipy.special import expit 

import pandas as pd

In [327]:
class MLP():
    """Multi-layer perceptron with sigmoid units, trained by full-batch
    gradient descent with momentum.

    Every layer (including the output layer) applies the sigmoid ``expit``.
    The weight update follows the delta rule for the squared error
    ``0.5 * sum((y - output) ** 2)``.

    Bias handling (``use_bias=True``):
      * the input matrix gets a constant column of ones appended
        (see ``extend_with_bias``), and every weight matrix gets one extra
        input row;
      * every non-final layer also gets one extra output column. Hidden
        activations are NOT extended with ones, so that extra column is an
        ordinary sigmoid unit rather than a constant bias; only the input
        layer has a true constant bias term.

    Parameters
    ----------
    dimensions : sequence of (input_dim, output_dim) pairs
        One pair per weight layer, in forward order. At least two pairs are
        required and consecutive pairs must chain
        (``output_dim`` of one layer == ``input_dim`` of the next).
    momentum : float
        Fraction of the previous update that is carried over to the next one.
    classification : bool
        If True, ``predict`` thresholds the network output at 0.5 and returns
        integer labels; otherwise the raw sigmoid outputs are returned.
    learning_rate : float
        Step size applied to the descent direction.
    use_bias : bool
        Whether to add the bias terms described above.

    Attributes
    ----------
    weights, velocities, grads : list of ndarray
        One array per layer, all with the shape of the layer's weight matrix.
        ``grads`` is reset to zeros after every update.

    Raises
    ------
    ValueError
        If ``dimensions`` is empty, has fewer than two layers, or the layer
        widths do not chain.
    """

    def __init__(self, dimensions=[], momentum=0.9,
                 classification=True, learning_rate=1, use_bias=True):
        # The mutable default is kept for interface compatibility; it is only
        # read here, never mutated.
        if not dimensions:
            raise ValueError("Must pass input dimensions!")
        if len(dimensions) < 2:
            raise ValueError("At least 2 layers needed")

        # Fail early (instead of deep inside a matrix product) when the
        # declared layer widths do not fit together.
        for (_, out_dim), (next_in_dim, _) in zip(dimensions, dimensions[1:]):
            if out_dim != next_in_dim:
                raise ValueError(
                    "Layer dimensions do not chain: output size %s is "
                    "followed by input size %s" % (out_dim, next_in_dim))

        self.classification = classification
        self.use_bias = use_bias
        self.momentum = momentum
        self.lr = learning_rate
        # Initialize weights - we suppose our bias is built into the weights.
        self.velocities = []
        self.weights = []
        self.grads = []

        # Add weights, gradients and velocities for every layer.
        last_idx = len(dimensions) - 1
        for idx, (input_dim, output_dim) in enumerate(dimensions):
            if use_bias:
                input_dim += 1
                if idx != last_idx:
                    # Keeps the widths chained with the next layer's extra
                    # input row.
                    output_dim += 1

            weight = np.random.uniform(low=-0.1, high=0.100001,
                                       size=(input_dim, output_dim))
            self.weights.append(weight)
            self.grads.append(np.zeros_like(weight))
            self.velocities.append(np.zeros_like(weight))

    def extend_with_bias(self, matrix):
        """Return a float copy of the 2-D ``matrix`` with a trailing column of ones."""
        matrix = np.asarray(matrix)
        new = np.ones((matrix.shape[0],
                       matrix.shape[1] + 1))
        new[:, :-1] = matrix
        return new

    def forward_pass(self, x_train):
        """Propagate ``x_train`` through the network.

        ``x_train`` must already contain the bias column when ``use_bias`` is
        set. Returns a list with the sigmoid output of every layer, in
        forward order; the last element is the network output.
        """
        current = x_train
        outputs = []
        for layer in self.weights:
            output = expit(current.dot(layer))
            outputs.append(output)
            current = output

        return outputs

    def _backpropagate(self, layer_inputs, outputs, y_train):
        """Return the descent direction (negative squared-error gradient) for each weight matrix."""
        directions = [None] * len(self.weights)

        out = outputs[-1]
        delta = (y_train - out) * (out * (1 - out))

        # Walk the layers backwards. layer_inputs[idx] is the activation that
        # was multiplied by weights[idx] in the forward pass, so it is the
        # matrix that belongs in that layer's gradient.
        for idx in range(len(self.weights) - 1, -1, -1):
            directions[idx] = layer_inputs[idx].T.dot(delta)
            if idx > 0:
                hidden = layer_inputs[idx]
                delta = np.dot(delta, self.weights[idx].T) * hidden * (1 - hidden)

        return directions

    def fit(self, original_x, original_y, n_iter=100, testing=False,
            verbose=False):
        """Train the network with ``n_iter`` full-batch updates.

        Parameters
        ----------
        original_x : array-like, 2-D
            One sample per row, without the bias column.
        original_y : array-like
            Targets. A 1-D array is treated as a single output column; a 2-D
            array is used as is (one column per output node).
        n_iter : int
            Number of passes over the whole training set.
        testing, verbose : bool
            Currently unused; accepted for backward compatibility.

        Raises
        ------
        ValueError
            If ``original_x`` and ``original_y`` have a different number of rows.
        """
        original_x = np.asarray(original_x)
        if self.use_bias:
            x_train = self.extend_with_bias(original_x)
        else:
            x_train = original_x

        y_train = np.asarray(original_y)
        if y_train.ndim == 1:
            # Single output node: turn the label vector into a column.
            y_train = y_train.reshape(-1, 1)

        if x_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "x and y have a different number of samples: %d != %d"
                % (x_train.shape[0], y_train.shape[0]))

        for _ in range(n_iter):
            outputs = self.forward_pass(x_train)
            layer_inputs = [x_train] + outputs[:-1]

            self.grads = self._backpropagate(layer_inputs, outputs, y_train)

            for idx, grad in enumerate(self.grads):
                # Update weights according to momentum. The direction already
                # points downhill, hence the addition.
                new_v = self.momentum * self.velocities[idx] + self.lr * grad
                self.weights[idx] += new_v
                # Store the new velocities and set the gradients back to 0.
                self.velocities[idx] = new_v
                self.grads[idx] = np.zeros_like(self.weights[idx])

    def predict(self, x_test):
        """Return predictions for ``x_test`` (one sample per row, no bias column).

        With ``classification=True`` the sigmoid output is thresholded at 0.5
        and returned as integers; otherwise the raw output is returned. The
        result has one row per sample and one column per output node.
        """
        x_test = np.asarray(x_test)
        if self.use_bias:
            new_x_test = self.extend_with_bias(x_test)
        else:
            new_x_test = x_test
        output = self.forward_pass(new_x_test)[-1]

        if self.classification:
            return (output > 0.5).astype(int)
        else:
            return output

In [328]:
# XOR truth table: every combination of two bits, labelled 1 when the bits differ.
bit_pairs = [(first, second) for first in (0, 1) for second in (0, 1)]

x = np.array([[first, second] for first, second in bit_pairs])
# For a network with more than one output node, duplicate the label column
# afterwards with: y = np.vstack((y, y)).T
y = np.array([first != second for first, second in bit_pairs]).astype(int)

In [356]:
# MLP draws its initial weights from NumPy's global random generator, so seed
# it first: otherwise every Restart & Run All trains a different network.
SEED = 0
np.random.seed(SEED)

# Two weight layers: 2 inputs -> 5 hidden units -> 1 output.
dimensions = [(2, 5), (5, 1)]
mlp = MLP(dimensions=dimensions, learning_rate=1,
          use_bias=True)

# Full-batch training on the four XOR points.
mlp.fit(x, y, n_iter=10000, verbose=False)

In [357]:
# Classify the same four points the network was trained on.
# mlp was built with the default classification=True, so predict() returns
# 0/1 labels obtained by thresholding the sigmoid output at 0.5.
preds = mlp.predict(x)
# Disabled helper for viewing predictions next to the targets. It appears to
# need preds flattened first via .reshape(1, -1) -- TODO confirm before enabling:
# pd.DataFrame(np.vstack((preds.reshape(1, -1), y)).T, columns=["MLP prediction", "Target"])

In [358]:
preds

array([[0],
       [1],
       [1],
       [0]])