In [175]:
import itertools

import numpy as np
from progress.bar import Bar
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# The neural network class

In [2]:
class NNetwork:
    """Fully connected feed-forward network trained by per-sample backpropagation.

    Each layer is a single weight matrix of shape ``(n_out, n_in)`` followed by
    the activation function; there are no bias terms. Weights are initialised
    with ``np.random.rand`` (uniform on ``[0, 1)``).

    Parameters
    ----------
    layers : list of int
        Layer sizes from input to output, e.g. ``[3, 5, 5, 3]``.
    activate_function : str
        Name of the activation; only ``'sigmoid'`` is supported.
    cost_function : str
        Name of the cost; ``'square_error'`` (``'squared_error'`` is accepted
        as an alias).

    Raises
    ------
    ValueError
        If an unsupported activation or cost function name is given.
    """

    def __init__(self, layers: list, activate_function='sigmoid', cost_function='square_error'):
        self.layers = layers
        self.network = None

        if activate_function == 'sigmoid':
            self._activate_function = lambda x: 1/(1 + np.exp(-x))
            # Takes the already-activated value a = sigmoid(z) and returns a * (1 - a).
            self._derivative_activate_function = lambda x: x * (1.0 - x)
        else:
            # Fail here instead of with an AttributeError on the first forward pass.
            raise ValueError("unsupported activate_function: %r" % (activate_function,))

        if cost_function in ('square_error', 'squared_error'):
            self._cost_function = lambda x, y: 1/2 * (x - y)**2
            self._derivative_cost_function = lambda x, y: x - y
        else:
            raise ValueError("unsupported cost_function: %r" % (cost_function,))

        self._initialize_network()

    def _initialize_network(self):
        """Create one ``{"weights", "forward", "deltas"}`` record per weight matrix."""
        self.network = list()
        for i in range(1, len(self.layers)):
            weights = np.random.rand(self.layers[i], self.layers[i-1])
            self.network.append({"weights": weights, "forward": None, "deltas": None})

    def _forward_propagation(self, inputs: np.ndarray):
        """Propagate one sample through the network and return the output activations.

        Every layer's activations are cached under ``layer['forward']`` for use
        by ``_back_propagation`` and ``_update_weights``.
        """
        for layer in self.network:
            inputs = self._activate_function(layer['weights'] @ inputs)
            layer['forward'] = inputs
        return inputs

    def _back_propagation(self, y):
        """Compute ``layer['deltas']`` for every layer from the cached activations.

        ``y`` is the target for the sample most recently passed through
        ``_forward_propagation``. The deltas are the *negative* cost gradient
        with respect to each layer's pre-activation, so adding
        ``lr * outer(deltas, layer_input)`` to the weights reduces the cost.
        """
        last = len(self.network) - 1
        for index in reversed(range(len(self.network))):
            layer = self.network[index]
            slope = self._derivative_activate_function(layer['forward'])
            if index == last:
                # For the squared error this equals (y - output).
                layer['deltas'] = -self._derivative_cost_function(layer['forward'], y) * slope
            else:
                next_layer = self.network[index+1]
                # (n_next,) @ (n_next, n_cur) -> (n_cur,): deltas pulled back through the weights.
                layer['deltas'] = slope * (next_layer['deltas'] @ next_layer['weights'])

    def _update_weights(self, inputs, lr):
        """Apply one gradient step to every weight matrix, in place.

        ``inputs`` is the sample that produced the cached activations and
        ``lr`` is the learning rate.
        """
        layer_input = inputs
        for layer in self.network:
            # outer(deltas, layer_input)[i, j] == deltas[i] * layer_input[j],
            # which has the same (n_out, n_in) shape as the weight matrix.
            layer['weights'] += lr * np.outer(layer['deltas'], layer_input)
            layer_input = layer['forward']

    def _report_error(self, epoch_label, X, y):
        """Print the root-mean-squared error of the current predictions on ``X``."""
        error = np.sqrt(mean_squared_error(y, self.predict(X)))
        print('epoch=%d, error=%.3f' % (epoch_label, error))

    def fit(self, X, y, n_epoch=10000, lr=0.01, verbose_epoch=500):
        """Train with one weight update per sample for ``n_epoch`` passes over ``X``.

        The RMSE on the training data is printed every ``verbose_epoch`` epochs
        and once more after the last epoch; the final line reports the error
        measured at that point rather than the previous checkpoint's value.
        """
        print("learning rate = %.3f" % lr)
        for epoch in range(n_epoch):
            for x_, y_ in zip(X, y):
                self._forward_propagation(x_)
                self._back_propagation(y_)
                self._update_weights(x_, lr)

            if epoch % verbose_epoch == 0:
                self._report_error(epoch, X, y)
            elif epoch == n_epoch-1:
                self._report_error(epoch+1, X, y)

    def predict(self, inputs: np.ndarray, around=None):
        """Return the network output for every row of ``inputs`` as an array.

        When ``around`` is not None the outputs are rounded to that many
        decimals with ``np.around``. Calling this overwrites the cached
        ``layer['forward']`` activations.
        """
        output = np.array([self._forward_propagation(x) for x in inputs])
        if around is not None:
            output = np.around(output, decimals=around)
        return output

# The 3-bit counter (3 inputs, 3 outputs)

In [27]:
# Every 3-bit pattern, most significant bit first (row k encodes the integer k).
X = np.array([
    [(value >> shift) & 1 for shift in (2, 1, 0)]
    for value in range(8)
])

# Target for row k is the bit pattern of (k + 1) modulo 8, so 7 wraps to 0.
y = np.array([
    [(((value + 1) % 8) >> shift) & 1 for shift in (2, 1, 0)]
    for value in range(8)
])

In [10]:
# 3 inputs -> two hidden layers of 5 units -> 3 outputs.
nn = NNetwork(layers=[3, 5, 5, 3])
nn.fit(X=X, y=y, lr=0.2)

learning rate = 0.200
epoch=0, error=0.613
epoch=500, error=0.424
epoch=1000, error=0.397
epoch=1500, error=0.348
epoch=2000, error=0.325
epoch=2500, error=0.285
epoch=3000, error=0.226
epoch=3500, error=0.148
epoch=4000, error=0.079
epoch=4500, error=0.058
epoch=5000, error=0.047
epoch=5500, error=0.041
epoch=6000, error=0.036
epoch=6500, error=0.033
epoch=7000, error=0.031
epoch=7500, error=0.029
epoch=8000, error=0.027
epoch=8500, error=0.026
epoch=9000, error=0.024
epoch=9500, error=0.023
epoch=10000, error=0.023


In [98]:
# Round the outputs to 0 decimals so they can be compared with the binary targets.
nn.predict(inputs=X, around=0)

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 1.],
       [1., 0., 0.],
       [1., 0., 1.],
       [1., 1., 0.],
       [1., 1., 1.],
       [0., 0., 0.]])

# The 4-bit counter (4 inputs, 4 outputs)

In [92]:
# Every 4-bit pattern, most significant bit first (row k encodes the integer k).
X = np.array([
    [(value >> shift) & 1 for shift in (3, 2, 1, 0)]
    for value in range(16)
])

# Target for row k is the bit pattern of (k + 1) modulo 16, so 15 wraps to 0.
y = np.array([
    [(((value + 1) % 16) >> shift) & 1 for shift in (3, 2, 1, 0)]
    for value in range(16)
])

In [7]:
# 4 inputs -> two hidden layers of 6 units -> 4 outputs, trained for 20000 epochs.
nn = NNetwork(layers=[4, 6, 6, 4])
nn.fit(X=X, y=y, n_epoch=20000, lr=0.2)

learning rate = 0.200
epoch=0, error=0.627
epoch=500, error=0.377
epoch=1000, error=0.234
epoch=1500, error=0.125
epoch=2000, error=0.107
epoch=2500, error=0.100
epoch=3000, error=0.097
epoch=3500, error=0.095
epoch=4000, error=0.094
epoch=4500, error=0.093
epoch=5000, error=0.092
epoch=5500, error=0.092
epoch=6000, error=0.091
epoch=6500, error=0.091
epoch=7000, error=0.090
epoch=7500, error=0.066
epoch=8000, error=0.045
epoch=8500, error=0.037
epoch=9000, error=0.032
epoch=9500, error=0.029
epoch=10000, error=0.027
epoch=10500, error=0.025
epoch=11000, error=0.023
epoch=11500, error=0.022
epoch=12000, error=0.021
epoch=12500, error=0.020
epoch=13000, error=0.019
epoch=13500, error=0.019
epoch=14000, error=0.018
epoch=14500, error=0.017
epoch=15000, error=0.017
epoch=15500, error=0.016
epoch=16000, error=0.016
epoch=16500, error=0.016
epoch=17000, error=0.015
epoch=17500, error=0.015
epoch=18000, error=0.015
epoch=18500, error=0.014
epoch=19000, error=0.014
epoch=19500, error=0.014
ep

In [8]:
# Round the outputs to 0 decimals so they can be compared with the binary targets.
nn.predict(inputs=X, around=0)

array([[0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 1.],
       [0., 1., 0., 0.],
       [0., 1., 0., 1.],
       [0., 1., 1., 0.],
       [0., 1., 1., 1.],
       [1., 0., 0., 0.],
       [1., 0., 0., 1.],
       [1., 0., 1., 0.],
       [1., 0., 1., 1.],
       [1., 1., 0., 0.],
       [1., 1., 0., 1.],
       [1., 1., 1., 0.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.]])

# New implementation with a grid search over hyperparameters

In [176]:
class Layer:
    """Thin wrapper holding a list of layer sizes in ``numbers``.

    Printing or inspecting a ``Layer`` shows the wrapped value itself.
    """

    def __init__(self, numbers):
        self.numbers = numbers

    def __str__(self):
        # Delegate to the wrapped value's own string form.
        return str(self.numbers)

    def __repr__(self):
        # Delegate to the wrapped value's own repr.
        return repr(self.numbers)

class NNetwork:
    """Fully connected feed-forward network trained by per-sample backpropagation.

    Each layer is a single weight matrix of shape ``(n_out, n_in)`` followed by
    the activation function; there are no bias terms. Weights are initialised
    with ``np.random.rand`` (uniform on ``[0, 1)``).

    Parameters
    ----------
    layer : Layer or list of int
        Layer sizes from input to output. A ``Layer`` is unwrapped through its
        ``numbers`` attribute; a plain sequence of sizes is used as is.
    activate_function : str
        Name of the activation; only ``'sigmoid'`` is supported.
    cost_function : str
        Name of the cost; ``'squared_error'`` (``'square_error'`` is accepted
        as an alias).

    Raises
    ------
    ValueError
        If an unsupported activation or cost function name is given.
    """

    def __init__(self, layer: "Layer", activate_function='sigmoid', cost_function='squared_error'):
        # Accept either a Layer wrapper or a bare list of sizes.
        self.layers = getattr(layer, 'numbers', layer)
        self.network = None

        if activate_function == 'sigmoid':
            self._activate_function = lambda x: 1/(1 + np.exp(-x))
            # Takes the already-activated value a = sigmoid(z) and returns a * (1 - a).
            self._derivative_activate_function = lambda x: x * (1.0 - x)
        else:
            # Fail here instead of with an AttributeError on the first forward pass.
            raise ValueError("unsupported activate_function: %r" % (activate_function,))

        if cost_function in ('squared_error', 'square_error'):
            self._cost_function = lambda x, y: 1/2 * (x - y)**2
            self._derivative_cost_function = lambda x, y: x - y
        else:
            raise ValueError("unsupported cost_function: %r" % (cost_function,))

        self._initialize_network()

    def _initialize_network(self):
        """Create one ``{"weights", "forward", "deltas"}`` record per weight matrix."""
        self.network = list()
        for i in range(1, len(self.layers)):
            weights = np.random.rand(self.layers[i], self.layers[i-1])
            self.network.append({"weights": weights, "forward": None, "deltas": None})

    def _forward_propagation(self, inputs: np.ndarray):
        """Propagate one sample through the network and return the output activations.

        Every layer's activations are cached under ``layer['forward']`` for use
        by ``_back_propagation`` and ``_update_weights``.
        """
        for layer in self.network:
            inputs = self._activate_function(layer['weights'] @ inputs)
            layer['forward'] = inputs
        return inputs

    def _back_propagation(self, y):
        """Compute ``layer['deltas']`` for every layer from the cached activations.

        ``y`` is the target for the sample most recently passed through
        ``_forward_propagation``. The deltas are the *negative* cost gradient
        with respect to each layer's pre-activation, so adding
        ``lr * outer(deltas, layer_input)`` to the weights reduces the cost.
        """
        last = len(self.network) - 1
        for index in reversed(range(len(self.network))):
            layer = self.network[index]
            slope = self._derivative_activate_function(layer['forward'])
            if index == last:
                # For the squared error this equals (y - output).
                layer['deltas'] = -self._derivative_cost_function(layer['forward'], y) * slope
            else:
                next_layer = self.network[index+1]
                # (n_next,) @ (n_next, n_cur) -> (n_cur,): deltas pulled back through the weights.
                layer['deltas'] = slope * (next_layer['deltas'] @ next_layer['weights'])

    def _update_weights(self, inputs, lr):
        """Apply one gradient step to every weight matrix, in place.

        ``inputs`` is the sample that produced the cached activations and
        ``lr`` is the learning rate.
        """
        layer_input = inputs
        for layer in self.network:
            # outer(deltas, layer_input)[i, j] == deltas[i] * layer_input[j],
            # which has the same (n_out, n_in) shape as the weight matrix.
            layer['weights'] += lr * np.outer(layer['deltas'], layer_input)
            layer_input = layer['forward']

    def _report_error(self, epoch_label, X, y):
        """Print the root-mean-squared error of the current predictions on ``X``."""
        error = np.sqrt(mean_squared_error(y, self.predict(X)))
        print('epoch=%d, error=%.3f' % (epoch_label, error))

    def fit(self, X, y, n_epoch=10000, lr=0.01, verbose=True, verbose_epoch=500):
        """Train with one weight update per sample for ``n_epoch`` passes over ``X``.

        With ``verbose`` set, the RMSE on the training data is printed every
        ``verbose_epoch`` epochs and once more after the last epoch; the final
        line reports the error measured at that point rather than the previous
        checkpoint's value. With ``verbose=False`` nothing is printed.
        """
        if verbose:
            print("learning rate = %.3f" % lr)
        for epoch in range(n_epoch):
            for x_, y_ in zip(X, y):
                self._forward_propagation(x_)
                self._back_propagation(y_)
                self._update_weights(x_, lr)

            if not verbose:
                continue
            if epoch % verbose_epoch == 0:
                self._report_error(epoch, X, y)
            elif epoch == n_epoch-1:
                self._report_error(epoch+1, X, y)

    def predict(self, inputs: np.ndarray, around=None):
        """Return the network output for every row of ``inputs`` as an array.

        When ``around`` is not None the outputs are rounded to that many
        decimals with ``np.around``. Calling this overwrites the cached
        ``layer['forward']`` activations.
        """
        output = np.array([self._forward_propagation(x) for x in inputs])
        if around is not None:
            output = np.around(output, decimals=around)
        return output

In [207]:
# Search space handed to GridSearch: four layer layouts combined with two learning rates.
grid_params = [
    {
        'layers': [
            Layer(sizes)
            for sizes in ([4, 6, 6, 4], [4, 7, 7, 4], [4, 8, 8, 4], [4, 3, 4, 3, 4])
        ],
        'activate_function': ['sigmoid'],
        'cost_function': ['squared_error'],
        'n_epoch': [30000],
        'lr': [0.3, 0.35],
    }
]

In [208]:
class GridSearch:
    """Exhaustive search over ``NNetwork`` hyper-parameter grids.

    Parameters
    ----------
    grid_params : list of dict
        Each dict maps the names ``'layers'``, ``'activate_function'``,
        ``'cost_function'``, ``'n_epoch'`` and ``'lr'`` to a list of candidate
        values; every combination inside a dict is trained and scored.

    Attributes
    ----------
    best_estimator, best_params, best_loss
        Trained network, parameter dict and RMSE of the lowest-loss candidate
        from the most recent ``fit``; ``None`` until a candidate has been scored.
    """

    def __init__(self, grid_params):
        self.grid_params = grid_params
        self.best_estimator = None
        self.best_params = None
        self.best_loss = None

    @staticmethod
    def _expand_grid(grid):
        """Yield one ``{name: value}`` dict per combination of the grid's values.

        Values are passed through untouched (no array conversion), so ints
        stay ints and list-valued entries stay lists. The first key varies
        fastest and the last key slowest.
        """
        keys = list(grid.keys())
        # product() varies its last factor fastest, so feed the keys in reverse.
        value_lists = [grid[key] for key in reversed(keys)]
        for combo in itertools.product(*value_lists):
            yield dict(zip(keys, reversed(combo)))

    def fit(self, X, y, split_data=True):
        """Train one network per parameter combination and keep the best one.

        With ``split_data`` set, ``X``/``y`` are split by ``train_test_split``
        with ``test_size=0.3`` and candidates are scored on the held-out part;
        otherwise training and scoring both use the full data. The score is
        the RMSE of the predictions, and progress is printed per candidate.
        """
        X_train, X_test, y_train, y_test = X, X, y, y
        if split_data:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        min_error = np.inf

        for gp in self.grid_params:
            for params in self._expand_grid(gp):
                print("Parameters: ")
                for key, value in params.items():
                    print(f"{key}: {value}")

                estimator = NNetwork(
                    layer=params['layers'],
                    activate_function=params['activate_function'],
                    cost_function=params['cost_function'],
                )
                estimator.fit(X_train, y_train, n_epoch=params['n_epoch'], lr=params['lr'], verbose=False)

                error = np.sqrt(mean_squared_error(y_test, estimator.predict(X_test)))
                print("Loss: ", error, '\n')
                # Strict comparison: on a tie the earlier candidate is kept.
                if error < min_error:
                    self.best_estimator = estimator
                    self.best_params = params
                    self.best_loss = error
                    min_error = error

In [210]:
# Train and score on the full data set (no train/test split).
gs = GridSearch(grid_params=grid_params)
gs.fit(X, y, split_data=False)

Parameters: 
layers: [4, 6, 6, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.3
Loss:  0.006935393714144258 

Parameters: 
layers: [4, 7, 7, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.3
Loss:  0.005863674423809358 

Parameters: 
layers: [4, 8, 8, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.3
Loss:  0.0045208275322569935 

Parameters: 
layers: [4, 3, 4, 3, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.3
Loss:  0.36212507793680293 

Parameters: 
layers: [4, 6, 6, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.35
Loss:  0.006526271865093276 

Parameters: 
layers: [4, 7, 7, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.35
Loss:  0.006445633989924597 

Parameters: 
layers: [4, 8, 8, 4]
activate_function: sigmoid
cost_function: squared_error
n_epoch: 30000
lr: 0.35
Loss:  0.00563716318540

In [213]:
# Parameter combination GridSearch.fit stored for the lowest-loss candidate.
gs.best_params

{'layers': [4, 8, 8, 4],
 'activate_function': 'sigmoid',
 'cost_function': 'squared_error',
 'n_epoch': 30000,
 'lr': 0.3}

In [214]:
# RMSE GridSearch.fit recorded for the lowest-loss candidate.
gs.best_loss

0.0045208275322569935