### Baseado no Chapter 12 - Implementing a Multi-layer Artificial Neural Network from Scratch

In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import pandas as pd

%matplotlib inline

In [91]:
p_h = []
p_out = []


# 1 -

In [293]:

class NeuralNetMLP(object):
  
    def __init__(self, n_hidden=17,
                 l2=0.0, epochs=100, eta=0.001,
                 shuffle=True, minibatch_size=1, seed=None):

        self.random = np.random.RandomState(seed)
        self.n_hidden = n_hidden
        self.l2 = l2
        self.epochs = epochs
        self.eta = eta
        self.shuffle = shuffle
        self.minibatch_size = minibatch_size

    def _onehot(self, y, n_classes):
    
        onehot = np.zeros((n_classes, y.shape[0]))
        for idx, val in enumerate(y.astype(int)):
            onehot[val, idx] = 1.
        return onehot.T

    def _sigmoid(self, z):
    
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))
    
    def ReLU(self, z):
        return np.maximum(0,z)
    def reluDerivative(self, x):
        if x > 0: 
            return 1
        return 0
    def _forward(self, X):

        z_h = np.dot(X, self.w_h)
        a_h = self.ReLU(z_h)
        z_out = np.dot(a_h, self.w_out)
        a_out = self.ReLU(z_out)


        return z_h, a_h, z_out, a_out

    def _compute_cost(self, y_enc, output):
     
        L2_term = (self.l2 *
                   (np.sum(self.w_h ** 2.) +
                    np.sum(self.w_out ** 2.)))
        
        term1 = -y_enc * (np.log(output))
        term2 = (1. - y_enc) * np.log((1. - output) + 1e-17)
        cost = np.sum(term1 - term2) + L2_term
       
        return cost

    def predict(self, X):

        z_h, a_h, z_out, a_out = self._forward(X)
        y_pred = np.argmax(z_out, axis=1)
        return y_pred

    def fit(self, X_train, y_train):
      
        n_output = np.unique(y_train).shape[0]  # number of class labels
        n_features = X_train.shape[1]

        ########################
        # Weight initialization
        #######################
        
        
        self.w_h =  np.array([[2, 1 , 1], [1, -2, 2]])
        self.w_h = self.w_h.astype(np.float64)
      
        

        self.w_out = np.array([[-1], [3], [2]])
        self.w_out = self.w_out.astype(np.float64)
     

        epoch_strlen = len(str(self.epochs))  # for progress formatting
        self.eval_ = {'cost': [], 'train_acc': [], 'valid_acc': []}

        y_train_enc = self._onehot(y_train, n_output)

        # iterate over training epochs
        for i in range(self.epochs):

            # iterate over minibatches
            indices = np.arange(X_train.shape[0])

            if self.shuffle:
                self.random.shuffle(indices)

            for start_idx in range(0, indices.shape[0] - self.minibatch_size +
                                   1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]

                z_h, a_h, z_out, a_out = self._forward(X_train[batch_idx])

                ##################
                # Backpropagation
                ##################


                sigma_out = a_out - y_train_enc[batch_idx]

                derivative_h = a_h

                # [n_samples, n_classlabels] dot [n_classlabels, n_hidden]
                # -> [n_samples, n_hidden]
                sigma_h = (np.dot(sigma_out, self.w_out.T) *
                           derivative_h)

                # [n_features, n_samples] dot [n_samples, n_hidden]
                # -> [n_features, n_hidden]
                grad_w_h = np.dot(X_train[batch_idx].T, sigma_h)


                # [n_hidden, n_samples] dot [n_samples, n_classlabels]
                # -> [n_hidden, n_classlabels]
                grad_w_out = np.dot(a_h.T, sigma_out)
                grad_b_out = np.sum(sigma_out, axis=0)

                # Regularization and weight updates
                delta_w_h = (grad_w_h + self.l2*self.w_h)
                self.w_h -= self.eta * delta_w_h

                delta_w_out = (grad_w_out + self.l2*self.w_out)
                self.w_out -= self.eta * delta_w_out
    

            #############
            # Evaluation
            #############

            # Evaluation after each epoch during training
            z_h, a_h, z_out, a_out = self._forward(X_train)
            
            cost = self._compute_cost(y_enc=y_train_enc,
                                      output=a_out)

            y_train_pred = self.predict(X_train)
          

            train_acc = ((np.sum(y_train == y_train_pred)).astype(np.float) /
                         X_train.shape[0])
           

            self.eval_['cost'].append(cost)
            self.eval_['train_acc'].append(train_acc)
           

        return self

In [287]:

X = np.array([[1, 2]])
y = np.array([3])
y = np.where(y == 3, 0, 1)

epocas = [10, 50, 100]
lrs = [0.1, 0.01, 0.001]

In [294]:
for ep in epocas:
    for lr in lrs:
        nn = NeuralNetMLP(epochs=10,eta=0.001)
        nn.fit(X,y);
        print('taxa de aprendizagem = {} épocas = {} \n'.format(lr, ep))
        print('w_h:\n')
        print('{}\n'.format(nn.w_h))
        print('w_out:\n')
        print('{}\n'.format(nn.w_out))

taxa de aprendizagem = 0.1 - épocas = 10 

w_h:

[[ 2.12014496  1.          0.76664092]
 [ 1.24028991 -2.          1.53328185]]

w_out:

[[-1.11436199]
 [ 3.        ]
 [ 1.88019007]]

taxa de aprendizagem = 0.01 - épocas = 10 

w_h:

[[ 2.12014496  1.          0.76664092]
 [ 1.24028991 -2.          1.53328185]]

w_out:

[[-1.11436199]
 [ 3.        ]
 [ 1.88019007]]

taxa de aprendizagem = 0.001 - épocas = 10 

w_h:

[[ 2.12014496  1.          0.76664092]
 [ 1.24028991 -2.          1.53328185]]

w_out:

[[-1.11436199]
 [ 3.        ]
 [ 1.88019007]]

taxa de aprendizagem = 0.1 - épocas = 50 

w_h:

[[ 2.12014496  1.          0.76664092]
 [ 1.24028991 -2.          1.53328185]]

w_out:

[[-1.11436199]
 [ 3.        ]
 [ 1.88019007]]

taxa de aprendizagem = 0.01 - épocas = 50 

w_h:

[[ 2.12014496  1.          0.76664092]
 [ 1.24028991 -2.          1.53328185]]

w_out:

[[-1.11436199]
 [ 3.        ]
 [ 1.88019007]]

taxa de aprendizagem = 0.001 - épocas = 50 

w_h:

[[ 2.12014496  1.        



*  Os pesos estão variando pouco mesmo modificando a taxa de aprendizagem e o número de épocas. São poucos os exemplos de dados entradas então é fácil para rede conseguir um peso ideal rapidamente.