<a href="https://colab.research.google.com/github/NikuDubenco/code_replications/blob/master/multilayer_perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# credits Python machine learning, S. Raschka, V. Mirjalili / book p.396
# multilayer perceptron 

import numpy as np
import sys

class NeuralNetMLP(object):
  '''
  Feedforward neural network / multi-layer perceptron classifier.

  One sigmoid hidden layer and one sigmoid output layer, trained with
  minibatch gradient descent on the (optionally L2-regularized) logistic cost.

  Parameters
  ----------
  n_hidden: int (default: 30)
      Number of hidden units.

  l2: float (default: 0.)
      Lambda value for L2-regularization.
      No regularization if l2=0. (default)

  epochs: int (default: 100)
      Number of passes over the training set.

  eta: float (default: .001)
      Learning rate.

  shuffle: bool (default: True)
      Shuffle training data every epoch
      if True to prevent cycles.

  minibatch_size: int (default: 1)
      Number of training samples per minibatch.

  seed: int (default: None)
      Random seed for initializing weights and shuffling.


  Attributes
  ----------
  eval_ : dict
      Dictionary collecting the cost, training accuracy, and validation accuracy
      for each epoch during training.

  w_h, b_h : arrays, shape = [n_features, n_hidden] and [n_hidden]
      Weights and bias of the hidden layer (set by `fit`).

  w_out, b_out : arrays, shape = [n_hidden, n_classes] and [n_classes]
      Weights and bias of the output layer (set by `fit`).

  '''
  def __init__(self, n_hidden=30, l2=0., epochs=100, eta=.001, shuffle=True,
              minibatch_size=1, seed=None):

    self.random = np.random.RandomState(seed)
    self.n_hidden = n_hidden
    self.l2 = l2
    self.epochs = epochs
    self.eta = eta
    self.shuffle = shuffle
    self.minibatch_size = minibatch_size

  def _onehot(self, y, n_classes):
    '''
    Encode labels into one-hot representation.

    Parameters
    ----------
    y: array, shape = [n_samples]
        Target values; must be integer class indices in 0..n_classes-1
        because each label is used directly as a column index.
    n_classes: int
        Number of distinct classes (width of the encoding).

    Returns
    -------
    onehot: array, shape = [n_samples, n_classes]

    '''
    y = np.asarray(y).astype(int)
    onehot = np.zeros((y.shape[0], n_classes))
    onehot[np.arange(y.shape[0]), y] = 1.
    return onehot

  def _sigmoid(self, z):
    '''Compute the logistic (sigmoid) function element-wise.'''
    # clip the net input so np.exp cannot overflow for extreme values
    return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

  def _forward(self, X):
    '''
    Compute the forward propagation step.

    Parameters
    ----------
    X: array, shape = [n_samples, n_features]

    Returns
    -------
    z_h, a_h: arrays, shape = [n_samples, n_hidden]
        Net input and activation of the hidden layer.
    z_out, a_out: arrays, shape = [n_samples, n_classes]
        Net input and activation of the output layer.

    '''
    # [n_samples, n_features] dot [n_features, n_hidden] -> [n_samples, n_hidden]
    z_h = np.dot(X, self.w_h) + self.b_h
    a_h = self._sigmoid(z_h)

    # [n_samples, n_hidden] dot [n_hidden, n_classes] -> [n_samples, n_classes]
    z_out = np.dot(a_h, self.w_out) + self.b_out
    a_out = self._sigmoid(z_out)

    return z_h, a_h, z_out, a_out

  def _compute_cost(self, y_enc, output):
    '''
    Compute the logistic cost with the L2 penalty on both weight matrices.

    Parameters
    ----------
    y_enc: array, shape = [n_samples, n_classes]
        One-hot encoded class labels.
    output: array, shape = [n_samples, n_classes]
        Activation of the output layer (forward propagation).

    Returns
    -------
    cost: float
        Regularized cost.

    '''
    l2_term = self.l2 * (np.sum(self.w_h ** 2.) + np.sum(self.w_out ** 2.))

    # keep activations strictly inside (0, 1) so np.log never sees 0
    eps = 1e-12
    output = np.clip(output, eps, 1. - eps)

    term1 = -y_enc * np.log(output)
    term2 = (1. - y_enc) * np.log(1. - output)
    return np.sum(term1 - term2) + l2_term

  def predict(self, X):
    '''
    Predict class labels.

    Parameters
    ----------
    X: array, shape = [n_samples, n_features]
        Input layer with original features.

    Returns
    -------
    y_pred: array, shape = [n_samples]
        Predicted class labels (index of the largest output net input).

    '''
    z_h, a_h, z_out, a_out = self._forward(X)
    # sigmoid is monotonic, so argmax over z_out equals argmax over a_out
    y_pred = np.argmax(z_out, axis=1)
    return y_pred

  def fit(self, X_train, y_train, X_val, y_val):
    '''
    Learn weights from training data.

    Parameters
    ----------
    X_train: array, shape = [n_samples, n_features]
        Input layer with original features.
    y_train: array, shape = [n_samples]
        Target class labels (integer indices 0..n_classes-1).
    X_val: array, shape = [n_samples, n_features]
        Sample features for validation during training.
    y_val: array, shape = [n_samples]
        Sample labels for validation during training.


    Returns
    -------
    self

    '''

    n_output = np.unique(y_train).shape[0]  # no. of class labels

    n_features = X_train.shape[1]

    #############################
    # Weight initialization
    #############################

    # weights for input -> hidden
    self.b_h = np.zeros(self.n_hidden)
    self.w_h = self.random.normal(loc=0.0, scale=.1,
                                  size=(n_features, self.n_hidden))

    # weights for hidden -> output
    self.b_out = np.zeros(n_output)
    self.w_out = self.random.normal(loc=0.0, scale=.1,
                                    size=(self.n_hidden, n_output))

    epoch_strlen = len(str(self.epochs))  # for progress formatting
    self.eval_ = {'cost':[], 'train_acc':[], 'val_acc':[]}

    y_train_enc = self._onehot(y_train, n_output)

    # iterate over training epochs
    for i in range(self.epochs):
      # iterate over minibatches
      indices = np.arange(X_train.shape[0])

      if self.shuffle:
        self.random.shuffle(indices)

      # the range stops early, so a trailing incomplete minibatch is skipped
      for start_idx in range(0, indices.shape[0] - self.minibatch_size + 1,
                             self.minibatch_size):
        batch_idx = indices[start_idx: start_idx + self.minibatch_size]
        X_batch = X_train[batch_idx]

        # forward propagation
        z_h, a_h, z_out, a_out = self._forward(X_batch)

        ######################
        # Backpropagation
        ######################

        # [n_samples, n_classlabels]
        sigma_out = a_out - y_train_enc[batch_idx]

        # [n_samples, n_hidden]
        sigmoid_derivative_h = a_h * (1. - a_h)

        # [n_samples, n_classlabels] dot [n_classlabels, n_hidden] -> [n_samples, n_hidden]
        sigma_h = (np.dot(sigma_out, self.w_out.T) * sigmoid_derivative_h)

        # [n_features, n_samples] dot [n_samples, n_hidden] -> [n_features, n_hidden]
        grad_w_h = np.dot(X_batch.T, sigma_h)
        grad_b_h = np.sum(sigma_h, axis=0)

        # [n_hidden, n_samples] dot [n_samples, n_classlabels] -> [n_hidden, n_classlabels]
        grad_w_out = np.dot(a_h.T, sigma_out)
        grad_b_out = np.sum(sigma_out, axis=0)

        # regularization and weight updates
        delta_w_h = (grad_w_h + self.l2 * self.w_h)
        delta_b_h = grad_b_h  # bias is not regularized
        self.w_h -= self.eta * delta_w_h
        self.b_h -= self.eta * delta_b_h

        delta_w_out = (grad_w_out + self.l2 * self.w_out)
        delta_b_out = grad_b_out  # bias is not regularized
        self.w_out -= self.eta * delta_w_out
        self.b_out -= self.eta * delta_b_out

      ####################
      # Evaluation
      ###################

      # evaluation after each epoch during training
      z_h, a_h, z_out, a_out = self._forward(X_train)

      cost = self._compute_cost(y_enc=y_train_enc, output=a_out)

      y_train_pred = self.predict(X_train)
      y_val_pred = self.predict(X_val)

      # builtin float: the np.float alias no longer exists in current NumPy
      train_acc = float(np.sum(y_train == y_train_pred)) / X_train.shape[0]

      val_acc = float(np.sum(y_val == y_val_pred)) / X_val.shape[0]

      sys.stderr.write('\r%0*d/%d | Cost: %.2f | Train/Val Acc: %.2f%%/%.2f%% '
                       %
                       (epoch_strlen, i+1, self.epochs, cost, train_acc * 100,
                       val_acc * 100))

      sys.stderr.flush()

      self.eval_['cost'].append(cost)
      self.eval_['train_acc'].append(train_acc)
      self.eval_['val_acc'].append(val_acc)

    return self
        
      