<a href="https://colab.research.google.com/github/datle2403/datle2403/blob/main/ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
X, y= fetch_openml('mnist_784', version=1, return_X_y= True)
y=y.astype(int)
X= ((X/255.)-0.5)* 2
X_train, X_test, y_train, y_test= train_test_split(X,y ,test_size=10000, random_state=123, stratify=y)

In [13]:
import numpy as np
mnist=np.load('mnist_scaled.npz')

In [14]:
X_train, y_train, X_test, y_test = [mnist[f] for f in ['X_train', 'y_train', 
                                    'X_test', 'y_test']]

In [15]:
X_train.shape

(60000, 784)

In [16]:
import sys
import numpy as np
class NeuralNetMLP(object):
  """
  Parameter:
  n_hidden= number of hidden unit
  l2: lambda value for L2-regularization
  epochs: number of passes over the training set
  eta: learning rate
  shuffle: shuffle training set every epoch
  minibatch_size: number of training examples per minibatch
  seed: random seed for initializing weights and shuffling

  Attributes
  eval_: dict collecting cost, training accuracy, valid accuracy for each epoch
  """
  def __init__(self, n_hidden=30, l2=0, epochs= 100, eta=0.01, shuffle=True, minibatch_size=1, seed=None):
    self.random= np.random.RandomState(seed)
    self.n_hidden= n_hidden
    self.l2=l2
    self.eta=eta
    self.epochs=epochs
    self.shuffle=shuffle
    self.minibatch_size=minibatch_size
  def _onehot(self, y, n_classes):
    """
    Encode label into one-hot

    y: shape=[n_examples]
    return onehot: shape= (n_examples, n_labels)
    """
    onehot= np.zeros((n_classes, y.shape[0]))
    for idx, val in enumerate(y.astype(int)):
      onehot[val,idx]=1.
    return onehot.T
  def _sigmoid(self, z):
    return 1./(1.+ np.exp(-np.clip(z,-250,250)))
  def _forward(self, X):
        """Compute forward propagation step"""

        # step 1: net input of hidden layer
        # [n_examples, n_features] dot [n_features, n_hidden]
        # -> [n_examples, n_hidden]
        z_h = np.dot(X, self.w_h) + self.b_h

        # step 2: activation of hidden layer
        a_h = self._sigmoid(z_h)

        # step 3: net input of output layer
        # [n_examples, n_hidden] dot [n_hidden, n_classlabels]
        # -> [n_examples, n_classlabels]

        z_out = np.dot(a_h, self.w_out) + self.b_out

        # step 4: activation output layer
        a_out = self._sigmoid(z_out)

        return z_h, a_h, z_out, a_out

  def _compute_cost(self, y_enc, output):
    """
    Compute the cost function
    y_enc: one_hot encoded class labels
    output: activation of output layer
    """
    L2_term= (self.l2 * (np.sum(self.w_h ** 2.) + np.sum(self.w_out ** 2.)))
    term1= -y_enc * (np.log(output))
    term2= (1.- y_enc) * np.log(1. -output)
    cost= np.sum(term1 - term2)+ L2_term
    return cost
  def predict(self, X):
    z_h, a_h, z_out, a_out= self._forward(X)
    y_pred= np.argmax(z_out, axis=1)
    return y_pred
  def fit(self, X_train, y_train, X_valid, y_valid):
    n_output= np.unique(y_train).shape[0]
    n_features= X_train.shape[1]

    self.b_h= np.zeros(self.n_hidden)
    self.w_h= self.random.normal(loc=0.0, scale=0.1, size=(n_features, self.n_hidden))

    self.b_out=np.zeros(n_output)
    self.w_out=self.random.normal(loc=0.0, scale=0.1, size=(self.n_hidden, n_output))

    epoch_strlen= len(str(self.epochs))
    self.eval_= {'cost': [], 'train_acc':[], 'valid_acc': []}

    y_train_enc= self._onehot(y_train, n_output)
    
    #iterate over training epochs

    for i in range(self.epochs):

      # iterate over minibatchs
      indices= np.arange(X_train.shape[0])
      if self.shuffle:
        self.random.shuffle(indices)
      for start_idx in range(0, indices.shape[0] - self.minibatch_size +
                                   1, self.minibatch_size):
        batch_idx = indices[start_idx:start_idx + self.minibatch_size]
        # forward prop
        z_h, a_h, z_out, a_out= self._forward(X_train[batch_idx])

        # back_ prop
        delta_out = a_out - y_train_enc[batch_idx]

        sigmoid_prime_h= a_h * (1. - a_h)

        delta_h= (np.dot(delta_out, self.w_out.T) * sigmoid_prime_h)

        grad_w_h= np.dot(X_train[batch_idx].T, delta_h)
        grad_b_h= np.sum(delta_h, axis=0)

        grad_w_out= np.dot(a_h.T, delta_out)
        grad_b_out= np.sum(delta_out, axis=0)

        #regularization and weight update
        delta_w_h= (grad_w_h+ self.l2 * self.w_h)
        delta_b_h= grad_b_h
        self.w_h -=self.eta * delta_w_h
        self.b_h -=self.eta * delta_b_h
        
        delta_w_out= (grad_w_out + self.l2* self.w_out)
        delta_b_out= grad_b_out
        self.w_out-= self.eta * delta_w_out 
        self.b_out-= self.eta * delta_b_out

      # evaluate for each epochs
      z_h, a_h, z_out, a_out= self._forward(X_train)
      cost= self._compute_cost(y_enc=y_train_enc, output=a_out)
      y_train_pred= self.predict(X_train)
      y_valid_pred= self.predict(X_valid)

      train_acc= ((np.sum(y_train == y_train_pred)).astype(np.float)/X_train.shape[0])
      valid_acc= ((np.sum(y_valid == y_valid_pred)).astype(np.float)/X_valid.shape[0])

      sys.stderr.write('\r%0*d/%d | Cost: %.2f '
                             '| Train/Valid Acc.: %.2f%%/%.2f%% ' %
                             (epoch_strlen, i+1, self.epochs, cost,
                              train_acc*100, valid_acc*100))
      sys.stderr.flush()

      self.eval_['cost'].append(cost)
      self.eval_['train_acc'].append(train_acc)
      self.eval_['valid_acc'].append(valid_acc)
    return self




In [17]:

nn = NeuralNetMLP(n_hidden=100, 
                  l2=0.01, 
                  epochs=200, 
                  eta=0.0005,
                  minibatch_size=100, 
                  shuffle=True,
                  seed=1)
nn.fit(X_train=X_train[:55000], 
       y_train=y_train[:55000],
       X_valid=X_train[55000:],
       y_valid=y_train[55000:])

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  train_acc= ((np.sum(y_train == y_train_pred)).astype(np.float)/X_train.shape[0])
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  valid_acc= ((np.sum(y_valid == y_valid_pred)).astype(np.float)/X_valid.shape[0])
200/200 | Cost: 5065.78 | Train/Valid Acc.: 99.28%/97.98% 

<__main__.NeuralNetMLP at 0x7f55e92ef580>

In [5]:
m[:10]

array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [6]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])