In [None]:
!pip install tensorflow==2.4.1
!pip install gpustat
import numpy as np
import tensorflow as tf
from keras.utils.np_utils import to_categorical
import time

In [None]:
# Load MNIST: 28x28 grayscale digit images with integer class labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixel values by 1/255 and flatten every image into one row vector.
# The row count is taken from the array itself instead of being hard-coded,
# so the cell keeps working if only a subset of the data is used.
x_train = (x_train / 255.0).reshape(len(x_train), -1)
x_test = (x_test / 255.0).reshape(len(x_test), -1)

# One-hot encode the labels. The width is fixed to the 10 digit classes so the
# train and test targets always have the same number of columns.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)


In [4]:
class MLP_2():
    """Fully connected network with two ReLU hidden layers and a softmax output.

    The network has no bias terms and is trained with stochastic gradient
    descent on one sample at a time. Besides the weight matrices ``W0``,
    ``W1`` and ``W2``, ``self.params`` also caches the values of the most
    recent forward pass (``A0`` input, ``A1``-``A3`` pre-activations,
    ``S1``-``S3`` activations) so that ``backward`` can reuse them.
    """

    def __init__(self, sizes, epochs=100, l_rate=0.001):
        """Store the hyper-parameters and draw the initial weights.

        Args:
            sizes: Sequence whose first four entries are the widths of the
                input layer, first hidden layer, second hidden layer and
                output layer.
            epochs: Number of full passes over the training data in ``train``.
            l_rate: Step size applied to every weight update.
        """
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate
        self.params = self.initialization()

    def softmax(self, x, derivative=False):
        """Return the softmax of ``x`` along axis 0, or its element-wise derivative.

        With ``derivative=True`` the result is ``s * (1 - s)``, i.e. only the
        diagonal of the softmax Jacobian.
        """
        # Subtracting the maximum keeps np.exp from overflowing; it does not
        # change the normalised result.
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            return probs * (1 - probs)
        return probs

    def sigmoid(self, x, derivative=False):
        """Return the logistic sigmoid of ``x``, or its derivative ``s * (1 - s)``.

        The exponential is only ever taken of non-positive values, so large
        magnitude inputs neither overflow nor produce NaN in the derivative.
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))
        sig = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        if derivative:
            return sig * (1 - sig)
        return sig

    def ReLU(self, x, derivative=False):
        """Return ``max(x, 0)``, or the 0/1 derivative mask of ReLU.

        A new array is always returned; ``x`` itself is never modified, so the
        caller's pre-activation values stay intact.
        """
        x = np.asarray(x)
        if derivative:
            # 1 where the unit is active (x > 0), 0 elsewhere, in x's dtype.
            return (x > 0).astype(x.dtype)
        return np.maximum(x, 0)

    def initialization(self):
        """Draw random weight matrices for the two hidden layers and the output layer.

        Returns:
            Dict with ``W0`` (hidden_1 x input), ``W1`` (hidden_2 x hidden_1)
            and ``W2`` (output x hidden_2). Each matrix holds standard-normal
            samples scaled by ``sqrt(1 / rows)``.
        """
        # number of nodes in each layer
        input_layer = self.sizes[0]
        hidden_1 = self.sizes[1]
        hidden_2 = self.sizes[2]
        output_layer = self.sizes[3]

        params = {
            'W0': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W1': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W2': np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
        }

        return params

    def forward(self, x_train):
        """Run one sample through the network and cache every intermediate value.

        Args:
            x_train: Input vector for a single sample.

        Returns:
            The softmax output ``S3``.
        """
        params = self.params
        params['A0'] = x_train

        # input layer -> hidden layer 1
        params['A1'] = np.dot(params['W0'], params['A0'])
        params['S1'] = self.ReLU(params['A1'])

        # hidden layer 1 -> hidden layer 2
        params['A2'] = np.dot(params['W1'], params['S1'])
        params['S2'] = self.ReLU(params['A2'])

        # hidden layer 2 -> output layer
        params['A3'] = np.dot(params['W2'], params['S2'])
        params['S3'] = self.softmax(params['A3'])

        return params['S3']

    def backward(self, y_train, output):
        """Back-propagate the error of the most recent ``forward`` call.

        Args:
            y_train: Target vector for the sample that was just forwarded.
            output: The value returned by ``forward`` for that sample.

        Returns:
            Dict mapping ``'W0'``, ``'W1'`` and ``'W2'`` to arrays of the same
            shape as the corresponding weight matrix.
        """
        params = self.params
        changes_to_w = {}

        # Calculate W2 update: derivative of the squared error averaged over
        # the output units, times the element-wise softmax derivative.
        error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['A3'], derivative=True)
        changes_to_w['W2'] = np.outer(error, params['S2'])

        # Calculate W1 update
        error = np.dot(params['W2'].T, error) * self.ReLU(params['A2'], derivative=True)
        changes_to_w['W1'] = np.outer(error, params['S1'])

        # Calculate W0 update
        error = np.dot(params['W1'].T, error) * self.ReLU(params['A1'], derivative=True)
        changes_to_w['W0'] = np.outer(error, params['A0'])

        return changes_to_w

    def update_network_parameters(self, changes_to_w):
        """Apply one gradient-descent step, in place, to every weight in ``changes_to_w``."""
        for key, value in changes_to_w.items():
            self.params[key] -= self.l_rate * value

    def train(self, x_train, y_train, x_val, y_val):
        """Train for ``self.epochs`` epochs, updating the weights after every sample.

        After each epoch the accuracy on ``(x_val, y_val)`` and the time
        elapsed since the start of training are printed.
        """
        start_time = time.time()
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward(x)
                changes_to_w = self.backward(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.evaluate_acc(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))

    def evaluate_acc(self, x_val, y_val):
        """Return the fraction of samples whose arg-max prediction matches the arg-max of the target."""
        predictions = []

        for x, y in zip(x_val, y_val):
            output = self.forward(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))

        return np.mean(predictions)

In [None]:
# Build the 784-128-128-10 network with the default number of epochs and
# learning rate, then train it; the test split serves as the validation set
# whose accuracy is printed after every epoch.
mlp2_unormalized = MLP_2(sizes=[784, 128, 128, 10])
mlp2_unormalized.train(
    x_train=x_train,
    y_train=y_train,
    x_val=x_test,
    y_val=y_test,
)