In [None]:
import tensorflow as tf
import numpy as np
import math
import sklearn.metrics as sm
import matplotlib.pyplot as plt

We assume that $W_i$ is the weight matrix that maps layer $i$ to layer $i+1$.

We also assume that the inputs $x$ are denoted $a_1$, the hidden-layer activations are $a_2$, and so on.


In [None]:
class CLF:
  """Two-layer fully connected classifier trained with mini-batch gradient descent.

  A flattened input of size ``d`` is mapped through a sigmoid hidden layer of
  size ``d1`` to a softmax output over ``k`` classes. ``W1`` maps the input
  layer to the hidden layer and ``W2`` maps the hidden layer to the output
  layer; there are no bias terms. Samples are stored column-wise: a single
  sample is a ``(d, 1)`` column and a batch of ``B`` samples is ``(d, B)``.
  """

  def __init__(self, x_train, y_train, x_test, y_test, seed=None):
    """Store the data (pixel values divided by 255) and initialise the weights.

    Args:
      x_train: NumPy array of training images, one image of 28*28 values per
        entry along the first axis.
      y_train: integer class labels aligned with ``x_train``.
      x_test: NumPy array of test images in the same layout as ``x_train``.
      y_test: integer class labels aligned with ``x_test``.
      seed: optional seed for the weight initialisation. ``None`` (default)
        draws from NumPy's global random state.
    """
    self.x_train = x_train / 255.0
    self.x_test = x_test / 255.0
    self.y_train = y_train
    self.y_test = y_test

    self.batch_size = 32
    self.learning_rate = 0.01
    # Trailing samples that do not fill a whole batch are never visited.
    self.train_steps = len(self.x_train) // self.batch_size
    self.epochs = 5

    self.d = 28*28   # input size (flattened image)
    self.d1 = 300    # hidden layer size
    self.k = 10      # number of classes

    # A private generator is used only when a seed is given, so unseeded
    # construction keeps consuming the global NumPy random stream.
    rng = np.random if seed is None else np.random.RandomState(seed)
    self.W1 = rng.uniform(-1 , 1 , (self.d1 , self.d))
    self.W2 = rng.uniform(-1 , 1 , (self.k , self.d1))

    # Mean cross-entropy loss of every epoch run by train().
    self.loss_history = []

  def sigmoid(self, a):
    """Element-wise logistic function 1 / (1 + exp(-a))."""
    return 1 / (1 + np.exp(-a))

  def softmax(self, a):
    """Softmax along axis 0, i.e. independently for every column of ``a``.

    The column maximum is subtracted before exponentiating so that large
    logits do not overflow to ``inf`` (which would produce ``nan``).

    Args:
      a: array of logits with at least one dimension, classes along axis 0.

    Returns:
      Array of the same shape whose columns are non-negative and sum to 1.
    """
    shifted = a - np.max(a, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

  def sigmoid_der(self, a):
    """Derivative of the logistic function evaluated at pre-activation ``a``."""
    s = self.sigmoid(a)
    return np.multiply(s, (1 - s))

  def forward_pass(self, inp):
    """Propagate ``inp`` through the network.

    Args:
      inp: input activations of shape ``(d, 1)`` or ``(d, B)``.

    Returns:
      Tuple ``(a2, a3)``: hidden-layer activations and output probabilities.
    """
    z2 = np.matmul(self.W1 , inp)
    a2 = self.sigmoid(z2)

    z3 = np.matmul(self.W2 , a2)
    a3 = self.softmax(z3)

    return a2 , a3

  def calc_loss(self, y_true, y_pred):
    """Cross-entropy between one-hot targets and predicted probabilities.

    Probabilities are clipped away from zero before the logarithm so that a
    prediction of exactly 0 yields a large finite loss instead of ``nan``.
    For a ``(k, B)`` batch the result is the sum over the ``B`` samples.
    """
    eps = 1e-12
    log_pred = np.log(np.clip(y_pred, eps, 1.0))
    return -np.sum(np.squeeze(y_true) * np.squeeze(log_pred))

  def compute_grad(self, y_true, y_pred, a2, a1):
    """Back-propagate the cross-entropy loss and return the weight gradients.

    Args:
      y_true: one-hot targets, shape ``(k, 1)`` or ``(k, B)``.
      y_pred: softmax outputs, same shape as ``y_true``.
      a2: hidden-layer activations (already passed through the sigmoid).
      a1: input activations.

    Returns:
      Tuple ``(del1, del2)``: gradients for ``W1`` and ``W2``. For batched
      inputs these are summed over the batch.
    """
    delta3 = y_pred - y_true

    # a2 is already sigmoid(z2), so the sigmoid derivative at z2 is
    # a2 * (1 - a2); applying the sigmoid to a2 again would be wrong.
    delta2 = np.multiply(np.matmul(np.transpose(self.W2) , delta3) , a2 * (1 - a2))

    del2 = np.matmul(delta3 , np.transpose(a2))
    del1 = np.matmul(delta2 , np.transpose(a1))

    return del1 , del2

  def encode(self, y):
    """Return the one-hot column vector of shape ``(k, 1)`` for label ``y``."""
    one_hot = np.zeros((self.k , 1), dtype=int)
    one_hot[y, 0] = 1
    return one_hot

  def _encode_batch(self, ys):
    """Return a ``(k, B)`` one-hot matrix with one column per label in ``ys``."""
    ys = np.asarray(ys)
    one_hot = np.zeros((self.k , len(ys)), dtype=int)
    one_hot[ys, np.arange(len(ys))] = 1
    return one_hot

  def train(self):
    """Run ``epochs`` passes of mini-batch gradient descent over the training set.

    Each batch is processed in one matrix pass; the resulting gradient is the
    sum of the per-sample gradients, which is then averaged by the batch size.
    The mean loss of every epoch is appended to ``self.loss_history``.
    """
    scale = self.learning_rate / self.batch_size

    for epoch in range(self.epochs):
      epoch_loss = 0.0

      for step in range(self.train_steps):
        start = step * self.batch_size
        stop = start + self.batch_size

        # One sample per column: (B, 28, 28) -> (B, d) -> (d, B).
        x_batch = self.x_train[start:stop].reshape(self.batch_size, self.d).T
        y_batch = self._encode_batch(self.y_train[start:stop])

        a2 , yhat = self.forward_pass(x_batch)
        epoch_loss += self.calc_loss(y_batch, yhat)

        del1 , del2 = self.compute_grad(y_batch , yhat, a2, x_batch)

        self.W1 = self.W1 - (scale * del1)
        self.W2 = self.W2 - (scale * del2)

      if self.train_steps:
        self.loss_history.append(epoch_loss / (self.train_steps * self.batch_size))

  def predict(self, x_test, y_test=None):
    """Return the ``(k, 1)`` class probabilities for a single sample.

    Args:
      x_test: one input sample holding ``d`` values (already scaled).
      y_test: unused; accepted only so existing two-argument calls keep working.
    """
    x_test = x_test.reshape(self.d, 1)
    _, yhat = self.forward_pass(x_test)
    return yhat

  def evaluate(self):
    """Print and return the classification accuracy on the stored test set."""
    # Classify the whole test set in a single forward pass (one column each).
    x_all = self.x_test.reshape(len(self.x_test), self.d).T
    _, probs = self.forward_pass(x_all)
    yhat_vec = np.argmax(probs, axis=0)

    acc = sm.accuracy_score(self.y_test , yhat_vec)
    print(f'Accuracy is: {acc}')

    return acc

  


  






In [None]:
# Load MNIST and build four classifiers that see the first 25%, 50%, 75%
# and 100% of the training set; all of them share the full test set.
mnist = tf.keras.datasets.mnist
(x_train, y_train) , (x_test, y_test) = mnist.load_data()
total_size = len(x_train)

# Number of leading training samples used by each partial model.
n_25, n_50, n_75 = (math.ceil(total_size * frac) for frac in (0.25, 0.50, 0.75))

ANN_25 = CLF(x_train[:n_25] , y_train[:n_25] , x_test , y_test)
ANN_50 = CLF(x_train[:n_50] , y_train[:n_50] , x_test , y_test)
ANN_75 = CLF(x_train[:n_75] , y_train[:n_75] , x_test , y_test)
ANN_100 = CLF(x_train , y_train , x_test , y_test)

# Train in order of increasing training-set size.
for model in (ANN_25, ANN_50, ANN_75, ANN_100):
  model.train()

In [None]:
# Test-set accuracy of each model, ordered by training-set fraction
# (25%, 50%, 75%, 100%); these are the points of the learning curve.
LC = [model.evaluate() for model in (ANN_25, ANN_50, ANN_75, ANN_100)]

In [None]:
# Error rate is the complement of the accuracy for each model.
Error = 1 - np.array(LC)
print(f'The accuracies are {LC}')
print(f'The Errors are: {Error}')

# Learning curve: test accuracy against the share of training data used.
# An explicit figure/axes pair keeps the plot self-contained, and the title
# and markers let the figure stand alone when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot([25,50,75,100] , LC, marker='o')
ax.set_xlabel('Percentage of total dataset used')
ax.set_ylabel('Test set accuracy')
ax.set_title('Learning curve')
plt.show()