<a href="https://colab.research.google.com/github/Adil2lab/AIMLNN/blob/main/notebooks/DigitRecogT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Getting the **training** dataset
We are getting the Fashion MNIST dataset for Digit Recognition.

In [2]:
data_train = pd.read_csv('/content/drive/MyDrive/archive/fashion-mnist_train.csv')
data_dev = pd.read_csv('/content/drive/MyDrive/archive/fashion-mnist_test.csv')

In [3]:
training_data_np = np.array(data_train)
np.random.shuffle(training_data_np)

Y_train = training_data_np[:, 0].astype(int)
X_train = training_data_np[:, 1:]
X_train = X_train.T / 255.0

test_data_np = np.array(data_dev)
np.random.shuffle(test_data_np)

Y_test = test_data_np[:, 0].astype(int)
X_test = test_data_np[:, 1:]
X_test = X_test.T / 255.0

In [4]:
def init_param():
  W1 = np.random.rand(10, 784) - 0.5
  b1 = np.random.rand(10, 1) - 0.5

  W2 = np.random.rand(10, 10) - 0.5
  b2 = np.random.rand(10, 1) - 0.5

  return W1, b1, W2, b2

In [5]:
def ReLU(Z):
  return np.maximum(0, Z)

In [6]:
def SoftMax(Z):
  # Subtracting the maximum value for numerical stability
  exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))
  a = exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
  return a

In [7]:
def ForwProp(W1, b1, W2, b2, X):
  Z1 = W1.dot(X) + b1
  A1 = ReLU(Z1)

  Z2 = W2.dot(A1) + b2
  A2 = SoftMax(Z2)
  return Z1, A1, Z2, A2

In [8]:
def oneHot(Y):
  one_hot_Y = np.zeros((Y.size, Y.max() + 1))
  one_hot_Y[np.arange(Y.size), Y] = 1
  return one_hot_Y.T

In [9]:
def derReLU(Z):
  return Z > 0

In [10]:
def BackProp(Z1, A1, Z2, A2, W2, X, Y):
  m = Y.size
  hY = oneHot(Y)
  dZ2 = A2 - hY
  dW2 = 1 / m * dZ2.dot(A1.T)
  db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)

  dZ1 = W2.T.dot(dZ2) * derReLU(Z1)
  dW1 = 1 / m * dZ1.dot(X.T)
  db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
  return dW1, db1, dW2, db2

In [11]:
def updateParams(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
  W1 = W1 - alpha * dW1
  b1 = b1 - alpha * db1
  W2 = W2 - alpha * dW2
  b2 = b2 - alpha * db2
  return W1, b1, W2, b2

In [12]:
def getPredictions(A2):
  return np.argmax(A2, 0)

def getAccuracy(predictions, Y):
  print(predictions, Y)
  return np.sum(predictions == Y) / Y.size

In [13]:
def gradientDescent(x, y, alpha, iterations):
  W1, b1, W2, b2 = init_param()
  for i in range(iterations):
    Z1, A1, Z2, A2 = ForwProp(W1, b1, W2, b2, x)
    dW1, db1, dW2, db2 = BackProp(Z1, A1, Z2, A2, W2, x, y)
    W1, b1, W2, b2 = updateParams(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
    if (i % 10 == 0):
      print("Iteration: ", i)
      print("Accuracy: ", getAccuracy(getPredictions(A2), y))
  return W1, b1, W2, b2

In [14]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = ForwProp(W1, b1, W2, b2, X)
    predictions = getPredictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [15]:
W1, b1, W2, b2 = gradientDescent(X_train, Y_train, 0.10, 550)

Iteration:  0
[9 6 9 ... 6 9 9] [2 4 3 ... 3 6 1]
Accuracy:  0.0819
Iteration:  10
[0 2 2 ... 2 2 1] [2 4 3 ... 3 6 1]
Accuracy:  0.22525
Iteration:  20
[0 2 3 ... 2 2 1] [2 4 3 ... 3 6 1]
Accuracy:  0.37498333333333334
Iteration:  30
[2 3 3 ... 3 2 1] [2 4 3 ... 3 6 1]
Accuracy:  0.45181666666666664
Iteration:  40
[2 3 3 ... 3 2 1] [2 4 3 ... 3 6 1]
Accuracy:  0.49733333333333335
Iteration:  50
[2 3 3 ... 3 2 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5322333333333333
Iteration:  60
[2 3 3 ... 3 0 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5582666666666667
Iteration:  70
[2 3 3 ... 3 0 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5784333333333334
Iteration:  80
[2 3 3 ... 3 0 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5869333333333333
Iteration:  90
[6 3 3 ... 3 6 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5888833333333333
Iteration:  100
[6 3 4 ... 3 6 1] [2 4 3 ... 3 6 1]
Accuracy:  0.5994166666666667
Iteration:  110
[6 3 4 ... 3 6 1] [2 4 3 ... 3 6 1]
Accuracy:  0.6082166666666666
Iteration:  120
[6 3 4 ... 3 6 1] [2 4 3 ... 3 