<a href="https://colab.research.google.com/github/AbhiramAnanthu/Machine-Learning-Road-map/blob/main/digit_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
mnist  = tf.keras.datasets.mnist
(train_images,train_labels),(test_images,test_labes) = mnist.load_data()
train_images, test_images = train_images/255.0, test_images/255.0
train_images = train_images.reshape(train_images.shape[0],-1)
test_images  = test_images.reshape(test_images.shape[0],-1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
input_size = 784
hidden_size = 128
output_size = 10
learning_rate = 0.01

W1 = np.random.randn(input_size,hidden_size) *0.01
b1 = np.zeros((1,hidden_size))
W2 = np.random.randn(hidden_size,output_size) *0.01
b2 = np.zeros((1,output_size))

In [4]:
def relu(Z):
  return np.maximum(0,Z)

def relu_derivative(Z):
  return Z>0

def softmax(Z):
  expZ = np.exp(Z-np.max(Z))
  return expZ/np.sum(expZ,axis=1,keepdims=True)


In [9]:
def forward_propogation(X,W1,b1,W2,b2):
  Z1 = np.dot(X,W1) + b1
  A1 = relu(Z1)
  Z2 = np.dot(A1,W2) + b2
  A2 = softmax(Z2)

  return Z1,A1,Z1,A2

def compute_lose(Y,A2):
  m = Y.shape[0]
  log_likelihood = -np.log(A2[range(m),Y])
  loss = np.sum(log_likelihood)/m
  return loss

def back_propogation(X,Y,Z1,A1,Z2,A2,W1,W2):
  m = X.shape[0]
  dZ2 = A2
  dZ2[range(m), Y] -= 1
  dZ2 /= m
  dW2 = np.dot(A1.T, dZ2)
  db2 = np.sum(dZ2, axis=0, keepdims=True)
  dA1 = np.dot(dZ2, W2.T)
  dZ1 = dA1 * relu_derivative(Z1)
  dW1 = np.dot(X.T, dZ1)
  db1 = np.sum(dZ1, axis=0, keepdims=True)
  return dW1, db1, dW2,db2

In [6]:
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    return W1, b1, W2, b2


In [10]:
num_epochs = 1000
for epoch in range(num_epochs):
    Z1, A1, Z2, A2 = forward_propogation(train_images, W1, b1, W2, b2)
    loss = compute_lose(train_labels, A2)
    dW1, db1, dW2, db2 = back_propogation(train_images, train_labels, Z1, A1, Z2, A2, W1, W2)
    W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

    if epoch % 100 == 0:
        print(f'Epoch {epoch}: Loss = {loss}')


Epoch 0: Loss = 2.3021625506031356
Epoch 100: Loss = 2.2902450889412687
Epoch 200: Loss = 2.267021394463486
Epoch 300: Loss = 2.217107701484511
Epoch 400: Loss = 2.1181726457419536
Epoch 500: Loss = 1.9510809685897397
Epoch 600: Loss = 1.7187014865274841
Epoch 700: Loss = 1.4655248268205376
Epoch 800: Loss = 1.2428270458911068
Epoch 900: Loss = 1.0683961648979188


In [12]:
_, _, _, A2_test = forward_propogation(train_images, W1, b1, W2, b2)
predictions = np.argmax(A2_test, axis=1)
accuracy = np.mean(predictions == train_labels)
print(f'Test accuracy: {accuracy}')


Test accuracy: 0.78915
