<a href="https://colab.research.google.com/github/Rupesh05/ML_Workshop---IIT-Kanpur/blob/main/mnist_mln.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification of Handwritten Digits Using a Multi-Layer Network

# MNIST Dataset

<img src="https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png" title="MNIST dataset" align="center"/>



# Problem
Classify images of handwritten digits into the ten classes 0-9. <br>
Each image is 28x28 pixels.

<img src="https://corochann.com/wp-content/uploads/2017/02/mnist_plot.png" title="" align="center"/>


# Understanding the data

In [None]:
# MNIST data is present in the keras library. Load the data using load_data function of mnist
# Load the data into 4 variables - train_samples, train_lables, test_samples, test_lables
from keras.datasets import mnist

#Your code here
from keras.datasets.mnist import load_data

# load_data() returns ((train_x, train_y), (test_x, test_y)). Unpack it directly
# instead of storing the tuple in `mnist`, which would shadow the module imported above.
(train_samples, train_lables), (test_samples, test_lables) = load_data()
print(train_samples.dtype)

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
uint8


### Shape of Data

In [None]:
## Print and observe the shapes of the 4 variables - train_samples, train_lables, test_samples, test_lables

#Your code here
# Walk over (name, array) pairs so every split is reported in the same format.
for split_name, split_array in (
    ("train_samples", train_samples),
    ("train_lables", train_lables),
    ("test_samples", test_samples),
    ("test_lables", test_lables),
):
    print(split_name + " = ", split_array.shape)

train_samples =  (60000, 28, 28)
train_lables =  (60000,)
test_samples =  (10000, 28, 28)
test_lables =  (10000,)


### Range of Values
Inspect the minimum and maximum values of the samples and the labels.

In [None]:
import numpy as np
### Print max value of samples in train_samples (and of the labels)
# Your code here
# train_samples_max is kept in a variable because the normalization step divides by it.
train_samples_max = train_samples.max()
train_lables_max = train_lables.max()
print(train_samples_max)
print(train_lables_max)

255
9


In [None]:
### Print min values of samples in train_samples (and of the labels)
#Your code here
for values in (train_samples, train_lables):
    print(values.min())

0
0


# Data Preparation

### Normalize inputs to [0, 1]

In [None]:
# Convert data type of elements in train_samples and test_samples from uint8 to float32
#Your Code here
# Only the pixel arrays are cast: they are about to be scaled to fractional values.
# The labels are class indices (0-9) and stay integers so they can be used
# directly as indices when building the one-hot vectors.
train_samples = train_samples.astype('float32')
test_samples = test_samples.astype('float32')

In [None]:
# Normalize inputs to [0, 1]
# Divide train_samples and test_samples by max value of train_samples
#Your code here
# Both splits must go through exactly the same transform. Previously the training
# images were additionally standardized per pixel (mean/std) while the test images
# were only divided by the max, so the network was evaluated on inputs from a
# different distribution than it was trained on.
# The scale factor comes from the training data only, so no test-set statistics
# leak into preprocessing.
train_samples = train_samples / train_samples_max
test_samples = test_samples / train_samples_max

### Convert outputs to 1-hot vectors

In [None]:
# Turn each flat label vector into a column of shape (num_samples, 1).
train_lables = np.reshape(np.array(train_lables), (-1, 1))
test_lables = np.reshape(np.array(test_lables), (-1, 1))
# convert train_labels and test_labels to 1-hot encoding
def oneHot(y, Ny):
    """Return the one-hot encoding of class label ``y`` as a length-``Ny`` vector.

    Parameters
    ----------
    y : int, float, or single-element array
        Class index. Single-element arrays (e.g. one row of an ``(N, 1)``
        label column) are accepted; the value is truncated to an integer.
    Ny : int
        Number of classes, i.e. the length of the returned vector.

    Returns
    -------
    numpy.ndarray
        Float vector of zeros with a single 1 at position ``y``.

    Raises
    ------
    IndexError
        If ``y`` is outside ``[0, Ny)``. Negative labels are rejected
        explicitly because NumPy would otherwise wrap them around and
        silently mark the wrong class.
    """
    # .item() extracts the Python scalar from a 0-d or single-element array;
    # calling int() directly on a 1-element array is deprecated in NumPy >= 1.25.
    index = int(np.asarray(y).item())
    if not 0 <= index < Ny:
        raise IndexError("label %d is out of range for %d classes" % (index, Ny))
    Y = np.zeros(Ny)
    Y[index] = 1
    return Y
  

# Number of output classes: one per digit 0-9.
Ny = 10

#Your code here
# Encode every label row as a length-Ny indicator vector; collecting the
# per-sample vectors yields arrays of shape (num_samples, Ny).
train_lables_oneHot = np.array(list(map(lambda label: oneHot(label, Ny), train_lables)))
test_lables_oneHot = np.array(list(map(lambda label: oneHot(label, Ny), test_lables)))

In [None]:
# Spot-check the label column: report its shape and only the first few rows
# rather than printing the whole array.
print(test_lables.shape)
print(test_lables[:5])

# Network Architecture

In [None]:
## Input is currently in the shape (samples, height, width)
## Reshape it such that it is suitable to be fed into a feed forward network
## hint - shape should be (samples, ?)
# Your code here
# Take the sample count from the array itself instead of hard-coding 60000, so the
# cell keeps working on a subset or a different dataset; -1 lets NumPy infer the
# flattened feature count (height * width).
train_samples = train_samples.reshape(train_samples.shape[0], -1)
print(train_samples.shape)

(60000, 784)


In [None]:
#Layer definitions
from keras.layers import Input, Dense
from keras import optimizers
from keras.models import Model
##Create a multi-layered neural network
## 1st hidden layer with 512 neurons and 'relu' activation
## 2nd hidden layer with 256 neurons and 'relu' activation
## Output layer with softmax activation
### Use Adam optimizer of keras
### Categorical cross entropy is loss
# Your code here
Nx = 784  # one input feature per pixel of a flattened 28x28 image

x = Input(shape=(Nx,))

# Stack the two ReLU hidden layers on top of the input tensor.
y = x
for units in (512, 256):
    y = Dense(units, activation='relu')(y)

# Ten softmax outputs: one probability per digit class.
y = Dense(10, activation='softmax')(y)

model = Model(inputs=x, outputs=y)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)


Instructions for updating:
Colocations handled automatically by placer.


In [None]:
### Print summary of model and check if it is as desired. Also check total number of parameters to be trained.
# summary() lists every layer with its output shape and parameter count, followed by the totals.
model.summary()
#Your code here

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [None]:
### Fit the model on train samples. Play with different values of epochs and batch_size. 
### See which gives the optimal result in the least amount of time.
# batch_size is spelled out (32 is the Keras default, so training is unchanged) to make
# it easy to tune. The returned History is kept in a variable so the per-epoch
# loss/accuracy can be inspected later and the cell does not end with a bare object repr.
history = model.fit(train_samples, train_lables_oneHot, epochs=10, batch_size=32)


Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff8d3daaa90>

In [None]:
##Evaluate the model on test samples and print accuracy
# Your code here
# Flatten each test image the same way as the training images. The sample count is
# read from the arrays instead of hard-coding 10000 so the cell also works on a
# subset of the test data.
test_samples = test_samples.reshape(test_samples.shape[0], -1)
test_lables_oneHot = test_lables_oneHot.reshape(test_lables_oneHot.shape[0], -1)

# evaluate() returns the loss followed by the metrics given to compile() (accuracy).
loss, accuracy = model.evaluate(test_samples, test_lables_oneHot, verbose=0)
print('Accuracy :%0.3f'%accuracy)

# Per-class probabilities for every test image; used for the confusion matrix.
Y_pred = model.predict(test_samples)

Accuracy :0.831


In [None]:
### Print confusion matrix
from sklearn.metrics import confusion_matrix
# Your code here
# Collapse one-hot targets and predicted probabilities back to digit indices.
# In the printed matrix, rows are the true digits and columns the predicted digits.
true_digits = np.argmax(test_lables_oneHot, axis=1)
predicted_digits = np.argmax(Y_pred, axis=1)
print(confusion_matrix(true_digits, predicted_digits))

[[ 974    0    0    0    0    0    0    3    0    3]
 [   0 1126    1    0    0    0    4    4    0    0]
 [  66   23  748   62    9    0   11  105    3    5]
 [   4    3    1  937    1   13    0   34    0   17]
 [   8    2    0    0  826    0    3   44    0   99]
 [  59    6    0   40    6  654   34   11    1   81]
 [ 113    3    0    1    6    1  826    1    0    7]
 [   2   10    3    0    0    0    0 1009    1    3]
 [  23   16    2  281    6   51   23   48  354  170]
 [   8   10    0    8    3    0    1  120    0  859]]
