In [60]:
from PIL import Image
import tensorflow as tf
import mnist
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix

Creating the training and testing variables after loading the dataset.

In [61]:
# Build the training and testing variables.
#
# The standalone `mnist` package used to provide them via
#     X_train = mnist.train_images()
#     y_train = mnist.train_labels()
#     X_test = mnist.test_images()
#     y_test = mnist.test_labels()
# but that no longer works: the library tries to download the MNIST dataset
# from a URL that is no longer valid or accessible. TensorFlow/Keras is used
# to load MNIST instead.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Keep the pixel data as unsigned 8-bit integers, i.e. values in [0, 255].
X_train, X_test = (images.astype(np.uint8) for images in (X_train, X_test))

In [62]:
# Show what each of the four variables currently holds.
for label, values in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(label, values)

# X_train and X_test are 3-dimensional arrays, whereas y_train and y_test are
# 1-dimensional arrays of digit labels (0 to 9). Confirm the rank of X:
print("\nThe dimension of X is:", X_train.ndim)

X_train [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]
X_test [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 

In [63]:
# Report the size of the training array along every axis.
print(np.shape(X_train))

(60000, 28, 28)


So we have 60,000 training images, each stored as a 28 x 28 matrix of pixel values.

In [64]:
# Flatten every 28 x 28 image into one row of 784 values so it can be fed to
# the neural network. The -1 asks NumPy to infer that dimension (the number of
# images) from the array's total size.
X_train = np.reshape(X_train, (-1, 28 * 28))
X_test = np.reshape(X_test, (-1, 28 * 28))
print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


In [65]:
# Look at a single flattened training image, i.e. one row of 784 pixel values.
print(X_train[0, :])

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255
 247 127   0   0   0   0   0   0   0   0   0   0   0   0  30  36  94 154
 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0   0   0
   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82
  82  56  39   0   0   0   0   0   0   0   0   0   0   0   0  18 219 253
 253 253 253 253 198 182 247 241   0   0   0   0   

In [66]:
# Scale the pixel values from [0, 255] down to [0, 1] before training.
#
# The integer-dtype guard makes this cell safe to re-run: after the first
# execution the arrays are floating point, so running the cell again does not
# divide them a second time (which would silently shrink the data to
# [0, 1/255]).
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255

# Alternative that stores the result as float32 instead:
#   X_train = X_train.astype('float32') / 255.0
#   X_test = X_test.astype('float32') / 255.0

Proceeding to create the model:

In [67]:
# Classifier: clf
# Multi-layer perceptron using the 'adam' solver (chosen because the dataset is
# very large), ReLU activations and two hidden layers of 64 units each.
# random_state is pinned so that training is repeatable: without it every
# Restart & Run All produces a slightly different model and accuracy.
clf = MLPClassifier(
    solver='adam',
    activation='relu',
    hidden_layer_sizes=(64, 64),
    random_state=42,
)

MODEL_TRAINING

In [68]:
# Train the classifier on the training images (X_train) and their labels (y_train).
clf.fit(X_train, y_train)

In [69]:
# Evaluate the trained model on the test set.
prediction = clf.predict(X_test)

# NOTE: despite its name, `acc` holds the confusion matrix (rows = true digit,
# columns = predicted digit), not a single accuracy figure. The name is kept
# because a later cell passes `acc` to accuracy() to get the percentage.
acc = confusion_matrix(y_test, prediction)

# Label the output for what it is, and start the matrix on its own line so
# that its first row lines up with the others.
print(f'The confusion matrix of our model is:\n{acc}')


The accuracy of out model is: [[ 968    0    2    0    2    2    3    1    1    1]
 [   0 1124    2    1    0    1    2    1    4    0]
 [   3    0 1008    4    2    0    3    4    7    1]
 [   0    0    9  979    0    6    2    4    4    6]
 [   1    1    3    1  960    2    4    2    0    8]
 [   2    0    1   12    2  862    8    0    3    2]
 [   7    2    1    0    4    3  939    1    1    0]
 [   1    4    9    1    2    0    0 1001    5    5]
 [   5    1    3    4    4    5    6    5  937    4]
 [   2    3    1    3    9    1    1    8    1  980]]


In [70]:
# To find how well our model did, we reduce the confusion matrix to one number:
#   accuracy = trace(cm) / (sum of all the elements of cm)
# The diagonal holds the correctly classified samples, so this ratio is the
# share of predictions that were right.
def accuracy(cm):
    """Return the overall classification accuracy as a percentage.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix whose diagonal entries count the correctly
        classified samples. NumPy arrays and nested lists are accepted.

    Returns
    -------
    numpy.float64
        ``trace(cm) / cm.sum() * 100``.

    Raises
    ------
    ValueError
        If ``cm`` is not a square 2-D matrix, or if it contains no samples
        (its elements sum to zero), in which case accuracy is undefined.
    """
    cm = np.asarray(cm)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            f'cm must be a square 2-D matrix, got shape {cm.shape}'
        )

    sum_of_elements = cm.sum()
    if sum_of_elements == 0:
        # Fail loudly instead of returning NaN from a 0/0 division.
        raise ValueError('cm contains no samples; accuracy is undefined')

    diagonal = cm.trace()
    return (diagonal / sum_of_elements) * 100

In [71]:
# Reduce the confusion matrix to a single percentage and report it.
accuracy_pct = accuracy(acc)
print(f'The accuracy of our model is: {accuracy_pct}')

The accuracy of our model is: 97.58
