In [18]:
# get necessary modules

from PIL import Image
import numpy as np
import mnist
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# lets create the training and testing variables

X_train = mnist.train_images()
y_train = mnist.train_labels()

In [3]:
X_test = mnist.test_images()
y_test = mnist.test_labels()

In [4]:
# take a look at the data
print('X_train', X_train)
print('y_train', y_train)
print('X_test', X_test)

X_train [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]
y_train [5 0 4 ... 5 6 8]
X_test [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 

In [5]:
# this looks to be like a 3D array for x, and the y data looks like just digits

# now, lets look at the dimensions of x
print("dimensions", X_train.ndim)
print("shape", X_train.shape)

dimensions 3
shape (60000, 28, 28)


In [6]:
# we can see that there are 60000 instances / samples, and that there are 28 x 28 images
# we have to convert it and combine it to make one list of all the pixels

X_train = X_train.reshape(60000, 28 *28)

In [7]:
# now, lets look at the dimensions of x
print("dimensions", X_train.ndim)
print("shape", X_train.shape)

dimensions 2
shape (60000, 784)


In [8]:
# we got it now, and have to do the exact same thing for X_test, and y_test
X_test = X_test.reshape((-1, 28*28))
print("dimensions", X_test.ndim)
print("shape", X_test.shape)

dimensions 2
shape (10000, 784)


In [9]:
# now, let's take a look at the range of the data
print(X_train[0])

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255
 247 127   0   0   0   0   0   0   0   0   0   0   0   0  30  36  94 154
 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0   0   0
   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82
  82  56  39   0   0   0   0   0   0   0   0   0   0   0   0  18 219 253
 253 253 253 253 198 182 247 241   0   0   0   0   

In [10]:
# we need to make this from 0 to 1
X_train = (X_train/256)
X_test = (X_test/256)

In [11]:
print(X_train)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [12]:
# now, lets create the model to classify. We can use adam, which is very efficient for large amounts of data
clf = MLPClassifier(solver = 'adam', activation = 'relu',hidden_layer_sizes = (64,64 ), verbose = True)

In [13]:
clf.fit(X_train, y_train)

Iteration 1, loss = 0.45576649
Iteration 2, loss = 0.19180179
Iteration 3, loss = 0.14591132
Iteration 4, loss = 0.11841694
Iteration 5, loss = 0.09906474
Iteration 6, loss = 0.08716018
Iteration 7, loss = 0.07481765
Iteration 8, loss = 0.06586141
Iteration 9, loss = 0.05756801
Iteration 10, loss = 0.05098026
Iteration 11, loss = 0.04540957
Iteration 12, loss = 0.04109191
Iteration 13, loss = 0.03712509
Iteration 14, loss = 0.03168989
Iteration 15, loss = 0.02877522
Iteration 16, loss = 0.02533041
Iteration 17, loss = 0.02415276
Iteration 18, loss = 0.02050530
Iteration 19, loss = 0.01923741
Iteration 20, loss = 0.01669697
Iteration 21, loss = 0.01497690
Iteration 22, loss = 0.01272094
Iteration 23, loss = 0.01248181
Iteration 24, loss = 0.01171617
Iteration 25, loss = 0.01283543
Iteration 26, loss = 0.01214106
Iteration 27, loss = 0.00935250
Iteration 28, loss = 0.00667137
Iteration 29, loss = 0.00939399
Iteration 30, loss = 0.00900226
Iteration 31, loss = 0.01057175
Iteration 32, los

MLPClassifier(hidden_layer_sizes=(64, 64), verbose=True)

In [16]:
# now that it is trained, let's test it
predictions = clf.predict(X_test)
acc = confusion_matrix(y_test, predictions)
print(acc)

[[ 966    0    0    2    1    2    2    0    3    4]
 [   0 1127    1    0    0    0    2    1    4    0]
 [   4    3 1008    4    0    0    0    4    8    1]
 [   0    0    3  991    0    4    0    4    3    5]
 [   0    0    2    1  961    0    3    1    1   13]
 [   1    1    0    9    2  869    5    0    4    1]
 [   6    1    2    0    5    6  936    0    2    0]
 [   3    5   11    5    2    0    0  992    3    7]
 [   3    0    5    5    3    2    0    5  947    4]
 [   1    4    0    6   12    1    1    3    5  976]]


In [19]:
print("testing accuracy\n", classification_report(y_test,predictions) )

testing accuracy
               precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.97      0.98      0.97      1010
           4       0.97      0.98      0.98       982
           5       0.98      0.97      0.98       892
           6       0.99      0.98      0.98       958
           7       0.98      0.96      0.97      1028
           8       0.97      0.97      0.97       974
           9       0.97      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



In [24]:
# lets define a method for accuracy, and find accuracy
def accuracy(cm):
    diagonal = cm.trace().sum()
    elements = cm.sum()
    return diagonal/elements

print(accuracy(acc))

0.9773
