# Get the MNIST data
Get the data using the keras loading function, then process it so it is ready for our fully connected network (FCN).

# Errors?
Check to see that the **kernel** is correct (Python 3.6/Conda 5.2).


In [1]:
from keras.datasets import mnist
# Load the MNIST train/test arrays through the keras helper.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Set `short` to True to keep only the first 7000 training and 3000 test
# samples (presumably for quicker experiments).
short = False
if short:
    train_images, train_labels = train_images[:7000, :], train_labels[:7000]
    test_images, test_labels = test_images[:3000, :], test_labels[:3000]

# Report the array shapes before the images are flattened.
print("Train info", train_images.shape, train_labels.shape)
print("Test info", test_images.shape, test_labels.shape)

# Flatten every 28x28 image into one row of 784 values, then convert to
# float32 and divide by 255.
train_images = train_images.reshape((train_images.shape[0], 28 * 28)).astype('float32') / 255
test_images = test_images.reshape((test_images.shape[0], 28 * 28)).astype('float32') / 255
from keras.utils import to_categorical

# One-hot encode the integer digit labels.  The class count is pinned to the
# 10 digits so both arrays always get 10 columns; when it is omitted,
# to_categorical infers the width from the largest label present, which can
# differ between the two arrays if a subset happens to lack the digit 9.
train_labels_cat = to_categorical(train_labels, num_classes=10)
test_labels_cat = to_categorical(test_labels, num_classes=10)


Using TensorFlow backend.


Train info (60000, 28, 28) (60000,)
Test info (10000, 28, 28) (10000,)


# Build the FCN and train it
We will build a simple 1-hidden-layer network.   We will use 400 hidden nodes since that was close to optimal based on our earlier studies.   We will then save the network for later use.

In [2]:
from keras import models
from keras import layers

# Fully connected classifier: 784 inputs -> 400 tanh units -> 10-way softmax.
network = models.Sequential([
    layers.Dense(400, activation='tanh', input_shape=(28 * 28,)),
    layers.Dense(10, activation='softmax'),
])
network.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 15 epochs with batches of 128; the test set is passed as the
# validation data.
history = network.fit(
    train_images,
    train_labels_cat,
    epochs=15,
    batch_size=128,
    validation_data=(test_images, test_labels_cat),
)

# Persist the trained model to disk for later use.
network.save('fully_trained_model_fcn.h5')


Train on 60000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Finish
The accuracy on the standard test set is around 98%, which is pretty good!

In [3]:
# Run the trained network on the flattened test images.  The network's last
# layer is a 10-unit softmax, so each row of `predictions` holds 10 outputs.
predictions = network.predict(test_images)

In [5]:
from collections import defaultdict
from functools import partial
from itertools import repeat
def nested_defaultdict(default_factory, depth=1):
    """Return an empty defaultdict nested ``depth`` levels deep.

    The innermost level creates missing values with ``default_factory``;
    every outer level creates the next-inner defaultdict on first access.
    A ``depth`` of 1 or less yields a single, flat
    ``defaultdict(default_factory)``.
    """
    # Start with the innermost level and wrap it once per additional level.
    make_level = partial(defaultdict, default_factory)
    for _ in range(depth - 1):
        make_level = partial(defaultdict, make_level)
    return make_level()

# confusion_matrix[true_digit][predicted_digit] -> number of test samples.
# The two-level defaultdict makes cells that were never hit read as 0.
confusion_matrix = nested_defaultdict(int, 2)

# Largest network output for each sample, split by whether the predicted
# digit matched the true one.
correct_certainty, notcorrect_certainty = [], []

import numpy as np

for one_hot_label, scores in zip(test_labels_cat, predictions):
    actual = np.argmax(one_hot_label)  # index of the 1 in the one-hot label
    guess = np.argmax(scores)          # digit with the largest output
    confusion_matrix[actual][guess] += 1
    bucket = correct_certainty if actual == guess else notcorrect_certainty
    bucket.append(scores[guess])
        
        
# Print one tab-separated row per true digit; the columns are the predicted
# digits 0-9.  A blank line follows the table.
for true_digit in range(10):
    row_cells = "".join(
        f"\t {confusion_matrix[true_digit][pred_digit]}"
        for pred_digit in range(10)
    )
    print(f"True:  {true_digit}{row_cells}")
print()

True:  0	 974	 1	 1	 0	 0	 1	 1	 1	 1	 0
True:  1	 1	 1125	 2	 1	 0	 1	 2	 1	 2	 0
True:  2	 3	 0	 1012	 2	 1	 0	 2	 3	 8	 1
True:  3	 0	 0	 4	 989	 0	 4	 0	 4	 3	 6
True:  4	 1	 0	 2	 1	 961	 0	 4	 2	 1	 10
True:  5	 2	 0	 0	 9	 1	 875	 2	 1	 1	 1
True:  6	 5	 2	 1	 1	 1	 7	 940	 0	 1	 0
True:  7	 0	 1	 6	 2	 1	 1	 0	 1009	 3	 5
True:  8	 1	 0	 2	 3	 2	 5	 1	 2	 954	 4
True:  9	 3	 2	 0	 4	 6	 5	 0	 3	 1	 985

