
# MNIST Neural Network Tutorial
Today we are going to look into neural networks! What do we need? First we need a lot of labeled data. In this example we will use a very popular introductory dataset, the MNIST dataset. MNIST consists of images (28x28 pixels) of handwritten digits ranging from 0 to 9, each paired with its 'correct' label.

For this type of data, 2D convolutional neural networks (CNN) are most appropriate. Let's get started!

In [None]:
# Import required modules
import numpy as np     
import os                             
import sys                 
import PIL
import PIL.Image
import tensorflow as tf
import matplotlib.pyplot as plt
import imageio
import pathlib

In [None]:
# Load the MNIST train/test splits through the Keras dataset helper
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide by 255 so the inputs fall in [0, 1]; NNs generally perform best in that range
x_train = x_train / 255.0
x_test = x_test / 255.0

print(x_train.shape)  # shape of the training images
print(x_test.shape)   # shape of the testing images

# Let's build a Neural Network!
Cool, the data is loaded! Now let's first build a very simple network before looking into CNNs.

In [None]:
# Build a simple fully-connected model
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),  # input layer: transform inputs to 1D (None, 28*28=784)
  tf.keras.layers.Dense(64, activation='relu'),   # hidden layer
  tf.keras.layers.Dense(64, activation='relu'),   # hidden layer
  tf.keras.layers.Dense(64, activation='relu'),   # hidden layer
  tf.keras.layers.Dense(10, activation='softmax') # output layer: one probability per digit
])
# summary is a method: it has to be called, a bare `model.summary` prints nothing
model.summary()

# Labels are plain integers (not one-hot), hence the *sparse* categorical cross-entropy
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train, and report loss/accuracy on the test split after every epoch
history = model.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=2,
    validation_data=(x_test, y_test)
)

# The Network is trained!
That is already a pretty high accuracy for such a simple network! Maybe we don't need anything complicated at all? First, let's look at some of the mistakes the network makes:

In [None]:
prediction = model.predict(x_test)

# Most likely digit for every test image, then the indices the network got wrong
predicted_digits = np.argmax(prediction, axis=1)
mistakes = np.flatnonzero(predicted_digits != y_test)
n = len(mistakes)  # total number of misclassified images

# Only show the first 8 mistakes to keep the output short
for i in mistakes[:8]:
    print("Network thinks it is a: ", predicted_digits[i], " but it is actually a: ", y_test[i])
    plt.imshow(x_test[i])
    plt.show()

# Fraction of correctly classified test images; report it like the other evaluation cells do
acc = (len(y_test)-n)/len(y_test)
print("accuracy: ",acc)

# Can we do better??
Let's try Convolutional Neural Networks! These networks can capture spatial features from images, instead of simply taking each pixel as a separate value.

In [None]:
# Shape of one sample as fed to the CNN: 28x28 pixels plus a trailing axis of size 1
input_shape = (28, 28, 1)

# Only the first layer of a Sequential model declares the input shape;
# every later layer takes its input shape from the layer before it.
model_CNN = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape), # input layer
  tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),                          # hidden layer
  tf.keras.layers.MaxPooling2D((2, 2)),                                           # pooling layer (dimension reduction)
  tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),                          # hidden layer
  tf.keras.layers.Flatten(),                                                      # transform inputs to 1D
  tf.keras.layers.Dense(64, activation='relu'),                                   # hidden layer
  tf.keras.layers.Dense(10, activation='softmax')                                 # output layer
])

# summary is a method: it has to be called, a bare `model_CNN.summary` prints nothing
model_CNN.summary()

# Add the trailing size-1 axis so the data matches the input shape declared above
x_train = x_train.reshape(x_train.shape[0], *input_shape)
x_test = x_test.reshape(x_test.shape[0], *input_shape)

# Labels are plain integers (not one-hot), hence the *sparse* categorical cross-entropy
model_CNN.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train, and report loss/accuracy on the test split after every epoch
history = model_CNN.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=2,
    validation_data=(x_test, y_test)
)

In [None]:
# Run the CNN on the test images and display the first 8 misclassified ones
prediction = model_CNN.predict(x_test)
n = 0  # number of mistakes seen so far
for image, scores, truth in zip(x_test, prediction, y_test):
    guess = np.argmax(scores, axis=0)  # index of the highest score
    if guess == truth:
        continue
    n += 1
    if n >= 9:
        # keep counting, but stop printing/plotting after 8 mistakes
        continue
    print("Network thinks it is a: ", guess, " but it is actually a: ", truth)
    plt.imshow(image)
    plt.show()

# What happens with new independent data?
I made my own dataset, let's see what happens! This is a relatively small dataset, with a total of 64 images: 32 with zeros and 32 with ones. What will happen when we feed this data to the network? Note that 'random guessing' would result in an accuracy of 0.10 (not 0.50!), since the network still chooses among all ten digits (0 to 9).

In [None]:
# Load the independent test-set
data_dir = pathlib.Path(sys.path[0]) / "test_set"

# Glob once (sorted, so the order is reproducible between runs) and reuse the list
images = sorted(data_dir.glob('*.png'))  # Extract all png files
image_count = len(images)
if image_count == 0:
    # Fail here with a clear message instead of continuing with an uninitialised placeholder image
    raise FileNotFoundError(f"No .png images found in {data_dir}")

# Derive the label from the file name only: testing the full path would label
# every image as 1 whenever a parent directory happens to contain "one".
y_test_gsd = np.array([1 if "one" in path.name else 0 for path in images])

# Invert each image (255 - im), scale it by 1/255, and stack all images
# along a new first axis in a single step.
x_test_gsd = np.stack([(255 - imageio.imread(path)) / 255 for path in images])

print(x_test_gsd.shape)    # shape of the independent test data

# Model 1: The simple network

In [None]:
# The dense model was declared with input_shape=(28, 28), so feed it exactly that shape.
# reshape(-1, 28, 28) also works if x_test_gsd already carries a trailing size-1 axis,
# and x_test_gsd itself is left untouched.
prediction = model.predict(x_test_gsd.reshape(-1, 28, 28))

# Most likely digit for every image, then the indices the network got wrong
predicted_digits = np.argmax(prediction, axis=1)
mistakes = np.flatnonzero(predicted_digits != y_test_gsd)
n = len(mistakes)  # total number of misclassified images

# Only show the first 8 mistakes to keep the output short
for i in mistakes[:8]:
    print("Network thinks it is a: ", predicted_digits[i], " but it is actually a: ", y_test_gsd[i])
    plt.imshow(x_test_gsd[i])
    plt.show()

# Fraction of correctly classified images
acc = (len(y_test_gsd)-n)/len(y_test_gsd)
print("accuracy: ",acc)

# Model 2: Convolutional Neural Network

In [None]:
# The CNN takes 4D input, so add the trailing size-1 axis: (N, 28, 28, 1)
sample_count = x_test_gsd.shape[0]
x_test_gsd = x_test_gsd.reshape(sample_count, 28, 28, 1)

prediction = model_CNN.predict(x_test_gsd)

n = 0  # number of mistakes seen so far
for image, scores, truth in zip(x_test_gsd, prediction, y_test_gsd):
    guess = np.argmax(scores, axis=0)  # index of the highest score
    if guess == truth:
        continue
    n += 1
    if n >= 9:
        # keep counting, but stop printing/plotting after 8 mistakes
        continue
    print("Network thinks it is a: ", guess, " but it is actually a: ", truth)
    plt.imshow(image)
    plt.show()

# Fraction of correctly classified images
label_count = len(y_test_gsd)
acc = (label_count - n) / label_count
print("accuracy: ",acc)

# Take home message
Without an independent validation dataset the evaluation of Neural Network performance can be biased.