In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from tensorflow.keras.datasets import mnist

In [3]:
# Fetch MNIST through Keras and unpack it into image arrays (x_*) and their
# integer digit labels (y_*) for the train and test splits.
train_pair, test_pair = mnist.load_data()
x_train, y_train = train_pair
x_test, y_test = test_pair

## Visualizing the Image Data

In [5]:
# Show the dimensions of the training image array as loaded.
x_train.shape

In [6]:
# Keep the first training image as a sample to inspect in the cells below.
single_image = x_train[0]

In [7]:
# Dimensions of one image (a single entry along the first axis of x_train).
single_image.shape

In [8]:
# Render the sample digit. No cmap is passed, so matplotlib applies its
# default colormap rather than drawing the image in grayscale.
plt.imshow(single_image)

# PreProcessing Data

We first need to make sure the labels will be understandable by our CNN.

## Labels

In [11]:
# Display the raw training labels before any encoding.
y_train

In [12]:
# Display the raw test labels before any encoding.
y_test

Hmmm, looks like our labels are literally the digits themselves, stored as integer categories. We need to translate them to a "one hot encoded" form so our CNN can understand them; otherwise it will treat this as some sort of regression problem on a continuous axis. Luckily, Keras has an easy-to-use function for this:

In [14]:
from tensorflow.keras.utils import to_categorical

In [15]:
# Shape of the label array before one-hot encoding, for comparison with the
# encoded shape shown a few cells below.
y_train.shape

In [16]:
# Demonstration of one-hot encoding. The number of classes is not passed here,
# so Keras infers it from the labels (largest label + 1); the encodings used
# for training further down pass 10 explicitly.
y_example = to_categorical(y_train)

In [17]:
# Display the one-hot encoded label matrix.
y_example

In [18]:
# Shape after encoding: one row per label, one column per class.
y_example.shape

In [19]:
# One-hot vector for the first training label.
y_example[0]

In [20]:
# One-hot encode the test labels, fixing the width at the 10 digit classes so
# it does not depend on which digits happen to occur in this split.
y_cat_test = to_categorical(y_test, num_classes=10)

In [21]:
# One-hot encode the training labels with the same fixed width of 10 classes
# used for the test labels.
y_cat_train = to_categorical(y_train, num_classes=10)

### Processing X Data

We should normalize the X data so that every pixel value falls between 0 and 1.

In [23]:
# Largest pixel value in the sample image; informs the divisor used for
# scaling below.
single_image.max()

In [24]:
# Smallest pixel value in the sample image.
single_image.min()

In [25]:
# Rescale pixel intensities by the 255 maximum so values lie between 0 and 1.
# Both names are rebound to the scaled arrays, so executing this cell a second
# time without reloading the data would divide by 255 again.
x_train, x_test = x_train / 255, x_test / 255

In [26]:
# First training image again, now taken from the rescaled array.
scaled_single = x_train[0]

In [27]:
# Dimensions of the rescaled sample image.
scaled_single.shape

In [28]:
# Largest pixel value of the sample after dividing by 255.
scaled_single.max()

In [29]:
# Render the rescaled sample with the same imshow call as before, for a visual
# comparison with the unscaled image.
plt.imshow(scaled_single)

## Reshaping the Data

Right now our data is 60,000 images stored in 28 by 28 pixel array formation. 

This is correct for a CNN, but we need to add one more dimension to show we're dealing with a single color channel (the images are grayscale, so each pixel holds one intensity value, originally 0-255 and now scaled to 0-1, on a single channel); a color (RGB) image would have 3 channels.

In [31]:
# Training array shape before the channel axis is added.
x_train.shape

In [32]:
# Test array shape before the channel axis is added.
x_test.shape

Reshape to include channel dimension (in this case, 1 channel)

In [34]:
# Append a trailing axis of size 1 for the single grayscale channel expected
# by Conv2D. Using -1 lets NumPy infer the number of images instead of
# hard-coding 60000, so the cell keeps working on a subset of the data.
x_train = x_train.reshape(-1, 28, 28, 1)

In [35]:
# Confirm the training array now ends with a channel axis of size 1.
x_train.shape

In [36]:
# Append the same trailing channel axis to the test images. Using -1 lets
# NumPy infer the number of images instead of hard-coding 10000.
x_test = x_test.reshape(-1, 28, 28, 1)

In [37]:
# Confirm the test array now ends with a channel axis of size 1.
x_test.shape

## Creating the Model

In [39]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Conv2D, 
                                     MaxPool2D, Flatten)

In [40]:
# Small CNN for 28x28 single-channel digit images, declared as one layer list.
model = Sequential([
    # Convolution: 32 filters with a 4x4 kernel over the (28, 28, 1) input.
    Conv2D(filters=32, kernel_size=(4,4),
           input_shape=(28,28,1), activation='relu'),
    # Downsample every feature map with 2x2 max pooling.
    MaxPool2D(pool_size=(2,2)),
    # Unroll the pooled feature maps (not the raw 28x28 image) into a single
    # vector per sample so the dense layers can consume them. With Keras'
    # default stride and padding this should be 12*12*32 = 4608 values;
    # model.summary() shows the exact size.
    Flatten(),
    # Hidden dense layer; 128 units is a tunable choice.
    Dense(units=128, activation='relu'),
    # Output layer: one softmax probability for each of the 10 digit classes.
    Dense(10, activation='softmax'),
])

# categorical_crossentropy pairs with the one-hot encoded labels
# (y_cat_train / y_cat_test) that are passed to fit.
# Available metrics: https://keras.io/metrics/
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])  # further metrics can be appended to this list

In [41]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [42]:
from tensorflow.keras.callbacks import EarlyStopping

In [43]:
# Callback that watches validation loss and halts training once it has gone
# 2 epochs (the patience) without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=2)

## Train the Model

In [45]:
# Train for at most 10 epochs, evaluating on (x_test, y_cat_test) after each
# epoch; the early_stop callback may end training before epoch 10.
# NOTE(review): the test set doubles as the validation set here, and
# early_stop monitors val_loss, so test data influences when training stops.
# Consider holding out part of the training data for validation instead
# (e.g. validation_split) and keeping the test set for a final evaluation.
model.fit(x_train,y_cat_train,
         validation_data=(x_test,y_cat_test),
         epochs=10,
         verbose=2,
         callbacks=[early_stop])

In [46]:
# Collect the per-epoch values recorded on model.history by the fit call into
# a DataFrame (one row per epoch, one column per tracked quantity).
metrics = pd.DataFrame(model.history.history)

In [47]:
# Display the per-epoch training and validation metrics.
metrics

Unnamed: 0,loss,acc,val_loss,val_acc
0,0.137474,0.958967,0.051356,0.9838
1,0.047196,0.985633,0.043206,0.9861
2,0.031191,0.9907,0.035191,0.9887
3,0.020693,0.99365,0.043879,0.9868
4,0.015137,0.994983,0.040872,0.9876


In [48]:
# Plot training loss against validation loss across epochs.
metrics[['loss','val_loss']].plot()

In [49]:
# Plot training accuracy against validation accuracy across epochs.
# The history key produced by metrics=['accuracy'] depends on the
# Keras/TensorFlow version: older releases record 'acc'/'val_acc', newer ones
# 'accuracy'/'val_accuracy'. Select whichever columns are present so this cell
# does not raise a KeyError under a different version.
acc_cols = [col for col in ('acc', 'val_acc', 'accuracy', 'val_accuracy')
            if col in metrics.columns]
metrics[acc_cols].plot()