In [12]:
# imports necessary.
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import np_utils, print_summary
from keras.models import Sequential
from keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Flatten, Dropout
import pickle
import matplotlib.pyplot as plt

In [13]:
# Function to read the processed data back from the pickle files.
def loadData(imagesPath = "x", classesPath = "y"):
    """Load the pickled images and their class labels as NumPy arrays.

    Args:
        imagesPath: Path of the pickle file holding the images.
            Defaults to "x" in the current working directory.
        classesPath: Path of the pickle file holding the class labels.
            Defaults to "y" in the current working directory.

    Returns:
        A tuple ``(images, classes)`` where both elements are ``np.ndarray``
        objects built from the unpickled contents of the two files.

    Raises:
        OSError: If either file cannot be opened (e.g. it does not exist).
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing,
    # so only point this function at files produced by a trusted source.
    with open(imagesPath, "rb") as f:
        images = np.array(pickle.load(f))
    with open(classesPath, "rb") as f:
        classes = np.array(pickle.load(f))
    return images, classes

In [14]:
# Function to build the CNN model with different layers.
def cnnModel(numClasses = 5, inputShape = (28, 28, 1)):
    """Build and compile the convolutional classifier.

    The network is a linear stack (Keras ``Sequential``) of two convolutional
    stages followed by a fully connected head:

    * stage 1: two 5x5 convolutions (32 then 64 filters) + 2x2 max pooling
    * stage 2: two 3x3 convolutions (64 then 128 filters) + 2x2 max pooling
    * head: Dense 512 -> 128 -> 64, each followed by Dropout(0.5), then a
      softmax layer with ``numClasses`` units

    Args:
        numClasses: Number of output classes, i.e. the width of the final
            softmax layer. Defaults to 5.
        inputShape: Shape of one input sample as (height, width, channels).
            Defaults to (28, 28, 1).

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, reporting accuracy and mean absolute error).
    """
    # The Sequential model is a linear stack of layers, added one by one.
    model = Sequential()

    # Only the first layer of a Sequential model needs the input shape;
    # every later layer infers it from the layer before.
    # padding='same' keeps the spatial size through each convolution, while
    # each 2x2 pooling with stride 2 halves it (28 -> 14 -> 7 for the default
    # input shape).
    model.add(Conv2D(filters = 32, kernel_size = (5, 5), input_shape = inputShape, padding = 'same', activation = 'relu'))
    model.add(Conv2D(filters = 64, kernel_size = (5, 5), padding = 'same', activation = 'relu'))
    model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2), padding = 'same'))

    model.add(Conv2D(filters = 64, kernel_size = (3, 3), padding = 'same', activation = 'relu'))
    model.add(Conv2D(filters = 128, kernel_size = (3, 3), padding = 'same', activation = 'relu'))
    model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2), padding = 'same'))

    # Fully connected head; a dropout layer follows every hidden Dense layer.
    model.add(Flatten())
    model.add(Dense(512, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(numClasses, activation = 'softmax'))

    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy','mae'])

    return model

In [15]:
# Load the pickled images and their class labels.
images, classes = loadData()
# One-hot encode the integer labels.
# e.g. for the label array {1,2,3,4,2} the output will be
# array([[ 0.,  1.,  0., 0., 0.],
#       [ 0.,  0.,  1., 0., 0.],
#       [ 0.,  0.,  0., 1., 0.],
#       [ 0.,  0.,  0., 0., 1.],
#       [ 0.,  0.,  1., 0., 0.]])
# NOTE(review): num_classes is not passed, so the encoded width presumably
# depends on the labels present; it has to match the model's 5 output units — confirm.
classes = np_utils.to_categorical(classes)

# Split the data: 10% is held out for testing, the rest is used for training.
train_x, test_x, train_y, test_y = train_test_split(images, classes, random_state=42, test_size=0.1)
# Reshape every sample to (28, 28, 1); the trailing 1 is the single channel
# (per the original note, the images are grayscale, stored flattened as 784 values).
train_x = train_x.reshape(train_x.shape[0], 28, 28, 1)
test_x = test_x.reshape(test_x.shape[0], 28, 28, 1)

# Create the CNN model.
model = cnnModel()

# Print the layer-by-layer summary of the model.
print_summary(model)
print("Fitting the model")

# Fit the model on the training split.
# batch_size is the number of training examples used for each estimate of the error gradient.
# epochs is the number of passes over the whole training data.
# NOTE: the held-out test split is passed as validation_data, so the validation
# metrics reported during training are computed on the test data.
model.fit(train_x, train_y, validation_data=(test_x, test_y), epochs=5, batch_size=100)

# Save the trained model so it can be loaded later for prediction.
model.save('QuickDraw.h5')

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 28, 28, 32)        832       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 28, 28, 64)        51264     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 14, 14, 64)        36928     
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 14, 14, 128)       73856     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 6272)             

In [16]:
# Persist the test split with IPython's %store magic so other notebooks can restore it with `%store -r`.
%store test_x
%store test_y

Stored 'test_x' (ndarray)
Stored 'test_y' (ndarray)
