# QuickDraw - 3 - Creating CoreML model 
--- 

# The Quick, Draw! Dataset

The Quick Draw Dataset is a collection of 50 million drawings across 345 categories, contributed by players of the game Quick, Draw!. The drawings were captured as timestamped vectors, tagged with metadata including what the player was asked to draw and in which country the player was located. You can browse the recognized drawings on quickdraw.withgoogle.com/data or download the dataset from https://console.cloud.google.com/storage/browser/quickdraw_dataset/?pli=1.  

The architecture was ported from the tutorial <a href='https://www.tensorflow.org/versions/master/tutorials/recurrent_quickdraw'>Recurrent Neural Networks for Drawing Classification</a> (associated repo available <a href='https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw'>here</a>), and many of its details have been reused here.  

<img src='https://github.com/googlecreativelab/quickdraw-dataset/raw/master/preview.jpg'/>

In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imresize
import os
import random
import csv

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figures.
%config InlineBackend.figure_format = 'retina'

# Apply matplotlib's 'ggplot' style sheet to every plot in this notebook.
plt.style.use('ggplot')

In [3]:
import keras 
keras.__version__

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


'2.0.8'

In [4]:
import json 
from keras.models import model_from_json

## Constants 

In [5]:
# Directory holding the pre-processed train_*/validation_* .npy parts.
# The location can be overridden with the QUICKDRAW_DATASET_DIR environment
# variable so the notebook runs on machines other than the author's; when the
# variable is not set the original hard-coded path is used unchanged.
DATASET_DIR = os.environ.get(
    'QUICKDRAW_DATASET_DIR',
    '/Users/Joshua.Newnham/Documents/Data/quickdraw_dataset/sketchrnn_training_data/')

# Number of numbered file parts (train_1 ... train_5, validation_1 ... validation_5).
TRAINING_PARTS = 5

# Every stroke sequence is truncated / zero-padded to this many steps.
MAX_SEQ_LEN = 75
# Number of target categories (width of the label arrays and of the softmax output).
CLASSES = 172

In [6]:
def pad_stroke_sequence(x, max_len=MAX_SEQ_LEN):
    """
    Pack a batch of variable-length stroke sequences into one fixed-size array.

    Each sequence is a 2-D array with 3 columns. A sequence with more than
    ``max_len`` rows keeps only its first ``max_len`` rows; a shorter one gets
    zero rows stacked in FRONT of it, so its own rows end up at the end.

    Args:
        x: batch of sequences; must expose ``shape[0]`` (the batch size) and
           yield one sequence per position.
        max_len: number of rows every sequence is forced to.

    Returns:
        float32 array of shape (x.shape[0], max_len, 3).
    """
    batch = np.zeros((x.shape[0], max_len, 3), dtype=np.float32)

    for row, sequence in enumerate(x):
        steps = sequence.shape[0]
        if steps > max_len:
            # Too long: drop everything after the first max_len rows.
            sequence = sequence[:max_len, :]
        elif steps < max_len:
            # Too short: prepend the missing number of all-zero rows.
            zero_rows = np.zeros((max_len - steps, 3), dtype=np.float32)
            sequence = np.vstack((zero_rows, sequence))

        batch[row] = sequence

    return batch

## Load data 

In [7]:
# Load files
# Build the paths of every numbered train / validation part under DATASET_DIR.

train_x_files, train_y_files = [], []
valid_x_files, valid_y_files = [], []

# One (destination list, file-name template) pair per kind of file.
file_groups = (
    (train_x_files, "train_{}_x.npy"),
    (train_y_files, "train_{}_y.npy"),
    (valid_x_files, "validation_{}_x.npy"),
    (valid_y_files, "validation_{}_y.npy"),
)

# Parts are numbered from 1 to TRAINING_PARTS inclusive.
for part_num in range(1, TRAINING_PARTS+1):
    for file_list, name_template in file_groups:
        file_list.append(os.path.join(DATASET_DIR, name_template.format(part_num)))

In [8]:
# Load the first train / validation part.
#
# The *_x files hold variable-length stroke sequences stored as 1-D object
# arrays (the shape printed below is (344000,)). NumPy >= 1.16.3 refuses to
# load object arrays unless allow_pickle=True is passed explicitly.
# Only do this for files you produced yourself: unpickling untrusted data can
# execute arbitrary code.
train_1_x = np.load(train_x_files[0], allow_pickle=True)
train_1_y = np.load(train_y_files[0])
valid_1_x = np.load(valid_x_files[0], allow_pickle=True)
valid_1_y = np.load(valid_y_files[0])

# Fixed-size (N, MAX_SEQ_LEN, 3) float32 versions of the stroke sequences.
padded_train_1_x = pad_stroke_sequence(train_1_x)
padded_valid_1_x = pad_stroke_sequence(valid_1_x)

# Report the raw and padded shapes of the first training / validation part.
# Fix: the first label on the second line used to read "train_1_y" although the
# value printed there is valid_1_x.shape.
print("train_1_x {}, train_1_y {}, padded_train_1_x {}\nvalid_1_x {}, valid_1_y {}, padded_valid_1_x {}".format(
    train_1_x.shape, 
    train_1_y.shape, 
    padded_train_1_x.shape, 
    valid_1_x.shape, 
    valid_1_y.shape, 
    padded_valid_1_x.shape))

train_1_x (344000,), train_1_y (344000, 172), padded_train_1_x (344000, 75, 3)
train_1_y (34400,), valid_1_y (34400, 172), padded_valid_1_x (34400, 75, 3)


## Load model and model weights 

In [9]:
# Read the serialized architecture. json.load's result is handed straight to
# model_from_json, so the file presumably contains a JSON-encoded string
# (e.g. model.to_json() written with json.dump) - confirm against the notebook
# that saved it.
with open('output/quickdraw_arch.json', 'r') as f:
    json_string = json.load(f)

# Rebuild the network from its description, then restore the trained weights.
model = model_from_json(json_string)
model.load_weights('output/quickdraw_weights.h5')

In [10]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the loaded model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_0 (Conv1D)            (None, 75, 48)            768       
_________________________________________________________________
dropout_1 (Dropout)          (None, 75, 48)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 75, 64)            15424     
_________________________________________________________________
dropout_2 (Dropout)          (None, 75, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 75, 96)            18528     
_________________________________________________________________
dropout_3 (Dropout)          (None, 75, 96)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 75, 128)           36992     
__________

Let's now test our model by making some predictions on our loaded training and validation data 

In [11]:
### Load categories 
# Map class index -> category name; column 0 of each labels.csv row is parsed
# as the integer index and column 1 is taken as the name.
with open('labels.csv', 'r') as f:
    csv_reader = csv.reader(f)
    categories = {int(row[0]): row[1] for row in csv_reader}

print("Loaded categories - number of categories {}".format(len(categories)))

Loaded categories - number of categories 172


In [12]:
### Sort by key 
# Turn the {index: name} dict into a list of names ordered by index, so that a
# predicted class index can be used directly as a position in the list
# (assumes the keys are the contiguous indices 0..N-1 - confirm against labels.csv).
#
# The isinstance guard makes the cell safe to re-run: after the first run
# `categories` is already a list, and calling .keys() on it raised AttributeError.
if isinstance(categories, dict):
    categories = [categories[key] for key in sorted(categories.keys())]

In [13]:
def make_prediction(model, labels, X, index):    
    """
    Classify a single stroke sequence and return its label.

    Args:
        model: object whose ``predict`` accepts the padded single-sample batch.
        labels: sequence indexed by the predicted class index.
        X: collection of 2-D stroke arrays.
        index: position in ``X`` of the sample to classify.

    Returns:
        ``labels[k]`` where ``k`` is the arg-max of the model output.
    """
    sample = X[index]
    num_steps, num_features = sample.shape[0], sample.shape[1]

    # Add a leading batch axis of size 1, then pad/truncate to the fixed length.
    single_batch = pad_stroke_sequence(sample.reshape(1, num_steps, num_features))

    scores = model.predict(single_batch)
    return labels[np.argmax(scores)]

In [14]:
# Spot check: classify training sample 0 and print the result next to the
# expected category (the arg-max of that sample's row in train_1_y).
prediction = make_prediction(model, categories, train_1_x, 0)
print("prediction {} expected {}".format(prediction, categories[np.argmax(train_1_y[0])]))

prediction airplane expected airplane


In [15]:
# Spot check on validation data: classify sample 1700 and print the result next
# to the expected category (the arg-max of that sample's row in valid_1_y).
prediction = make_prediction(model, categories, valid_1_x, 1700)
print("prediction {} expected {}".format(prediction, categories[np.argmax(valid_1_y[1700])]))

prediction banana expected banana


In [16]:
# Classify a randomly chosen validation sample.
# random.randint(a, b) includes BOTH endpoints, so randint(0, size) could return
# `size` itself and index one past the end of the array. randrange(n) draws
# from 0 .. n-1, which is always a valid index.
idx = random.randrange(len(valid_1_x))
prediction = make_prediction(model, categories, valid_1_x, idx)
print("{} :: prediction {} expected {}".format(idx, 
                                               prediction, 
                                               categories[np.argmax(valid_1_y[idx])]))

25496 :: prediction school bus expected school bus


In [17]:
# Classify another randomly chosen validation sample.
# random.randint(a, b) includes BOTH endpoints, so randint(0, size) could return
# `size` itself and index one past the end of the array. randrange(n) draws
# from 0 .. n-1, which is always a valid index.
idx = random.randrange(len(valid_1_x))
prediction = make_prediction(model, categories, valid_1_x, idx)
print("{} :: prediction {} expected {}".format(idx, 
                                               prediction, 
                                               categories[np.argmax(valid_1_y[idx])]))

22323 :: prediction parrot expected parrot


## Create CoreML model

https://apple.github.io/coremltools/index.html  
https://www.pydoc.io/pypi/coremltools-0.7/autoapi/converters/keras/_layers2/index.html

Below, rather than loading the architecture, we create our model again with a minor adjustment: we change the expected input from a (75,3) matrix to a flattened vector of length 225 (75 × 3). The reason for this is that CoreML (the converter) assumes the input shape is [Seq, D], and therefore returns (dim[1],) where dim would be (75,3) in our case, i.e. the input shape would be [3].  
We get around this by flattening (75,3) to (225) and reshaping it back to (75,3) before feeding it into our previous architecture. 

Some useful information about working with sequence data can be found on the Apple developer website: https://developer.apple.com/documentation/coreml/core_ml_api/making_predictions_with_a_sequence_of_inputs

In [18]:
import coremltools

In [19]:
from keras import layers
from keras import models
from keras import optimizers
from keras import callbacks

from keras.utils import plot_model

from keras import preprocessing
from keras.preprocessing import image

# Defaults for create_model() below.

# Default of create_model's batch_size parameter (the function body does not use it).
BATCH_SIZE = 512 
# Steps per stroke sequence; re-declares the value already set near the top of the notebook.
MAX_SEQ_LEN = 75
# Number of output classes; re-declares the value already set near the top of the notebook.
CLASSES = 172
# Number of stacked bidirectional LSTM layers.
NUM_RNN_LAYERS = 3 
# Units in each LSTM.
NUM_RNN_NODES = 128
# Filter count of each Conv1D layer, in order.
NUM_CONV = [48, 64, 96, 128]
# Kernel size of each Conv1D layer; indexed in step with NUM_CONV.
CONV_LEN = [5, 5, 3, 3]
# Rate used for the Dropout layers and as the LSTMs' recurrent_dropout.
DROPOUT = 0.3

def create_model(input_shape=(MAX_SEQ_LEN, 3), 
                 num_conv=NUM_CONV, 
                 conv_len=CONV_LEN, 
                 dropout=DROPOUT, 
                 batch_size=BATCH_SIZE, 
                 num_rnn_layers=NUM_RNN_LAYERS, 
                 num_rnn_nodes=NUM_RNN_NODES, 
                 num_classes=CLASSES):
    """
    Build the sketch classifier with a flattened input.

    The network takes a flat vector of ``input_shape[0] * input_shape[1]``
    values, reshapes it to ``input_shape`` and then applies a stack of Conv1D
    layers (with a Dropout layer before every convolution except the first),
    a stack of bidirectional LSTMs, a Flatten layer and a softmax Dense layer.

    Args:
        input_shape: (steps, features) shape the flat input is reshaped to.
        num_conv: filter count for each Conv1D layer.
        conv_len: kernel size for each Conv1D layer, indexed like ``num_conv``.
        dropout: rate for the Dropout layers and the LSTMs' recurrent dropout.
        batch_size: accepted for interface compatibility; not used here.
        num_rnn_layers: number of bidirectional LSTM layers.
        num_rnn_nodes: units per LSTM.
        num_classes: size of the softmax output.

    Returns:
        The assembled (uncompiled) ``Sequential`` model.
    """
    model = models.Sequential()

    # Flat vector in, (steps, features) out.
    flat_len = input_shape[0] * input_shape[1]
    model.add(layers.Reshape(input_shape, input_shape=(flat_len,)))

    for i, filters in enumerate(num_conv):
        # TODO (first convolution): feasible to use a TimeDistributed wrapper here? https://keras.io/layers/wrappers/
        if i > 0:
            # Every convolution after the first is preceded by dropout.
            model.add(layers.Dropout(dropout, name="dropout_{}".format(i)))

        model.add(layers.Conv1D(filters=filters,
                                kernel_size=conv_len[i],
                                activation=None,
                                strides=1,
                                padding='same',
                                name='conv1d_{}'.format(i)))

    for i in range(num_rnn_layers):
        # return_sequences=True keeps the per-step outputs for the next layer.
        lstm = layers.LSTM(units=num_rnn_nodes,
                           return_sequences=True,
                           recurrent_dropout=dropout)
        model.add(layers.Bidirectional(lstm, name="lstm_{}".format(i)))

    model.add(layers.Flatten())
    model.add(layers.Dense(num_classes, activation="softmax"))

    return model 

In [20]:
# Build the flattened-input variant of the network and load the trained weights
# from output/quickdraw_weights.h5 into it.
# NOTE: this rebinds `model`, replacing the model loaded from
# output/quickdraw_arch.json earlier in the notebook.
model = create_model()
model.load_weights('output/quickdraw_weights.h5')
# Print the layer-by-layer summary of the rebuilt model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 75, 3)             0         
_________________________________________________________________
conv1d_0 (Conv1D)            (None, 75, 48)            768       
_________________________________________________________________
dropout_1 (Dropout)          (None, 75, 48)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 75, 64)            15424     
_________________________________________________________________
dropout_2 (Dropout)          (None, 75, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 75, 96)            18528     
_________________________________________________________________
dropout_3 (Dropout)          (None, 75, 96)            0         
__________

### Test our model 

In [21]:
def make_prediction(model, labels, X, index): 
    """
    Classify one stroke sequence with the flattened-input model.

    Same steps as the earlier ``make_prediction`` (which this definition
    replaces), except that the padded sample is flattened to a single row
    before being passed to ``model.predict``. Used as a sanity check that the
    rebuilt model still works before converting it to CoreML.

    Args:
        model: object whose ``predict`` accepts a (1, steps * features) array.
        labels: sequence indexed by the predicted class index.
        X: collection of 2-D stroke arrays.
        index: position in ``X`` of the sample to classify.

    Returns:
        ``labels[k]`` where ``k`` is the arg-max of the model output.
    """
    sample = X[index]

    # Add a batch axis of size 1, then pad/truncate to the fixed length.
    single_batch = pad_stroke_sequence(
        sample.reshape(1, sample.shape[0], sample.shape[1]))

    # Collapse everything after the batch axis into one flat vector.
    flat_input = single_batch.reshape(1, -1)

    scores = model.predict(flat_input)
    return labels[np.argmax(scores)]

In [22]:
# Sanity check of the flattened-input model on training sample 0: print the
# predicted category next to the one given by the arg-max of its label row.
print("prediction {}, actual {}".format(
    make_prediction(model, categories, train_1_x, 0), 
    categories[np.argmax(train_1_y[0])]))

prediction airplane, actual airplane


In [23]:
# Sanity check of the flattened-input model on training sample 8000: print the
# predicted category next to the one given by the arg-max of its label row.
print("prediction {}, actual {}".format(
    make_prediction(model, categories, train_1_x, 8000), 
    categories[np.argmax(train_1_y[8000])]))

prediction apple, actual apple


In [24]:
# Sanity check of the flattened-input model on training sample 18000: print the
# predicted category next to the one given by the arg-max of its label row.
print("prediction {}, actual {}".format(
    make_prediction(model, categories, train_1_x, 18000), 
    categories[np.argmax(train_1_y[18000])]))

prediction barn, actual barn


In [25]:
# Convert the flattened-input Keras model to a CoreML model.
#   input_names  - the model input is exposed as 'strokeSeq'
#   output_names - the model output is exposed as 'classLabelProbs'
#   class_labels - the ordered list of category names built earlier; the
#                  prediction cells below read 'classLabelProbs' as a
#                  {category: probability} dict
coreml_model = coremltools.converters.keras.convert(model, 
                                                    input_names=['strokeSeq'],
                                                    output_names=['classLabelProbs'],
                                                    class_labels=categories)

0 : reshape_1_input, <keras.engine.topology.InputLayer object at 0x137090a50>
1 : reshape_1, <keras.layers.core.Reshape object at 0x137090a10>
2 : reshape_1_permute_conv1d_0, <keras.layers.core.Permute object at 0x139bca290>
3 : conv1d_0, <keras.layers.convolutional.Conv1D object at 0x137090d10>
4 : conv1d_1, <keras.layers.convolutional.Conv1D object at 0x137090e50>
5 : conv1d_2, <keras.layers.convolutional.Conv1D object at 0x137d51f10>
6 : conv1d_3, <keras.layers.convolutional.Conv1D object at 0x136366fd0>
7 : conv1d_3_permute_lstm_0, <keras.layers.core.Permute object at 0x136c4c250>
8 : lstm_0, <keras.layers.wrappers.Bidirectional object at 0x1352c1350>
9 : lstm_1, <keras.layers.wrappers.Bidirectional object at 0x12d84cd10>
10 : lstm_2, <keras.layers.wrappers.Bidirectional object at 0x135364c10>
11 : flatten_1, <keras.layers.core.Flatten object at 0x1370b4c10>
12 : dense_1, <keras.layers.core.Dense object at 0x13b00d190>
13 : dense_1__activation__, <keras.layers.core.Activation objec

In [100]:
# Human-readable metadata stored with the CoreML model.
coreml_model.author = 'Joshua Newnham'
coreml_model.license = 'BSD'
coreml_model.short_description = 'Freehand sketch recognizer trained using the dataset from Googles AI experiment AutoDraw using their QuickDraw dataset'
# Fix: the flattened input has 75 * 3 = 225 values; the description used to say 255.
coreml_model.input_description['strokeSeq'] = 'Sequence of strokes - flattened (75,3) to (225)'
coreml_model.output_description['classLabelProbs'] = 'Probability of each category (Dict where the key is the category and value is the probability)'

In [101]:
# Write the converted model to output/quickdraw.mlmodel.
coreml_model.save('output/quickdraw.mlmodel')

### Test our CoreML model 

In [26]:
def test_coreml(coreml_model, 
                labels, 
                X, 
                Y,
                index):
    """
    Run one sample through the CoreML model and return its top prediction.

    The sample is given a batch axis, padded/truncated with
    ``pad_stroke_sequence`` and flattened to a 1-D vector before being passed
    to the model under the 'strokeSeq' input name.

    Changes from the previous version:
      * the preprocessing block appeared twice verbatim; it now runs once;
      * ``lambda (k,v): ...`` (tuple-parameter unpacking) is a SyntaxError on
        Python 3; the key function now indexes the (label, probability) pair,
        which works on both Python 2 and 3;
      * the best entry is selected with ``max`` instead of sorting every entry
        and taking the first.

    Args:
        coreml_model: model whose ``predict`` takes ``{'strokeSeq': vector}``
            and returns a dict containing 'classLabelProbs'.
        labels: unused; kept so existing calls keep working.
        X: collection of 2-D stroke arrays.
        Y: unused; kept so existing calls keep working.
        index: position in ``X`` of the sample to classify.

    Returns:
        ``(label, probability)`` for the entry with the highest probability;
        ties on probability are broken by the larger label.
    """
    x = X[index]
    x = x.reshape(1, 
                  x.shape[0], 
                  x.shape[1])
    x = pad_stroke_sequence(x)
    
    results = coreml_model.predict({'strokeSeq':x.reshape(-1)})
    predicted_prob = results['classLabelProbs']
    # Same ordering as before: highest (probability, label) pair wins.
    return max(predicted_prob.items(), key=lambda item: (item[1], item[0]))

In [29]:
# Check the CoreML model on training sample 0: print the (label, probability)
# pair it returns next to the category given by the arg-max of the label row.
idx = 0
print("index :: {} predicted {} actual {}".format(
    idx, 
    test_coreml(coreml_model, categories, train_1_x, train_1_y, idx), 
    categories[np.argmax(train_1_y[idx])]))

index :: 0 predicted (u'airplane', 0.8395060300827026) actual airplane


In [27]:
# Check the CoreML model on a randomly chosen training sample.
# random.randint(a, b) includes BOTH endpoints, so randint(0, size) could return
# `size` itself and index one past the end of the array. randrange(n) draws
# from 0 .. n-1, which is always a valid index.
idx = random.randrange(len(train_1_x))
print("index :: {} predicted {} actual {}".format(
    idx, 
    test_coreml(coreml_model, categories, train_1_x, train_1_y, idx), 
    categories[np.argmax(train_1_y[idx])]))

index :: 138468 predicted (u'hat', 0.7071692943572998) actual hat


In [28]:
# Check the CoreML model on another randomly chosen training sample.
# random.randint(a, b) includes BOTH endpoints, so randint(0, size) could return
# `size` itself and index one past the end of the array. randrange(n) draws
# from 0 .. n-1, which is always a valid index.
idx = random.randrange(len(train_1_x))
print("index :: {} predicted {} actual {}".format(
    idx, 
    test_coreml(coreml_model, categories, train_1_x, train_1_y, idx), 
    categories[np.argmax(train_1_y[idx])]))

index :: 293140 predicted (u'submarine', 0.6821426153182983) actual submarine
