In [19]:
batch_size = 2048
STROKE_COUNT = 196
TRAIN_SAMPLES = 4000
VALID_SAMPLES = 1000
TEST_SAMPLES = 1000
NUM_CLASSES = 150

In [5]:
%matplotlib inline
import os
import numpy as np
np.random.seed(69)
import matplotlib.pyplot as plt
from keras.utils.np_utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from keras.metrics import top_k_categorical_accuracy
def top_3_accuracy(x,y): return top_k_categorical_accuracy(x,y, 3)
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from glob import glob
import gc
gc.enable()
def get_available_gpus():
    from tensorflow.python.client import device_lib
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
base_dir = os.path.join('..')
test_path = os.path.join(base_dir, 'test_simplified.csv')

In [6]:
from ast import literal_eval
ALL_TRAIN_PATHS = glob(os.path.join(base_dir, 'train_simplified', '*.csv'))
COL_NAMES = ['countrycode', 'drawing', 'key_id', 'recognized', 'timestamp', 'word']

def _stack_it(raw_strokes):
    """preprocess the string and make 
    a standard Nx3 stroke vector"""
    stroke_vec = literal_eval(raw_strokes) # string->list
    # unwrap the list
    in_strokes = [(xi,yi,i)  
     for i,(x,y) in enumerate(stroke_vec) 
     for xi,yi in zip(x,y)]
    c_strokes = np.stack(in_strokes)
    # replace stroke id with 1 for continue, 2 for new
    c_strokes[:,2] = [1]+np.diff(c_strokes[:,2]).tolist()
    c_strokes[:,2] += 1 # since 0 is no stroke
    # pad the strokes with zeros
    x = pad_sequences(c_strokes.swapaxes(0, 1), 
                         maxlen=STROKE_COUNT, 
                         padding='post').swapaxes(0, 1)
    return x

def read_batch(samples=5, 
               start_row=0,
               max_rows = 1000):
    """
    load and process the csv files
    this function is horribly inefficient but simple
    """
    out_df_list = []
    for c_path in ALL_TRAIN_PATHS[:NUM_CLASSES]:
        c_df = pd.read_csv(c_path, nrows=max_rows, skiprows=start_row)
        c_df.columns=COL_NAMES
        out_df_list += [c_df.sample(samples)[['drawing', 'word']]]
    full_df = pd.concat(out_df_list)
    full_df['drawing'] = full_df['drawing'].\
        map(_stack_it)
    
    return full_df

# Reading and Parsing
Since it is too much data (23GB) to read in at once, we just take a portion of it for training, validation and hold-out testing. This should give us an idea about how well the model works, but leaves lots of room for improvement later

In [7]:
train_args = dict(samples=TRAIN_SAMPLES, 
                  start_row=0, 
                  max_rows=int(TRAIN_SAMPLES*1.5))
valid_args = dict(samples=VALID_SAMPLES, 
                  start_row=train_args['max_rows']+1, 
                  max_rows=VALID_SAMPLES+25)
test_args = dict(samples=TEST_SAMPLES, 
                 start_row=valid_args['max_rows']+train_args['max_rows']+1, 
                 max_rows=TEST_SAMPLES+25)
train_df = read_batch(**train_args)
valid_df = read_batch(**valid_args)
test_df = read_batch(**test_args)
word_encoder = LabelEncoder()
word_encoder.fit(train_df['word'])
print('words', len(word_encoder.classes_), '=>', ', '.join([x for x in word_encoder.classes_]))

words 150 => The Great Wall of China, The Mona Lisa, angel, anvil, axe, banana, bandage, barn, basket, beach, bench, binoculars, bird, blackberry, bread, bridge, broccoli, broom, bush, butterfly, calculator, calendar, camel, camera, campfire, cannon, canoe, car, carrot, cat, ceiling fan, chair, circle, cloud, coffee cup, compass, cookie, cooler, cow, crocodile, cup, diamond, diving board, dolphin, door, dragon, drill, drums, duck, ear, elbow, eraser, face, fan, feather, finger, flashlight, flip flops, flying saucer, frog, garden, golf club, grass, hamburger, harp, hat, helmet, hexagon, hockey puck, hockey stick, hot air balloon, house, house plant, ice cream, jacket, ladder, light bulb, lighthouse, lion, lipstick, marker, microphone, moon, mosquito, mouse, mouth, mug, necklace, octagon, owl, paper clip, peanut, pear, peas, pencil, pickup truck, picture frame, pillow, pizza, police car, pond, popsicle, power outlet, purse, radio, rhinoceros, roller coaster, sailboat, sandwich, saxophone

# Stroke-based Classification
Here we use the stroke information to train a model and see if the strokes give us a better idea of what the shape could be. 

In [8]:
def get_Xy(in_df):
    X = np.stack(in_df['drawing'], 0)
    y = to_categorical(word_encoder.transform(in_df['word'].values))
    return X, y
train_X, train_y = get_Xy(train_df)
valid_X, valid_y = get_Xy(valid_df)
test_X, test_y = get_Xy(test_df)
print(train_X.shape)

(600000, 196, 3)


In [9]:
# fig, m_axs = plt.subplots(3,3, figsize = (16, 16))
# rand_idxs = np.random.choice(range(train_X.shape[0]), size = 9)
# for c_id, c_ax in zip(rand_idxs, m_axs.flatten()):
#     test_arr = train_X[c_id]
#     test_arr = test_arr[test_arr[:,2]>0, :] # only keep valid points
#     lab_idx = np.cumsum(test_arr[:,2]-1)
#     for i in np.unique(lab_idx):
#         c_ax.plot(test_arr[lab_idx==i,0], 
#                 np.max(test_arr[:,1])-test_arr[lab_idx==i,1], '.-')
#     c_ax.axis('off')
#     c_ax.set_title(word_encoder.classes_[np.argmax(train_y[c_id])])

# LSTM to Parse Strokes
The model suggeted from the tutorial is

![Suggested Model](https://www.tensorflow.org/versions/master/images/quickdraw_model.png)

In [15]:
from keras.models import Sequential
from keras.layers import BatchNormalization, Conv1D, LSTM, Dense, Dropout, MaxPool1D
from keras.optimizers import Optimizer

In [36]:
from keras.optimizers import Optimizer
from keras import backend as K
import numpy as np

class Adam_accumulate(Optimizer):
    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-8, accum_iters=20, **kwargs):
        super(Adam_accumulate, self).__init__(**kwargs)
        self.__dict__.update(locals())
        self.iterations = K.variable(0)
        self.lr = K.variable(lr)
        self.beta_1 = K.variable(beta_1)
        self.beta_2 = K.variable(beta_2)
        self.accum_iters = K.variable(accum_iters)
        
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        gs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        self.weights = ms + vs

        for p, g, m, v, gg in zip(params, grads, ms, vs, gs):

            flag = K.equal(self.iterations % self.accum_iters, 0)
            flag = K.cast(flag, dtype='float32')

            gg_t = (1 - flag) * (gg + g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * (gg + flag * g) / self.accum_iters
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square((gg + flag * g) / self.accum_iters)
            p_t = p - flag * lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append((m, flag * m_t + (1 - flag) * m))
            self.updates.append((v, flag * v_t + (1 - flag) * v))
            self.updates.append((gg, gg_t))

            new_p = p_t
            # apply constraints
            if getattr(p, 'constraint', None) is not None:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append((p, new_p))
        return self.updates


    def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'epsilon': self.epsilon}
        base_config = super(Adam_accumulate, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [37]:
if len(get_available_gpus())>0:
    # https://twitter.com/fchollet/status/918170264608817152?lang=en
    from keras.layers import CuDNNLSTM as LSTM # this one is about 3x faster on GPU instances
stroke_read_model = Sequential()
stroke_read_model.add(BatchNormalization(input_shape = (None,)+train_X.shape[2:]))
# filter count and length are taken from the script https://github.com/tensorflow/models/blob/master/tutorials/rnn/quickdraw/train_model.py
stroke_read_model.add(Conv1D(128, (5,), padding='same', activation='relu'))
stroke_read_model.add(Dropout(0.15))
stroke_read_model.add(MaxPool1D(pool_size=3, strides=2))
stroke_read_model.add(Conv1D(256, (3,), padding='same', activation='relu'))
stroke_read_model.add(Dropout(0.15))
stroke_read_model.add(Conv1D(256, (3,), padding='same', activation='relu'))
stroke_read_model.add(Dropout(0.15))
stroke_read_model.add(Conv1D(256, (3,), padding='same', activation='relu'))
stroke_read_model.add(Dropout(0.15))
stroke_read_model.add(MaxPool1D(pool_size=3, strides=2))
stroke_read_model.add(Dropout(0.2))
stroke_read_model.add(LSTM(128, return_sequences = True))
stroke_read_model.add(Dropout(0.3))
stroke_read_model.add(LSTM(128, return_sequences = False))
stroke_read_model.add(Dropout(0.3))
stroke_read_model.add(Dense(512))
stroke_read_model.add(Dropout(0.4))
stroke_read_model.add(Dense(len(word_encoder.classes_), activation = 'softmax'))
#adam = optimizers.Adam(lr=0.0001)
adam_acc = Adam_accumulate(accum_iters=3)
stroke_read_model.compile(optimizer = adam_acc, # Combines updates for 5 iterations.
                          loss = 'categorical_crossentropy', 
                          metrics = ['categorical_accuracy', top_3_accuracy])
stroke_read_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_10 (Batc (None, None, 3)           12        
_________________________________________________________________
conv1d_37 (Conv1D)           (None, None, 128)         2048      
_________________________________________________________________
dropout_73 (Dropout)         (None, None, 128)         0         
_________________________________________________________________
max_pooling1d_19 (MaxPooling (None, None, 128)         0         
_________________________________________________________________
conv1d_38 (Conv1D)           (None, None, 256)         98560     
_________________________________________________________________
dropout_74 (Dropout)         (None, None, 256)         0         
_________________________________________________________________
conv1d_39 (Conv1D)           (None, None, 256)         196864    
__________

In [38]:
weight_path="{}_weights.best.hdf5".format('stroke_lstm_model')

checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only = True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=3, 
                                   verbose=1, mode='auto', cooldown=3, min_lr=0.000005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=10) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]

In [39]:
# from keras.callbacks import Callback
# class OutputClearNEpoch(Callback):
#     def on_epoch_end(self, epoch, logs={}):
#         current = logs.get(self.monitor)
#         if epoch % 5 == 0:
#             clear_output()

In [40]:
from IPython.display import clear_output
stroke_read_model.fit(train_X, train_y,
                      validation_data = (valid_X, valid_y), 
                      batch_size = batch_size,
                      epochs = 150,
                      callbacks = callbacks_list)
#clear_output()

Train on 600000 samples, validate on 150000 samples
Epoch 1/150

Epoch 00001: val_loss improved from inf to 4.60736, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 2/150

Epoch 00002: val_loss improved from 4.60736 to 4.26777, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 3/150

Epoch 00003: val_loss improved from 4.26777 to 3.98150, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 4/150

Epoch 00004: val_loss improved from 3.98150 to 3.43071, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 5/150

Epoch 00005: val_loss improved from 3.43071 to 2.78346, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 6/150

Epoch 00006: val_loss improved from 2.78346 to 2.38624, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 7/150

Epoch 00007: val_loss improved from 2.38624 to 2.03389, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 8/150

Epoch 00008: val_loss improved from 2.03389 to 1.84336, saving model to stroke_lstm_model_we


Epoch 00049: val_loss did not improve from 0.92180
Epoch 50/150

Epoch 00050: val_loss did not improve from 0.92180
Epoch 51/150

Epoch 00051: val_loss improved from 0.92180 to 0.92031, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 52/150

Epoch 00052: val_loss did not improve from 0.92031
Epoch 53/150

Epoch 00053: val_loss improved from 0.92031 to 0.91766, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 54/150

Epoch 00054: val_loss did not improve from 0.91766
Epoch 55/150

Epoch 00055: val_loss improved from 0.91766 to 0.91316, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 56/150

Epoch 00056: val_loss improved from 0.91316 to 0.91134, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 57/150

Epoch 00057: val_loss improved from 0.91134 to 0.89905, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 58/150

Epoch 00058: val_loss improved from 0.89905 to 0.89692, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 59/150

Epoch 0


Epoch 00101: val_loss did not improve from 0.83056
Epoch 102/150

Epoch 00102: val_loss did not improve from 0.83056

Epoch 00102: ReduceLROnPlateau reducing learning rate to 7.775999838486313e-05.
Epoch 103/150

Epoch 00103: val_loss improved from 0.83056 to 0.83038, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 104/150

Epoch 00104: val_loss did not improve from 0.83038
Epoch 105/150

Epoch 00105: val_loss did not improve from 0.83038
Epoch 106/150

Epoch 00106: val_loss improved from 0.83038 to 0.82987, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 107/150

Epoch 00107: val_loss improved from 0.82987 to 0.82972, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 108/150

Epoch 00108: val_loss improved from 0.82972 to 0.82883, saving model to stroke_lstm_model_weights.best.hdf5
Epoch 109/150

Epoch 00109: val_loss did not improve from 0.82883
Epoch 110/150

Epoch 00110: val_loss did not improve from 0.82883
Epoch 111/150

Epoch 00111: val_loss did not 

<keras.callbacks.History at 0x7fde77360278>

In [41]:
stroke_read_model.load_weights(weight_path)
lstm_results = stroke_read_model.evaluate(test_X, test_y, batch_size = 4096)
print('Accuracy: %2.1f%%, Top 3 Accuracy %2.1f%%' % (100*lstm_results[1], 100*lstm_results[2]))

Accuracy: 78.6%, Top 3 Accuracy 91.2%
