# Quickdraw

load data  
preprocessing data  
pipeline  
make a model  
train   
evaluate   

In [1]:
!wget -qq https://www.dropbox.com/s/gdlb8dnjzcly51o/quickdraw.zip

!unzip -qq quickdraw.zip

!rm -r __MACOSX
!rm quickdraw.zip

!ls

quickdraw  sample_data


In [45]:
!ls quickdraw

'alarm clock.npy'   bee.npy		 cookie.npy	  lollipop.npy
 ambulance.npy	   'birthday cake.npy'	 donut.npy	 'palm tree.npy'
 angel.npy	    brain.npy		 eyeglasses.npy   pig.npy
 ant.npy	    cactus.npy		 face.npy	  postcard.npy
 banana.npy	    cat.npy		 fish.npy	  rainbow.npy


## Imports

In [46]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm

import numpy as np
from PIL import Image as PImage

from glob import glob
import ntpath

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import tensorflow as tf

In [47]:
arr = np.load('./quickdraw/bee.npy')
arr.shape

(120890, 784)

In [48]:
file_names = glob('./quickdraw/*.npy')

In [49]:
# make some class names
class_names = []

for file in file_names:
  name = ntpath.basename(file)
  class_names.append(name[:-4])
  
  
print(class_names)
print(len(class_names))

['brain', 'fish', 'ant', 'rainbow', 'cactus', 'donut', 'postcard', 'palm tree', 'eyeglasses', 'ambulance', 'cat', 'birthday cake', 'bee', 'lollipop', 'cookie', 'angel', 'banana', 'face', 'alarm clock', 'pig']
20


In [50]:
# get 200 of each class for plotting

x_data =np.array([])
y_labels =np.array([])

for i, filename in enumerate(file_names):
  labels = [i for j in range(200)]
  arr = np.load(filename)
  arr = arr[:200]
  if len(x_data) == 0:
    x_data = arr
    y_labels = np.asarray(labels)
  else:
    x_data = np.concatenate((x_data, arr))
    y_labels = np.concatenate((y_labels, labels))

In [51]:
y_labels.shape

(4000,)

In [52]:
y_labels[190:210]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

## For MNIST

In [53]:
# (mnist_x,mnist_y), (_,_) = tf.keras.datasets.mnist.load_data()
# mnist_x =mnist_x.reshape(60000,784)

In [54]:
# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(mnist_x)
# groups = mnist_y.tolist()
# colors = [int(i % 20) for i in groups]

# plt.scatter(principalComponents[:,0], principalComponents[:,1], c=colors,cmap=plt.cm.plasma)
# plt.show()

## PCA for QuickDraw

In [55]:
# from sklearn.preprocessing import StandardScaler
# x = StandardScaler().fit_transform(x_data)

# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(x_data[:2000])

# print(principalComponents.shape)

# groups = y_labels.tolist()
# colors = [int(i % 20) for i in groups[:2000]]

# plt.scatter(principalComponents[:,0], principalComponents[:,1], c=colors,cmap=plt.cm.plasma)
# plt.show()

## Plot TSNE  for QuickDraw

In [56]:
# import time

# time_start = time.time()
# tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
# tsne_results = tsne.fit_transform(x_data[:2000])

# print('t-SNE Finished! Time elapsed: {} seconds'.format(time.time()-time_start))

In [57]:
# import matplotlib.cm as cm

# groups = y_labels.tolist()

# colors = [int(i % 20) for i in groups[:2000]]

# plt.scatter(tsne_results[:,0], tsne_results[:,1], c=colors,cmap=plt.cm.plasma)
# plt.show()

In [58]:
x_data =np.array([])
y_labels =np.array([])

for i, filename in enumerate(file_names):
  labels = [i for j in range(10000)]
  arr = np.load(filename)
  arr = arr[:10000]
  if len(x_data) == 0:
    x_data = arr
    y_labels = np.asarray(labels)
  else:
    x_data = np.concatenate((x_data, arr))
    y_labels = np.concatenate((y_labels, labels))

In [59]:
x_data.shape, y_labels.shape

((200000, 784), (200000,))

In [60]:
y_labels[:50]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

In [61]:
# Shuffle
from sklearn.utils import shuffle

x_data, y_labels = shuffle(x_data, y_labels, random_state=42)


# Train test split
from sklearn.model_selection import train_test_split

split = 0.9

x_train, x_test, y_train, y_test = train_test_split(x_data, y_labels, test_size=0.1, random_state=42)


x_train.shape, y_train.shape

((180000, 784), (180000,))

In [62]:
y_labels[:50]

array([11,  7, 15,  6,  3,  2, 13, 17, 13,  0,  7, 14, 18,  8, 11, 11, 10,
       18,  0, 17, 16,  3,  2, 15,  0,  0,  0, 13, 19,  4, 13,  0,  2,  0,
       11,  9,  3, 12,  1,  6,  6, 16,  5, 12, 14, 14, 13,  7, 19,  6])

## Prepare data for network

In [63]:
image_size = 28

# Reshape
x_train = x_train.reshape(x_train.shape[0], image_size, image_size, 1).astype('float32')
x_test = x_test.reshape(x_test.shape[0], image_size, image_size, 1).astype('float32')

# Normalize
x_train /= 255.0
x_test /= 255.0

# Convert Class Vectors to Class Matrices
y_train = tf.keras.utils.to_categorical(y_train, len(class_names))
y_test = tf.keras.utils.to_categorical(y_test, len(class_names))

## Data Pipeline

In [64]:
input_shape = (28, 28, 3)
num_classes = 20
learning_rate = 0.01
batch_size = 128

In [65]:
def one_to_three(image, label):
  image = tf.image.grayscale_to_rgb(image)
  return image, label

## Model

In [66]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.map(one_to_three)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.repeat(1000)
train_dataset = train_dataset.batch(batch_size, drop_remainder=True)

In [67]:
train_dataset

<BatchDataset shapes: ((128, 28, 28, 3), (128, 20)), types: (tf.float32, tf.float32)>

In [68]:
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.map(one_to_three)
val_dataset = val_dataset.repeat(1000)
val_dataset = val_dataset.batch(batch_size, drop_remainder=True)

In [69]:
val_dataset

<BatchDataset shapes: ((128, 28, 28, 3), (128, 20)), types: (tf.float32, tf.float32)>

In [70]:
train_steps = int(len(train_dataset)/batch_size)
val_steps = int(len(val_dataset)/batch_size)

## Train

In [71]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Dense, GlobalMaxPooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [72]:
input = Input(shape = input_shape)
x = Conv2D(32, kernel_size = (3,3), activation='relu', name='Conv_01', padding='valid')(input)
x = MaxPool2D(pool_size=(2,2), name='MaxPool_1')(x)
x = Conv2D(64, kernel_size = (3,3), activation='relu', name='Conv_02', padding='valid')(x)
x = MaxPool2D(pool_size=(2,2), name='MaxPool_2')(x)
x = Conv2D(128, kernel_size = (3,3), activation='relu', name='Conv_03', padding='same')(x)
x = MaxPool2D(pool_size=(2,2), name='MaxPool_3')(x)
x = GlobalMaxPooling2D()(x)
output = Dense(20, activation='softmax', name='Dense_01')(x)

In [73]:
model = Model(input, output, name='CNN_model')

In [74]:
model.summary()

Model: "CNN_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 28, 28, 3)]       0         
_________________________________________________________________
Conv_01 (Conv2D)             (None, 26, 26, 32)        896       
_________________________________________________________________
MaxPool_1 (MaxPooling2D)     (None, 13, 13, 32)        0         
_________________________________________________________________
Conv_02 (Conv2D)             (None, 11, 11, 64)        18496     
_________________________________________________________________
MaxPool_2 (MaxPooling2D)     (None, 5, 5, 64)          0         
_________________________________________________________________
Conv_03 (Conv2D)             (None, 5, 5, 128)         73856     
_________________________________________________________________
MaxPool_3 (MaxPooling2D)     (None, 2, 2, 128)         0 

In [75]:
opt = Adam(learning_rate=learning_rate)

model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics='accuracy'
              )

In [76]:
early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, verbose=1, mode='auto')

In [79]:
model.fit(train_dataset,
          steps_per_epoch=train_steps,
          epochs=10,
          validation_data = val_dataset,
          validation_steps=val_steps,
          callbacks=early_stopping
          )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f300764dc90>

## Evaluate

In [80]:
model.evaluate(val_dataset)



[0.5349754691123962, 0.8594427108764648]