# **Doodle Classifier**

### This is a classifier that predicts the label of hand-drawn doodle images. The idea is based on [QuickDraw](https://quickdraw.withgoogle.com/#) by Google. The [dataset](https://github.com/googlecreativelab/quickdraw-dataset) they provide contains 50 million images across 345 categories! I am using a subset of 50 categories for my model because of limited resources, but the same code can be used with small tweaks to train on all 345 categories.

In [None]:
from urllib.request import urlretrieve
from PIL import Image
import numpy as np
import os
from pathlib import Path

# Seed NumPy's global RNG so the train/test sampling done further down is repeatable.
np.random.seed(0)


# Working directories: raw .npy downloads plus the train/test image folder tree.
for workdir in ("./npy_files", "./Data", "./Data/train", "./Data/test"):
    Path(workdir).mkdir(parents=True, exist_ok=True)

# The 50 QuickDraw categories used by this notebook.
classes = [
    'airplane', 'mailbox', 'fish', 'face', 'bowtie',
    'butterfly', 'umbrella', 'syringe', 'star', 'elephant',
    'hammer', 'key', 'knife', 'ice_cream', 'hand',
    'flower', 'fork', 'wheel', 'wine_glass', 'cloud',
    'microphone', 'cat', 'baseball', 'crab', 'crocodile',
    'dolphin', 'ant', 'anvil', 'apple', 'axe',
    'banana', 'bicycle', 'binoculars', 'bird', 'birthday_cake',
    'mushroom', 'octopus', 'screwdriver', 'shark', 'sheep',
    'shoe', 'snake', 'snowflake', 'snowman', 'spider',
    'camera', 'campfire', 'candle', 'cannon', 'car',
]

# ref: download() function from https://github.com/yining1023/doodleNet/blob/master/doodleNet.ipynb
# Fetch one numpy bitmap file per class into ./npy_files.
# Re-running the cell is cheap: files that are already present are skipped, and
# each download goes to a temporary '.part' file that is renamed only once the
# transfer finished, so an interrupted run never leaves a truncated .npy behind.
base = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/'
for c in classes:
    # underscores in our class names are sent as URL-encoded spaces (%20)
    cls_url = c.replace('_', '%20')
    path = base+cls_url+'.npy'
    print(path)

    npy_path = 'npy_files/'+c+'.npy'
    if os.path.exists(npy_path):
        continue

    partial_path = npy_path+'.part'
    urlretrieve(path, partial_path)
    # os.replace is atomic on the same filesystem and overwrites a stale target
    os.replace(partial_path, npy_path)


# Turn the downloaded bitmaps into PNG files (so that image augmentations can be
# applied later) laid out as ./Data/<split>/<class>/<class>_<split>_<n>.png.
for c in classes:
    for split_dir in ("./Data/train/{}".format(c), "./Data/test/{}".format(c)):
        Path(split_dir).mkdir(parents=True, exist_ok=True)

    bitmaps = np.load('npy_files/'+c+'.npy')
    train_pool = bitmaps[:95000]
    test_pool = bitmaps[95000:]

    # 55000 training images drawn (without replacement) from the first 95000 rows
    train_pick = np.random.choice(train_pool.shape[0], size=55000, replace=False)
    train_set = train_pool[train_pick]
    # 8000 test images drawn (without replacement) from the remaining rows
    test_pick = np.random.choice(test_pool.shape[0], size=8000, replace=False)
    test_set = test_pool[test_pick]

    outputs = (
        ('./Data/train/{}/{}_train_{}.png', train_set),
        ('./Data/test/{}/{}_test_{}.png', test_set),
    )
    for name_template, subset in outputs:
        for idx, flat in enumerate(subset):
            # each row is reshaped into a 28x28 8-bit greyscale image
            picture = Image.fromarray(flat.reshape((28, 28)), 'L')
            picture.save(name_template.format(c, c, idx))
    print(c + ' done')

https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/airplane.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/mailbox.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/fish.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/face.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bowtie.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/butterfly.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/umbrella.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/syringe.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/star.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/elephant.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/hammer.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/key.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bi

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import random

# Reproducibility: seed both PyTorch's RNG and Python's `random` module
# with the same fixed value.
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)

In [None]:
# Loader settings: worker processes per DataLoader, mini-batch size and the
# fraction of the training folder held out for validation.
num_workers = 10
batch_size = 32
valid_size = 0.15

# Training-time pipeline: greyscale + random augmentation, then scale to a
# tensor and normalise with mean 0.5 / std 0.5.
# Normalize gets real 1-tuples: `(0.5)` is just the float 0.5.
transform = transforms.Compose([transforms.Grayscale(),
                                transforms.RandomRotation(30),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,),
                                                     (0.5,))])

# no augmentations applied for validation/test data
test_transform = transforms.Compose([transforms.Grayscale(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5,),
                                                          (0.5,))])

train_data = datasets.ImageFolder('./Data/train',  transform=transform)

# Second view of the same training folder WITHOUT augmentation. Validation
# batches are read through this view so the validation loss is measured on
# clean images. Both ImageFolder instances are built from the same directory,
# so a given index refers to the same file in each.
valid_data = datasets.ImageFolder('./Data/train',  transform=test_transform)

test_data = datasets.ImageFolder('./Data/test',  transform=test_transform)

# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
# (disjoint index sets, so no image is used for both)
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size,
    sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
    num_workers=num_workers)


In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Class names in alphabetical order; the position in this list is used as the
# class index when reporting per-class results.
classes = sorted([
    'airplane', 'mailbox', 'fish', 'face', 'bowtie',
    'butterfly', 'umbrella', 'syringe', 'star', 'elephant',
    'hammer', 'key', 'knife', 'ice_cream', 'hand',
    'flower', 'fork', 'wheel', 'wine_glass', 'cloud',
    'microphone', 'cat', 'baseball', 'crab', 'crocodile',
    'dolphin', 'ant', 'anvil', 'apple', 'axe',
    'banana', 'bicycle', 'binoculars', 'bird', 'birthday_cake',
    'mushroom', 'octopus', 'screwdriver', 'shark', 'sheep',
    'shoe', 'snake', 'snowflake', 'snowman', 'spider',
    'camera', 'campfire', 'candle', 'cannon', 'car',
])

In [None]:
from torch import optim

# NOTE: `model` has to exist already when this cell is executed.
# Negative log-likelihood loss on the network output.
criterion = nn.NLLLoss()

# SGD with momentum over all parameters of the model.
trainable_params = model.parameters()
optimizer = optim.SGD(params=trainable_params, lr=0.01, momentum=0.9)

# Every scheduler.step() multiplies the learning rate by gamma.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.99, last_epoch=-1)

The convnet architecture

In [None]:
class Net(nn.Module):
    """Convolutional doodle classifier that returns per-class log-probabilities.

    The network has four stages, each made of two 3x3 convolutions with ReLU,
    a 2x2 max-pool and channel dropout (1 -> 32 -> 64 -> 128 -> 256 channels).
    An average pool follows, then a single linear layer with one output per
    entry in the module-level ``classes`` list. The flatten step
    ``view(-1, 256)`` needs exactly 256 features per sample, which is what a
    1x28x28 input produces.
    """

    def __init__(self):
        super().__init__()
        # stage 1
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        # stage 2
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        # stage 3
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        # stage 4
        self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv8 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # channel-wise dropout after every stage, plain dropout before the classifier
        self.dropout1 = nn.Dropout2d(p=0.3)
        self.dropout2 = nn.Dropout2d(p=0.3)
        self.dropout3 = nn.Dropout2d(p=0.3)
        self.dropout4 = nn.Dropout2d(p=0.3)
        self.dropout5 = nn.Dropout(p=0.5)

        # 2x2 / stride-2 pooling; pool3 pads by one so the odd 7x7 map becomes 4x4
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=1)

        self.fc1 = nn.Linear(256, len(classes))

    def forward(self, x):
        """Return log-softmax scores of shape (batch, len(classes)) for ``x``."""
        stages = (
            (self.conv1, self.conv2, self.pool1, self.dropout1),
            (self.conv3, self.conv4, self.pool2, self.dropout2),
            (self.conv5, self.conv6, self.pool3, self.dropout3),
            (self.conv7, self.conv8, self.pool4, self.dropout4),
        )
        for first_conv, second_conv, pool, drop in stages:
            x = F.relu(first_conv(x))
            x = F.relu(second_conv(x))
            x = drop(pool(x))

        # collapse the remaining spatial extent and flatten to 256 features per sample
        features = self.avgpool(x).view(-1, 256)

        logits = self.fc1(self.dropout5(features))
        return F.log_softmax(logits, dim=1)

In [None]:
# Build a freshly initialised network; the bare `model` expression on the last
# line makes the notebook display its layer summary.
model = Net()
model

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv7): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout1): Dropout2d(p=0.3, inplace=False)
  (dropout2): Dropout2d(p=0.3, inplace=False)
  (dropout3): Dropout2d(p=0.3, inplace=False)
  (dropout4): Dropout2d(p=0.3, inplace=False)
  (dropout5): Dropout(p=0.5, inplace=False)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, di

Defining the loss function, an optimizer to update the parameters, and a scheduler that exponentially decays the learning rate.

In [None]:
# Train for n_epochs, validating after every epoch and checkpointing the
# weights to ./model.pt whenever the validation loss does not get worse.
n_epochs = 100

# best validation loss seen so far
# NOTE(review): the trailing 0.062003 looks like the best value of an earlier run - confirm
valid_loss_min = np.inf #0.062003

# move the model to gpu if available else cpu
model = model.to(device)

# A loader driven by a sampler only visits the sampler's indices, while
# len(loader.dataset) counts every image in the underlying folder. The per-sample
# averages therefore have to be taken over the sampler sizes.
n_train_samples = len(train_loader.sampler)
n_valid_samples = len(valid_loader.sampler)

for epoch in range(n_epochs):
    train_loss = 0.0
    valid_loss = 0.0

    # ---- training pass (dropout active) ----
    model.train()
    for data, target in train_loader:
        # move the data and target to gpu if available else cpu
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch total is a sum over samples
        train_loss += loss.item()*data.shape[0]
        loss.backward()
        optimizer.step()

    # ---- validation pass (dropout disabled, no gradients) ----
    model.eval()
    with torch.no_grad():
        for data, target in valid_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item()*data.shape[0]

    train_loss = train_loss/n_train_samples
    valid_loss = valid_loss/n_valid_samples

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1, train_loss, valid_loss))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), './model.pt')
        valid_loss_min = valid_loss

    # the learning rate is decayed once every 10 epochs, not every epoch
    if (epoch+1)%10 == 0:
        scheduler.step()


In [None]:
# load the best model based on validation loss
# (map_location places the stored tensors on the CPU while loading)
model.load_state_dict(torch.load('./model.pt', map_location=torch.device('cpu')))

<All keys matched successfully>

The test loop.

In [None]:
# Evaluate the model on the test loader: average loss plus per-class and
# overall accuracy.
model = model.to(device)
test_loss = 0.0
# per-class counters, indexed by class id
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)

model.eval()
# no gradients are needed for evaluation; skipping them saves memory and time
with torch.no_grad():
    # iterate over test data
    for data, target in test_loader:
        # move tensors to GPU if CUDA is available
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)
        # criterion returns the batch mean; weight it by the batch size
        test_loss += loss.item()*data.size(0)
        _, pred = torch.max(output, 1)

        # compare predictions to true label; plain Python lists keep this
        # correct for any batch size, including a final batch of one sample
        hits = pred.eq(target.view_as(pred)).tolist()
        labels = target.tolist()

        # calculate test accuracy for each object class
        for label, hit in zip(labels, hits):
            class_correct[label] += float(hit)
            class_total[label] += 1

# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(len(classes)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        print('Test Accuracy of %5s: N/A (no test examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

## Now we can convert the model to ONNX format so that we can use it in the browser.

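Here I have copied the model from above with some minor changes to the forward method. This is useful because the input will be a 28x28 canvas image, so the model itself has to normalize the image into the range it was trained on. It also returns softmax probabilities instead of log-probabilities, since the per-class scores are needed for visualization.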

In [None]:
class Net(nn.Module):
    """Export variant of the doodle classifier.

    It has the same layers (and therefore the same state-dict keys) as the
    training network, but ``forward`` normalises its input itself with
    ``(x - 0.5) / 0.5`` and returns softmax probabilities instead of
    log-probabilities.
    """

    def __init__(self):
        super().__init__()
        # two 3x3 convolutions per stage; the channel count doubles from stage to stage
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv8 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.dropout1 = nn.Dropout2d(p=0.3)
        self.dropout2 = nn.Dropout2d(p=0.3)
        self.dropout3 = nn.Dropout2d(p=0.3)
        self.dropout4 = nn.Dropout2d(p=0.3)
        self.dropout5 = nn.Dropout(p=0.5)

        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=1)

        # one output unit per entry of the module-level `classes` list
        self.fc1 = nn.Linear(256, len(classes))

    def forward(self, x):
        """Return softmax class probabilities of shape (batch, len(classes))."""
        # normalizing the data inside the model
        # NOTE(review): presumably the caller passes pixel values scaled to
        # [0, 1] - confirm against the browser code
        x = (x - 0.5) / 0.5

        conv_stages = [
            (self.conv1, self.conv2, self.pool1, self.dropout1),
            (self.conv3, self.conv4, self.pool2, self.dropout2),
            (self.conv5, self.conv6, self.pool3, self.dropout3),
            (self.conv7, self.conv8, self.pool4, self.dropout4),
        ]
        for conv_a, conv_b, pool, dropout in conv_stages:
            x = F.relu(conv_a(x))
            x = F.relu(conv_b(x))
            x = dropout(pool(x))

        x = self.avgpool(x)
        x = x.view(-1, 256)
        x = self.dropout5(x)
        scores = self.fc1(x)

        # need the per class scores for visualization
        return F.softmax(scores, dim=1)

In [None]:
# Instantiate the export variant of the network (weights are still random at
# this point); the bare `model` expression displays its layer summary.
model = Net()
model

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv7): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout1): Dropout2d(p=0.3, inplace=False)
  (dropout2): Dropout2d(p=0.3, inplace=False)
  (dropout3): Dropout2d(p=0.3, inplace=False)
  (dropout4): Dropout2d(p=0.3, inplace=False)
  (dropout5): Dropout(p=0.5, inplace=False)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, di

In [None]:
# Load the checkpoint saved at ./model.pt into the export model, placing the
# tensors on the CPU.
model.load_state_dict(torch.load('./model.pt', map_location=torch.device('cpu')))

<All keys matched successfully>

In [None]:
# Switch to evaluation mode before exporting.
model.eval()
# Example input handed to the exporter: a single all-zero 1x28x28 image.
dummy_input = torch.zeros(1,1,28,28)
# Write the ONNX file; verbose=True prints the exported graph.
torch.onnx.export(model, dummy_input, './onnx_model.onnx', verbose=True)

graph(%0 : Float(1:784, 1:784, 28:28, 28:1, requires_grad=0, device=cpu),
      %conv1.weight : Float(32:9, 1:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv1.bias : Float(32:1, requires_grad=1, device=cpu),
      %conv2.weight : Float(32:288, 32:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv2.bias : Float(32:1, requires_grad=1, device=cpu),
      %conv3.weight : Float(64:288, 32:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv3.bias : Float(64:1, requires_grad=1, device=cpu),
      %conv4.weight : Float(64:576, 64:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv4.bias : Float(64:1, requires_grad=1, device=cpu),
      %conv5.weight : Float(128:576, 64:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv5.bias : Float(128:1, requires_grad=1, device=cpu),
      %conv6.weight : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=1, device=cpu),
      %conv6.bias : Float(128:1, requires_grad=1, device=cpu),
      %conv7.weight : Float(256:1152, 128:9, 3:3, 3:1, requ