In [None]:
!pip install quickdraw

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting quickdraw
  Downloading quickdraw-0.2.0-py3-none-any.whl (10 kB)
Installing collected packages: quickdraw
Successfully installed quickdraw-0.2.0


In [None]:
import os
import random

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import itertools
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from quickdraw import QuickDrawData, QuickDrawDataGroup

In [None]:
# One seed shared by every random number generator used in this notebook.
seed = 111

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed Python, NumPy and PyTorch (CPU generator plus current/all CUDA devices).
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Name of the compute device: "cuda" when a GPU is visible, "cpu" otherwise.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [None]:
# Cap on the number of drawings loaded per class; the train/val split cell
# further down also uses it to compute the 90% split threshold.
num_img_per_class = 5000
# QuickDrawData handle limited to that many drawings per class.
qd = QuickDrawData(max_drawings=num_img_per_class)

In [None]:
# The ten Quick, Draw! categories used in this notebook. The order matters:
# a category's position in this list is its integer class label.
class_list = [
    'apple',
    'wine bottle',
    'spoon',
    'rainbow',
    'panda',
    'hospital',
    'scissors',
    'toothpaste',
    'baseball',
    'hourglass',
]
# Category name -> integer label, derived from the list so the two cannot drift apart.
class_dict = {name: index for index, name in enumerate(class_list)}

In [None]:
# Load the drawings for every class in class_list (the cell output shows each
# class's .bin file being downloaded and then loaded).
# NOTE(review): `qd` is not referenced again in the visible cells — the split
# cell below builds its own QuickDrawDataGroup per class. Confirm this call is
# still needed (possibly it only serves to populate the download cache).
qd.load_drawings(class_list)

downloading apple from https://storage.googleapis.com/quickdraw_dataset/full/binary/apple.bin
download complete
loading apple drawings
load complete
downloading wine bottle from https://storage.googleapis.com/quickdraw_dataset/full/binary/wine bottle.bin
download complete
loading wine bottle drawings
load complete
downloading spoon from https://storage.googleapis.com/quickdraw_dataset/full/binary/spoon.bin
download complete
loading spoon drawings
load complete
downloading rainbow from https://storage.googleapis.com/quickdraw_dataset/full/binary/rainbow.bin
download complete
loading rainbow drawings
load complete
downloading panda from https://storage.googleapis.com/quickdraw_dataset/full/binary/panda.bin
download complete
loading panda drawings
load complete
downloading hospital from https://storage.googleapis.com/quickdraw_dataset/full/binary/hospital.bin
download complete
loading hospital drawings
load complete
downloading scissors from https://storage.googleapis.com/quickdraw_datase

In [None]:
# Images (as NumPy arrays) and integer labels for the two splits.
train_data, train_label = [], []
val_data, val_label = [], []

# Within each class, drawings with index below this threshold (the first 90%
# of num_img_per_class) go to training; the rest go to validation.
num_train_per_class = int(0.9 * num_img_per_class)

for class_name in class_list:
  class_index = class_dict[class_name]
  qdgroup = QuickDrawDataGroup(class_name, max_drawings=num_img_per_class)
  for i, img in enumerate(qdgroup.drawings):
    # get_image() presumably renders the strokes to a PIL image — TODO confirm;
    # it is stored as an array either way.
    pixels = np.asarray(img.get_image())
    if i >= num_train_per_class:
      val_data.append(pixels)
      val_label.append(class_index)
    else:
      train_data.append(pixels)
      train_label.append(class_index)

loading apple drawings
load complete
loading wine bottle drawings
load complete
loading spoon drawings
load complete
loading rainbow drawings
load complete
loading panda drawings
load complete
loading hospital drawings
load complete
loading scissors drawings
load complete
loading toothpaste drawings
load complete
loading baseball drawings
load complete
loading hourglass drawings
load complete


In [None]:
# Preprocessing applied to every drawing before it reaches the network:
#   1. ToTensor  - array/PIL image -> float tensor (channel-first, per torchvision docs)
#   2. Resize    - 227x227, the same spatial size as the random batch used in the smoke-test cell
#   3. Normalize - per-channel mean/std (these values match the widely used
#                  ImageNet statistics — presumably reused here as-is)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(227, 227)),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
class QuickDrawDataset(Dataset):
  """Map-style dataset pairing each image with its integer class label.

  Args:
    data: indexable collection of images.
    labels: indexable collection of labels, aligned with ``data``.
    transform: optional callable applied to an image when it is fetched.
  """

  def __init__(self, data, labels, transform=None):
    self.data = data
    self.labels = labels
    self.transform = transform

  def __len__(self):
    """Number of samples, taken from ``data``."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for ``idx``, with the transform applied if one was given."""
    img = self.data[idx]
    label = self.labels[idx]

    if self.transform is not None:
      # Use the transform owned by this instance. The previous code called a
      # module-level `transform`, ignoring the constructor argument and
      # failing with NameError when no such global existed.
      img = self.transform(img)

    return img, label

In [None]:
class ConvBlock(nn.Module):
  """Conv2d, optionally followed by BatchNorm2d and an activation.

  Args:
    in_channels, out_channels, kernel_size, stride, padding, bias:
      forwarded to ``nn.Conv2d``.
    norm: ``None`` skips normalisation; any other value adds
      ``nn.BatchNorm2d`` (the string itself is not inspected).
    relu: ``None`` skips the activation; ``0.0`` adds ``nn.ReLU``; a positive
      value adds ``nn.LeakyReLU`` with that negative slope. Previously the
      value was ignored and any non-None setting produced a plain ReLU.
  """

  def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True, norm="bnorm", relu=0.0):
    super().__init__()

    layers = [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                        kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)]

    if norm is not None:
      layers.append(nn.BatchNorm2d(num_features=out_channels))

    if relu is not None:
      # Honour the requested slope; 0.0 (the default) keeps the plain ReLU.
      layers.append(nn.LeakyReLU(relu) if relu > 0 else nn.ReLU())

    self.cbr = nn.Sequential(*layers)

  def forward(self, x):
    """Apply conv -> (norm) -> (activation) to ``x``."""
    return self.cbr(x)

In [None]:
class ResBlock(nn.Module):
  """Residual block: two ConvBlocks plus a skip connection.

  The residual branch is ConvBlock (with activation) followed by ConvBlock
  (without activation). The skip path is either the identity or a 1x1
  convolution that maps ``in_channels`` to ``out_channels``.

  Args:
    in_channels, out_channels: channel counts of the block.
    kernel_size, stride, padding, bias, norm, relu: forwarded to the
      ConvBlocks (``relu`` applies to the first one only).
    short_cut: default skip path used by ``forward`` when the caller does not
      choose one: ``True`` for the 1x1 projection, ``False`` for identity.
      This argument was previously accepted but ignored.
  """

  def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
               padding=1, bias=True, norm="bnorm", relu=0.0, short_cut=False):

    super().__init__()

    layers = []

    layers += [ConvBlock(in_channels=in_channels, out_channels=out_channels,
                         kernel_size=kernel_size, stride=stride, padding=padding,
                         bias=bias, norm=norm, relu=relu)]

    layers += [ConvBlock(in_channels=out_channels, out_channels=out_channels,
                         kernel_size=kernel_size, stride=stride, padding=padding,
                         bias=bias, norm=norm, relu=None)]

    self.resblk = nn.Sequential(*layers)
    # The projection is always created so that forward(x, short_cut=True)
    # keeps working on any block, exactly as before.
    self.short_cut = nn.Conv2d(in_channels, out_channels, (1,1), stride=1)
    self.short_cut_default = bool(short_cut)

  def forward(self, x, short_cut=None):
    """Return ``skip(x) + resblk(x)``.

    Args:
      x: input tensor.
      short_cut: ``True``/``False`` selects the projection/identity skip for
        this call; ``None`` (default) uses the constructor setting.
    """
    if short_cut is None:
      short_cut = self.short_cut_default

    if short_cut:
      return self.short_cut(x) + self.resblk(x)
    return x + self.resblk(x)

In [None]:
class ResNet(nn.Module):
  """ResNet-style classifier built from ConvBlock and ResBlock.

  Layout: 7x7 stride-2 stem -> max-pool -> four residual stages with widths
  ``nker``, ``2*nker``, ``4*nker`` and ``8*nker`` (a max-pool between
  consecutive stages) -> global average pool -> linear layer.

  Args:
    in_channels: channels of the input image.
    out_channels: number of output logits.
    nker: width of the first stage.
    norm: forwarded to every ResBlock (the stem uses no normalisation).
    nblk: blocks per stage. Stage 1 has ``nblk[0]`` blocks; stages 2-4 have
      one channel-widening block followed by ``nblk[i] - 1`` same-width blocks.
  """

  def __init__(self, in_channels, out_channels, nker=64, norm="bnorm", nblk=(3, 4, 6, 3)):
    super(ResNet, self).__init__()

    self.enc = ConvBlock(in_channels, nker, kernel_size=7, stride=2, padding=1, bias=True, norm=None, relu=0.0)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.res_1 = self._make_stage(nker, nblk[0], norm)

    self.res_2_up = ResBlock(nker, nker*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=0.0)
    self.res_2 = self._make_stage(nker*2, nblk[1] - 1, norm)

    self.res_3_up = ResBlock(nker*2, nker*4, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=0.0)
    self.res_3 = self._make_stage(nker*4, nblk[2] - 1, norm)

    self.res_4_up = ResBlock(nker*4, nker*8, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=0.0)
    self.res_4 = self._make_stage(nker*8, nblk[3] - 1, norm)

    self.avg_pooling = nn.AdaptiveAvgPool2d(output_size=1)
    self.fc = nn.Linear(nker*8, out_channels)

  @staticmethod
  def _make_stage(channels, num_blocks, norm):
    """Stack ``num_blocks`` independent same-width ResBlocks.

    A new ResBlock is constructed for every position. Repeating one instance
    (as the previous code did) makes every block in the stage share the same
    weights and BatchNorm statistics.
    """
    return nn.Sequential(*[
        ResBlock(channels, channels, kernel_size=3, stride=1, padding=1,
                 bias=True, norm=norm, relu=0.0)
        for _ in range(num_blocks)
    ])

  def forward(self, x):
    """Map an image batch to ``out_channels`` logits per sample."""
    x = self.enc(x)
    x = self.max_pool(x)
    x = self.res_1(x)
    x = self.max_pool(x)

    # The *_up blocks change the channel count, so they need the 1x1 projection skip.
    x = self.res_2_up(x, short_cut=True)
    x = self.res_2(x)
    x = self.max_pool(x)

    x = self.res_3_up(x, short_cut=True)
    x = self.res_3(x)
    x = self.max_pool(x)

    x = self.res_4_up(x, short_cut=True)
    x = self.res_4(x)

    x = self.avg_pooling(x)
    # Flatten (batch, channels, 1, 1) to (batch, channels) for the linear layer.
    x = x.view(x.shape[0], -1)

    out = self.fc(x)

    return out

In [None]:
# Smoke test: push a random batch of four 3-channel 227x227 inputs through a
# freshly built network and print the shape of the result.
model_test = ResNet(3, 10)

x = torch.randn((4, 3, 227, 227))

out = model_test(x)

# The recorded output of this cell is torch.Size([4, 10]): one row of 10 values per input.
print("Output tensor shape is :", out.shape)

Output tensor shape is : torch.Size([4, 10])


In [None]:
# Network that is actually trained: 3 input channels, 10 outputs (one per class).
model = ResNet(3, 10)
# Move it to the device chosen in the seed/config cell rather than a
# hard-coded "cuda", which raises on machines without a GPU. Kept as the last
# expression so the cell still displays the module summary.
model.to(device)

ResNet(
  (enc): ConvBlock(
    (cbr): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
    )
  )
  (max_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (res_1): Sequential(
    (0): ResBlock(
      (resblk): Sequential(
        (0): ConvBlock(
          (cbr): Sequential(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU()
          )
        )
        (1): ConvBlock(
          (cbr): Sequential(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
      )
      (short_cut): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    )
    (1): ResBlock(
      (resblk): Sequential(
        (0): ConvBlock

In [None]:
# Directory (relative to the working directory) where the training cell writes train_log.csv.
log_dir = './log'

In [None]:
# Wrap the in-memory arrays/labels; both splits share the same preprocessing.
qd_train_dataset = QuickDrawDataset(train_data, train_label, transform)
qd_val_dataset = QuickDrawDataset(val_data, val_label, transform)

# Training batches are reshuffled on every pass over the data.
qd_train_dataloader = DataLoader(qd_train_dataset, batch_size=4, shuffle=True)
# Validation metrics are averages over the whole split, so sample order is
# irrelevant; no shuffling keeps evaluation deterministic and avoids drawing
# from the global RNG on every validation pass.
qd_val_dataloader = DataLoader(qd_val_dataset, batch_size=4, shuffle=False)

In [None]:
from torch.optim import Adam

# Loss applied to the model's raw outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-4.
optimizer = Adam(model.parameters(), lr=1e-4)

In [None]:
class AverageMeter(object):
  """Running weighted average of a scalar.

  Attributes:
    val: most recent value passed to ``update``.
    sum: weighted sum of all values seen since the last ``reset``.
    count: total weight seen since the last ``reset``.
    avg: ``sum / count``, or 0 while ``count`` is 0.
  """

  def __init__(self):
    self.reset()

  def reset(self):
    """Clear all statistics."""
    self.val = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def update(self, val, n=1):
    """Record ``val`` with weight ``n`` (e.g. a batch mean and its batch size)."""
    self.val = val
    self.sum += val * n
    self.count += n
    # Guard the division: a zero-weight update on an empty meter used to
    # raise ZeroDivisionError.
    self.avg = self.sum / self.count if self.count else 0

In [None]:
# Run validation (and log a row) every `every_iter` training steps and on the last step.
every_iter = 20
os.makedirs(log_dir, exist_ok=True)


def evaluate(model, dataloader, criterion, device):
  """Return ``(mean_loss, mean_accuracy)`` of ``model`` over ``dataloader``.

  Both means are weighted by batch size. The model is put in eval mode for the
  pass and restored to its previous mode afterwards, so calling this in the
  middle of training does not leave BatchNorm layers in inference mode.
  """
  was_training = model.training
  model.eval()
  loss_meter, acc_meter = AverageMeter(), AverageMeter()

  with torch.no_grad():
    for img, label in dataloader:
      img, label = img.float().to(device), label.long().to(device)

      pred_logit = model(img)
      loss = criterion(pred_logit, label)

      pred_label = torch.argmax(pred_logit, 1)
      acc = (pred_label == label).sum().item() / len(img)

      loss_meter.update(loss.item(), len(img))
      acc_meter.update(acc, len(img))

  model.train(was_training)
  return loss_meter.avg, acc_meter.avg


with open(os.path.join(log_dir, 'train_log.csv'), 'w') as log:
  model.train()
  num_iters = len(qd_train_dataloader)

  # `step` rather than `iter`, so the builtin iter() is not shadowed for the
  # rest of the notebook.
  for step, (img, label) in enumerate(qd_train_dataloader):
    optimizer.zero_grad()

    # Use the device chosen in the seed/config cell instead of a hard-coded .cuda().
    img, label = img.float().to(device), label.long().to(device)

    pred_logit = model(img)

    loss = criterion(pred_logit, label)

    loss.backward()
    optimizer.step()

    # Loss and accuracy of this single training batch.
    pred_label = torch.argmax(pred_logit, 1)
    train_loss = loss.item()
    train_acc = (pred_label == label).sum().item() / len(img)

    if (step % every_iter == 0) or (step == num_iters - 1):
      # evaluate() switches back to training mode when it returns. The previous
      # inline loop called model.eval() and never undid it, so every step after
      # the first validation trained with BatchNorm in inference mode.
      valid_loss, valid_acc = evaluate(model, qd_val_dataloader, criterion, device)

      print("Iter [%3d/%3d] | Train Loss %.4f | Train Acc %.4f | Valid Loss %.4f | Valid Acc %.4f" %
            (step, num_iters, train_loss, train_acc, valid_loss, valid_acc))

      # CSV columns: step, train loss, train acc, valid loss, valid acc.
      log.write('%d,%.4f,%.4f,%.4f,%.4f\n'%(step, train_loss, train_acc, valid_loss, valid_acc))

Iter [  0/11250] | Train Loss 5.1125 | Train Acc 0.0000 | Valid Loss 2.4665 | Valid Acc 0.1000
Iter [ 20/11250] | Train Loss 1.8505 | Train Acc 0.2500 | Valid Loss 2.3288 | Valid Acc 0.1604
Iter [ 40/11250] | Train Loss 2.2398 | Train Acc 0.0000 | Valid Loss 2.1241 | Valid Acc 0.1826
Iter [ 60/11250] | Train Loss 2.3631 | Train Acc 0.2500 | Valid Loss 2.0791 | Valid Acc 0.3040
Iter [ 80/11250] | Train Loss 1.5112 | Train Acc 0.2500 | Valid Loss 1.9507 | Valid Acc 0.3146
Iter [100/11250] | Train Loss 1.9915 | Train Acc 0.2500 | Valid Loss 2.1455 | Valid Acc 0.2252
Iter [120/11250] | Train Loss 1.8171 | Train Acc 0.2500 | Valid Loss 1.9328 | Valid Acc 0.2476
Iter [140/11250] | Train Loss 1.4284 | Train Acc 0.5000 | Valid Loss 2.3150 | Valid Acc 0.2186
Iter [160/11250] | Train Loss 1.4717 | Train Acc 0.5000 | Valid Loss 1.8476 | Valid Acc 0.3652
Iter [180/11250] | Train Loss 1.7674 | Train Acc 0.2500 | Valid Loss 1.9017 | Valid Acc 0.3148
Iter [200/11250] | Train Loss 1.4157 | Train Acc 0