In [None]:
import urllib.request
from tqdm import tqdm
import os 
import glob
import numpy as np
import pandas as pd

In [None]:
# Larger alternative class list (currently disabled):
# class_names = ['sun', 'face', 'ant', 'apple', 'bird', 'airplane', 'crown', 'cup', 'eyeglasses', 'fish', 'envelope', 'flower', 'house', 'moon', 'rabbit', 'shark', 'star', 'strawberry', 'umbrella', 'saxophone', 'television', 'teapot', 'sword',  'octopus', 'pineapple', 'scissors', 'snail']

# Categories that are downloaded and used for training.
class_names = [
    'sun',
    'face',
    'ant',
    'bird',
    'crown',
    'fish',
    'flower',
    'house',
    'star',
    'envelope',
]

In [None]:
# Download the numpy-bitmap file of every selected class into ./dataset.
from urllib.parse import quote

endpoint = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/{}.npy'

# Portable and idempotent: unlike `!mkdir`, this does not complain when the
# folder already exists on a re-run.
os.makedirs('dataset', exist_ok=True)

for label in tqdm(class_names):
  target_path = os.path.join('dataset', '{}.npy'.format(label))
  if os.path.exists(target_path):
    # Already fetched by an earlier run; avoid re-downloading the file.
    print('{} file already present, skipping download.'.format(label))
    continue

  print('Downloading {} file...'.format(label))
  # Download to a temporary name first so an interrupted transfer never
  # leaves a truncated '<label>.npy' that the loading cell would pick up.
  partial_path = target_path + '.part'
  # quote() keeps the URL valid for labels that contain spaces.
  urllib.request.urlretrieve(endpoint.format(quote(label)), partial_path)
  os.replace(partial_path, target_path)

In [None]:
# Load every downloaded class file and build:
#   x    - one row of 784 pixel values per drawing
#   y    - the numeric class id of each row
#   lbls - mapping class id -> class name (taken from the file name)
MAX_PER_CLASS = 84000  # maximum number of drawings kept per class

# Alphabetical order makes the class-id assignment stable across runs.
files = sorted(glob.glob(os.path.join('dataset/', '*.npy')))

lbls = {}
# Starting each list with an empty float array keeps the float result dtype
# and the (0, 784) / (0,) shapes even when no file is found.
image_chunks = [np.empty([0, 784])]
label_chunks = [np.empty([0])]

for class_id, file in tqdm(enumerate(files), total=len(files)):
  arrs = np.load(file)[:MAX_PER_CLASS]
  image_chunks.append(arrs)
  label_chunks.append(np.full(arrs.shape[0], class_id))  # one label per drawing

  # File name without directory and extension, independent of the path separator.
  lbls[class_id] = os.path.splitext(os.path.basename(file))[0]

# Concatenate once at the end instead of re-copying the growing arrays on
# every loop iteration.
x = np.concatenate(image_chunks, axis=0)
y = np.concatenate(label_chunks, axis=0)

print(x.shape, y.shape)
print('\n', lbls)

In [None]:
# One row per drawing: its pixel array in 'image' and its class id (cast to int) in 'label'.
df = pd.DataFrame({'image': list(x), 'label': y}).astype({'label': int})

In [None]:
# Shuffle the rows so the classes are interleaved instead of grouped by file.
from sklearn import utils


# A fixed random_state makes the row order (and everything derived from it)
# reproducible on Restart & Run All.
df = utils.shuffle(df, random_state=5).reset_index(drop=True)
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Features (pixel arrays) and targets (class ids).
x, y = df['image'], df['label']

# Stratified hold-out: 0.1 % of the rows go to the test side, with the class
# proportions preserved and a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.001,
    random_state=5,
    stratify=y,
)

In [None]:
# Show five randomly chosen drawings from the training set with their class name.
import matplotlib.pyplot as plt
from random import randint

for _ in range(5):
    # x_train / y_train are pandas Series here and their index labels are a
    # subset of the shuffled frame's index, so rows must be picked by
    # position (.iloc); label-based x_train[i] raises KeyError for any label
    # that landed in the test split.
    pos = randint(0, len(x_train) - 1)
    label_id = int(y_train.iloc[pos])

    plt.imshow(x_train.iloc[pos].reshape(28, 28))
    plt.title(lbls.get(label_id, label_id))
    plt.show()


Preprocessing

In [None]:
def _as_image_batch(flat_images):
    """Stack per-drawing pixel arrays into a float32 array of shape (n, 28, 28, 1)."""
    stacked = np.array(flat_images.tolist())
    return stacked.reshape(flat_images.shape[0], 28, 28, 1).astype('float32')


x_train = _as_image_batch(x_train)
x_test = _as_image_batch(x_test)

x_train.shape, x_test.shape


In [None]:
import tensorflow as tf

# One-hot encode both label vectors; the vector width is the number of selected classes.
num_classes = len(class_names)
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [None]:
# Imported from tensorflow.keras (not standalone keras) so the generator comes
# from the same Keras implementation as the model built further down.
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Augmentation pipeline: scales pixels by 1/255 and applies random rotations,
# shifts, zoom and flips; pixels uncovered by a transform are filled from the
# nearest edge.
image_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

In [None]:
# Training batches are randomly augmented.
train_generator = image_gen.flow(x_train, y_train)

# Evaluation batches only get the same 1/255 rescaling: sending them through
# the random augmentations would make the validation metrics (and the early
# stopping that watches them) change from epoch to epoch for reasons unrelated
# to the model.
eval_gen = ImageDataGenerator(rescale=1./255)
test_generator = eval_gen.flow(x_test, y_test, shuffle=False)

In [None]:
# Take a copy: x_train[0] alone is a view, so any in-place edit of sample_img
# (such as `sample_img *= 255`) would silently modify the training data.
sample_img = x_train[0].copy()
plt.imshow(sample_img)


In [None]:
def plot_images(images_arr):
    """Show the given images side by side in a single row.

    Args:
        images_arr: iterable of images accepted by ``Axes.imshow``. One
            subplot is created per image; nothing is drawn for an empty input.
    """
    images = list(images_arr)  # also supports generators / iterators
    if not images:
        return

    # squeeze=False always returns a 2-D array of axes, even for a single image.
    fig, axes = plt.subplots(1, len(images), figsize=(28, 28), squeeze=False)

    for img, ax in zip(images, axes.flatten()):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# Preview ten augmented variants of the sample image.
plt.imshow(sample_img)

# image_gen rescales by 1/255 and the samples are cast to uint8 below, so the
# input is pre-multiplied by 255 to end up in a displayable range again. The
# product is a new array: an in-place `*=` would scale sample_img again on
# every re-run of this cell and write through to any array it is a view of.
scaled_img = sample_img * 255
aug_iter = image_gen.flow(scaled_img.reshape(1, 28, 28, 1))
aug_samples = [next(aug_iter)[0].astype(np.uint8) for _ in range(10)]
plot_images(aug_samples)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D


def _conv_bn(filters, **conv_kwargs):
    """Return a 3x3 'same'-padded ReLU convolution followed by batch normalization."""
    return [
        Conv2D(filters, kernel_size=(3, 3), padding='same', activation='relu', **conv_kwargs),
        BatchNormalization(),
    ]


model = Sequential(
    # conv layers 1-2, then pool 1
    _conv_bn(6, input_shape=(28, 28, 1))
    + _conv_bn(16)
    + [MaxPooling2D(pool_size=(3, 3))]
    # conv layers 3-4, then pool 2
    + _conv_bn(16)
    + _conv_bn(16)
    + [MaxPooling2D(pool_size=(2, 2))]
    + _conv_bn(32)
    # 92 % reached architecture
    + _conv_bn(64)
    + _conv_bn(64)
    # classifier head: one softmax output per class
    + [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(len(class_names), activation='softmax'),
    ]
)
model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam

# Categorical cross-entropy matches the one-hot targets and the softmax output layer.
optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Imported from tensorflow.keras so the callbacks come from the same Keras
# implementation as the model they are attached to.
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau


# Stop once the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train for up to 50 epochs, validating on test_generator after each epoch;
# the early-stopping callback may end the run sooner.
fit_options = dict(
    epochs=50,
    verbose=1,
    validation_data=test_generator,
    callbacks=[early_stopping],
)
hist = model.fit(train_generator, **fit_options)

In [None]:
import matplotlib.pyplot as plt

def gen_graph(history, title):
  """Plot the per-epoch accuracy and loss curves of a training run.

  Two figures are shown, accuracy first and then loss. Validation curves are
  added only when the matching 'val_*' series was recorded.

  Args:
    history: object whose ``history`` attribute maps metric names
      ('accuracy', 'loss' and optionally 'val_accuracy', 'val_loss') to
      per-epoch value sequences.
    title: text placed in front of each figure title; a falsy value keeps the
      plain 'model <metric>' title.
  """
  for metric in ('accuracy', 'loss'):
    legend = ['train']
    plt.plot(history.history[metric])

    val_key = 'val_' + metric
    if val_key in history.history:
      plt.plot(history.history[val_key])
      legend.append('val')

    heading = 'model {}'.format(metric)
    plt.title('{} - {}'.format(title, heading) if title else heading)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(legend, loc='upper left')
    plt.show()

In [None]:
import matplotlib.pyplot as plt

# NOTE(review): gen_graph is also defined in the preceding cell; this later
# definition is the one in effect after Run All. One of the two can be removed.
def gen_graph(history, title):
  """Plot the per-epoch accuracy and loss curves of a training run.

  Two figures are shown, accuracy first and then loss. Validation curves are
  added only when the matching 'val_*' series was recorded.

  Args:
    history: object whose ``history`` attribute maps metric names
      ('accuracy', 'loss' and optionally 'val_accuracy', 'val_loss') to
      per-epoch value sequences.
    title: text placed in front of each figure title; a falsy value keeps the
      plain 'model <metric>' title.
  """
  for metric in ('accuracy', 'loss'):
    legend = ['train']
    plt.plot(history.history[metric])

    val_key = 'val_' + metric
    if val_key in history.history:
      plt.plot(history.history[val_key])
      legend.append('val')

    heading = 'model {}'.format(metric)
    plt.title('{} - {}'.format(title, heading) if title else heading)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(legend, loc='upper left')
    plt.show()

In [None]:
# Plot the accuracy and loss curves recorded in `hist` by model.fit.
gen_graph(hist, 'Doodle CNN')

In [None]:
# Write the trained model to an .h5 file (path is relative to the working directory).
model.save('model_perfect_deep_fully_trained_model.h5')

In [None]:
import cv2

def get_processed_input_img(image_path, size=28, thresh=200):
  """Load an image file and convert it into a single-image model input.

  Steps: read as grayscale, erode once with a 3x3 kernel, resize to
  ``size`` x ``size``, apply an inverted binary threshold (pixels above
  ``thresh`` become 0, all others 255) and scale to the 0..1 range.

  Args:
    image_path: path of the image file to read.
    size: side length in pixels of the square output image.
    thresh: grayscale cut-off used by the inverted binary threshold.

  Returns:
    float32 array of shape (1, size, size, 1).

  Raises:
    FileNotFoundError: if the file is missing or cannot be decoded.
  """
  test_img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
  if test_img is None:
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with a clear message instead of deep inside the next OpenCV call.
    raise FileNotFoundError('Could not read image file: {}'.format(image_path))

  kernel = np.ones((3, 3), np.uint8)
  test_img = cv2.erode(test_img, kernel, iterations=1)
  test_img = cv2.resize(test_img, dsize=(size, size), interpolation=cv2.INTER_AREA)
  test_img = cv2.threshold(test_img, thresh, 255, cv2.THRESH_BINARY_INV)[1]
  test_img = test_img.reshape((1, size, size, 1)).astype(np.float32)

  return test_img/255

# Preprocess one hand-drawn test image and print the resulting tensor for inspection.
sample_path = '/content/f.png'
img = get_processed_input_img(sample_path)

print(img)


In [None]:
# Display the class-id -> class-name mapping built while loading the dataset files.
lbls

In [None]:
# Run the model on the preprocessed image; the bare `pred` displays the raw model output.
pred = model.predict(img)
pred

In [None]:
# Class ids of the five highest-scoring classes for the single input image,
# best first. Negating the scores turns the ascending argsort into a
# descending ranking; [:5] keeps only the top five instead of every class.
best_5 = (-pred[0]).argsort()[:5]
best_5

In [None]:
# Translate the ranked class ids in best_5 into class names via lbls.
print([lbls[i] for i in best_5])

In [None]:
# Classify several hand-drawn test images: show each preprocessed input, then
# print its raw scores and its five best-ranked classes.
imgs = ['apple', 'f', 'ant', 'sun', 'bird']


for name in imgs:
  img = get_processed_input_img('/content/{}.png'.format(name))

  # Undo the 0..1 scaling for display only.
  plt.imshow((img * 255).reshape(28, 28))
  plt.title(name)
  plt.show()

  pred = model.predict(img)[0]
  print(pred)
  # Descending ranking of the class ids, truncated to the top five.
  best_5 = (-pred).argsort()[:5]
  print(best_5)
  print([lbls[class_id] for class_id in best_5])