*Give credit for code foundation here*

### Grab vocab list from Github
This can be changed at a later time. All that matters is that the vocab names come directly from QuickDraw and that the size of the output layer matches the number of items in the text file.

In [1]:
!wget https://raw.githubusercontent.com/Capstone-Projects-2023-Fall/project-smartspeech/SS-85-Train-the-model-on-the-desired-drawings/backend/model/quickdraw-vocab.txt

--2023-10-21 16:53:16--  https://raw.githubusercontent.com/Capstone-Projects-2023-Fall/project-smartspeech/SS-85-Train-the-model-on-the-desired-drawings/backend/model/quickdraw-vocab.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 240 [text/plain]
Saving to: ‘quickdraw-vocab.txt.1’


2023-10-21 16:53:17 (14.7 MB/s) - ‘quickdraw-vocab.txt.1’ saved [240/240]



### Store class names into a list
Remove newline characters and replace all spaces with underscores so that every class name is formatted consistently.

In [2]:
# Read the raw vocab lines; the context manager closes the file even if
# reading raises, which the manual open()/close() pair did not guarantee.
with open('quickdraw-vocab.txt', 'r') as f:
    classes = f.readlines()

In [3]:
# Normalise each vocab entry: strip surrounding whitespace (newline, '\r' from
# CRLF files, stray spaces), turn inner spaces into underscores, and drop blank
# lines so that no empty class name reaches the download step.
classes = [c.strip().replace(' ', '_') for c in classes if c.strip()]

### Download data from Google


In [4]:
!mkdir data

mkdir: cannot create directory ‘data’: File exists


In [5]:
import os
import urllib.parse
import urllib.request

def download(class_list=None, dest_dir='data'):
  """Download the QuickDraw numpy-bitmap file for each class.

  Args:
    class_list: iterable of class names written with underscores in place of
      spaces. Defaults to the notebook-level `classes` list.
    dest_dir: directory the `<class>.npy` files are written to; it is created
      if it does not exist.

  Files already present in `dest_dir` are skipped, so re-running the notebook
  does not fetch the dataset again.
  """
  base = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/'
  if class_list is None:
    class_list = classes
  os.makedirs(dest_dir, exist_ok=True)
  for c in class_list:
    target = os.path.join(dest_dir, c + '.npy')
    if os.path.exists(target):
      continue
    # Remote names use spaces where the local names use underscores;
    # percent-encode them (and any other unsafe character) for the URL.
    path = base + urllib.parse.quote(c.replace('_', ' ')) + '.npy'
    print(path)
    # Download under a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated .npy that a later run would skip.
    partial = target + '.part'
    urllib.request.urlretrieve(path, partial)
    os.replace(partial, target)

In [6]:
# Fetch the .npy bitmap file for every entry in `classes` into data/
download()

https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/blueberry.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bread.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bridge.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bus.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/car.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/carrot.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cat.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/circle.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cup.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/diamond.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/dog.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/flower.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/fork.npy


### Imports

In [7]:
import os
import glob
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf

### Load data

In [8]:
def load_data(root, vfold_ratio=0.2, max_items_per_class= 10000, seed=None):
    """Load per-class ``.npy`` bitmap files and split them into train/test sets.

    Args:
        root: directory containing one ``<class_name>.npy`` file per class.
            Each file must hold a 2-D array with 784 columns.
        vfold_ratio: fraction of the shuffled samples held out as the test set.
        max_items_per_class: at most this many leading rows are used per file.
        seed: optional seed for the shuffle. ``None`` (the default) draws from
            NumPy's global random state, as the function did before.

    Returns:
        ``(x_train, y_train, x_test, y_test, class_names)`` where the ``x_*``
        arrays are float64 with shape ``(n, 784)``, the ``y_*`` arrays are
        float64 class indices with shape ``(n,)``, and ``class_names[i]`` is
        the file stem belonging to label ``i``.
    """
    # glob returns files in arbitrary order; sorting keeps the
    # label-index -> class-name mapping identical across machines and runs.
    all_files = sorted(glob.glob(os.path.join(root, '*.npy')))

    # Seed both lists with empty float64 arrays so the result dtype/shape is
    # unchanged and an empty directory still yields (0, 784) / (0,) arrays.
    samples = [np.empty([0, 784])]
    targets = [np.empty([0])]
    class_names = []

    #load each data file
    for idx, file in enumerate(all_files):
        data = np.load(file)[0:max_items_per_class, :]
        samples.append(data)
        targets.append(np.full(data.shape[0], idx))

        class_name, _ = os.path.splitext(os.path.basename(file))
        class_names.append(class_name)

    # Concatenate once; growing x/y inside the loop re-copied everything
    # accumulated so far on every iteration.
    x = np.concatenate(samples, axis=0)
    y = np.concatenate(targets)

    #randomize the dataset
    if seed is None:
        permutation = np.random.permutation(y.shape[0])
    else:
        permutation = np.random.default_rng(seed).permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    #separate into training and testing
    vfold_size = int(x.shape[0] * vfold_ratio)

    x_test = x[:vfold_size, :]
    y_test = y[:vfold_size]

    x_train = x[vfold_size:, :]
    y_train = y[vfold_size:]
    return x_train, y_train, x_test, y_test, class_names

In [9]:
# Load the downloaded bitmaps from data/ with load_data's default split ratio and per-class cap
x_train, y_train, x_test, y_test, class_names = load_data('data')
# One label index per .npy file that load_data found
num_classes = len(class_names)
# Side length of each square bitmap: 28 * 28 = 784, the row width load_data allocates
image_size = 28

In [10]:
# Sanity check: number of training samples left after the train/test split
print(len(x_train))

264000


### Preprocess data

In [11]:
# Reshape and normalize
# Each 784-value row becomes an (image_size, image_size, 1) single-channel float32 image.
x_train = x_train.reshape(x_train.shape[0], image_size, image_size, 1).astype('float32')
x_test = x_test.reshape(x_test.shape[0], image_size, image_size, 1).astype('float32')

# Scale in place by 1/255 (assumes raw pixel values are 0-255 — TODO confirm).
# NOTE: this cell overwrites x_*/y_* under the same names, so running it a
# second time rescales the images again; re-run the load cell first.
x_train /= 255.0
x_test /= 255.0

# Convert class vectors to class matrices
# (each label index becomes a row of width num_classes)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

### Define model

In [12]:
# Define model: three conv + max-pool stages followed by a dense classifier head
model = keras.Sequential()
model.add(layers.Convolution2D(16, (3, 3),
                        padding='same',
                        input_shape=x_train.shape[1:], activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Convolution2D(32, (3, 3), padding='same', activation= 'relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Convolution2D(64, (3, 3), padding='same', activation= 'relu'))
model.add(layers.MaxPooling2D(pool_size =(2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='tanh'))
# Size the output layer from the loaded data rather than a hard-coded 33, so it
# always matches the one-hot label width built with num_classes.
model.add(layers.Dense(num_classes, activation='softmax'))
# Compile model (training happens in the next section)
adam = tf.optimizers.Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['top_k_categorical_accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 16)        160       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 16)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 32)          0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 64)          18496     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 3, 3, 64)          0

### Train model

In [13]:
# Train for 5 epochs in batches of 256, holding out 10% of the training data for validation
model.fit(x = x_train, y = y_train, validation_split=0.1, batch_size = 256, verbose=2, epochs=5)

Epoch 1/5
929/929 - 11s - loss: 1.0695 - top_k_categorical_accuracy: 0.9071 - val_loss: 0.6998 - val_top_k_categorical_accuracy: 0.9494 - 11s/epoch - 12ms/step
Epoch 2/5
929/929 - 5s - loss: 0.6341 - top_k_categorical_accuracy: 0.9564 - val_loss: 0.6024 - val_top_k_categorical_accuracy: 0.9579 - 5s/epoch - 6ms/step
Epoch 3/5
929/929 - 5s - loss: 0.5466 - top_k_categorical_accuracy: 0.9632 - val_loss: 0.5465 - val_top_k_categorical_accuracy: 0.9627 - 5s/epoch - 5ms/step
Epoch 4/5
929/929 - 6s - loss: 0.4971 - top_k_categorical_accuracy: 0.9669 - val_loss: 0.5269 - val_top_k_categorical_accuracy: 0.9647 - 6s/epoch - 7ms/step
Epoch 5/5
929/929 - 5s - loss: 0.4609 - top_k_categorical_accuracy: 0.9699 - val_loss: 0.4883 - val_top_k_categorical_accuracy: 0.9663 - 5s/epoch - 6ms/step


<keras.src.callbacks.History at 0x7810d3b4e200>

### Test model

In [14]:
# evaluate() returns the loss followed by the compiled metrics. The only metric
# compiled above is top_k_categorical_accuracy, so score[1] is top-k accuracy,
# not plain accuracy; the message is labelled accordingly.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test top-k accuracy: {:0.2f}%'.format(score[1] * 100))

Test accuarcy: 96.59%


### Store classes

In [15]:
# Persist the label order: line i of class_names.txt is the name for label index i
with open('class_names.txt', 'w') as names_file:
    names_file.writelines("{}\n".format(name) for name in class_names)

### Export weights and relevant files

In [22]:
!pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-4.12.0-py3-none-any.whl (89 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/89.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.2/89.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m71.4 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow<3,>=2.13.0 (from tensorflowjs)
  Downloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.8/489.8 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting wurlitzer (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading wurlitzer-3

In [16]:
# Save the trained model as keras.h5, the file the tensorflowjs_converter cell reads with --input_format keras
model.save('keras.h5')

  saving_api.save_model(


In [17]:
!mkdir model
!tensorflowjs_converter --input_format keras keras.h5 model/

2023-10-21 16:59:41.322463: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-21 16:59:41.322536: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-21 16:59:41.322592: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [18]:
# Bundle the class-name list (one name per line, in label-index order) with the converted model files
!cp class_names.txt model/class_names.txt

In [20]:
# Archive the whole model/ directory into model.zip
!zip -r model.zip model

  adding: model/ (stored 0%)
  adding: model/model.json (deflated 82%)
  adding: model/group1-shard1of1.bin (deflated 7%)
  adding: model/class_names.txt (deflated 34%)


In [21]:
# Requires the google.colab package (Colab runtime): hand model.zip to the browser for download
from google.colab import files
files.download('model.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>