In [None]:
!mkdir ~/.kaggle
!cp /content/kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d google/tinyquickdraw
!unzip tinyquickdraw

In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras import callbacks

import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

import numpy as np
from pathlib import Path
import glob
import json

In [4]:
dataset = list(Path('/content/quickdraw_simplified').glob('**/*.ndjson'))

In [None]:
len(dataset)

In [124]:
stop_pt = 5


classes = {}
for i, path in enumerate(dataset[:stop_pt]):
  name = str(path).split('/')[-1].split('.')[0]
  classes[name] = i

print(classes)

{'parrot': 0, 'mushroom': 1, 'sock': 2, 'eyeglasses': 3, 'dishwasher': 4}


In [41]:
x_axis = []
y_axis = []
tag = []
max_length = 0

for index, path in enumerate(dataset[:stop_pt]):
  with open(str(path)) as fp:
    for line in fp:
      drawing = json.loads(line)
      t_x = [0 for _ in range(stop_pt)]
      t_x[index] = 1
      tag.append(t_x)
      t_x = []; t_y = []
      for x, y in drawing['drawing']:
        length = len(x)
        if max_length < length:
          max_length = length
        t_x.append(x); t_y.append(y)
      x_axis.append(t_x); y_axis.append(t_y)

print(max_length)

933


In [42]:
for p, (img_x, img_y) in enumerate(zip(x_axis, y_axis)):
  max_col = max([len(_) for _ in img_x])

  #add padding
  for _ in range(len(img_x[0]), max_col):
    x_axis[p][0].append(0)
    y_axis[p][0].append(0)

  #perform hashing
  for index, (x_vec, y_vec) in enumerate(zip(img_x[1:], img_y[1:])):
    for k, (i, j) in enumerate(zip(x_vec, y_vec)):
      x_axis[p][0][k] = x_axis[p][0][k] + index*i; y_axis[p][0][k] = y_axis[p][0][k] + index*j

In [43]:
img_vectors_x = []
img_vectors_y = []
max_val = 0

#collect zero index vectors
for p, (img_x, img_y) in enumerate(zip(x_axis, y_axis)):
  if max(img_x[0]) > max_val:
    max_val = max(img_x[0])
  if max(img_y[0]) > max_val:
    max_val = max(img_x[0])
  img_vectors_x.append(img_x[0]); img_vectors_y.append(img_y[0])

print(max_val)

13471196


In [44]:
del x_axis
del y_axis

In [45]:
#add padding up to the observable point

observ_pt = 9
train_x = []
train_y = []
labels = []

for i, (vec_x, vec_y) in enumerate(zip(img_vectors_x, img_vectors_y)):
  if len(vec_x) < observ_pt:
    del img_vectors_x[i]
    del img_vectors_y[i]
    del tag[i]
    continue
  train_x.append(img_vectors_x[i][:observ_pt])
  train_y.append(img_vectors_y[i][:observ_pt])
  labels.append(tag[i])

In [46]:
del img_vectors_x
del img_vectors_y

In [47]:
#normalize vectors
train_x = np.array(train_x)
train_y = np.array(train_y)
labels = np.array(labels)

print(train_x.shape, train_y.shape, labels.shape)

(890939, 9) (890939, 9) (890939, 5)


In [49]:
train_x[0]

array([291, 287, 127, 101,  77,  49,  29,   5,   0])

In [13]:
input_layer = layers.Input(shape=(9, ))

x = tf.expand_dims(input_layer, axis=1)

x = layers.Bidirectional(layers.LSTM(256, return_sequences = True))(x)
skip_connection = x

x = layers.Bidirectional(layers.LSTM(256, return_sequences = True))(x)
x = layers.Concatenate()([x, skip_connection])

x = layers.Bidirectional(layers.LSTM(128))(x)
x = layers.LayerNormalization()(x)

output_layer = layers.Flatten()(x)

model_x = models.Model(input_layer, output_layer)

In [14]:
input_layer_y = layers.Input(shape=(9, ))

y = tf.expand_dims(input_layer_y, axis=1)

y = layers.Bidirectional(layers.LSTM(256, return_sequences = True))(y)
skip_connection = y

y = layers.Bidirectional(layers.LSTM(256, return_sequences = True))(y)
y = layers.Concatenate()([y, skip_connection])

y = layers.Bidirectional(layers.LSTM(128))(y)
y = layers.LayerNormalization()(y)

y = layers.Flatten()(y)
output_layer_y = layers.Dense(256, activation='relu')(y)

model_y = models.Model(input_layer_y, output_layer_y)

In [16]:
stop_pt = 5

in_x = layers.Input(shape=(9, ))
in_y = layers.Input(shape=(9, ))

x = model_x([in_x])
y = model_y([in_y])

xy = layers.Concatenate()([x, y])

prediction_layer = layers.Dense(stop_pt, activation='softmax')(xy)

model = models.Model([in_x, in_y], prediction_layer)

In [92]:
model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

In [90]:
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_34 (InputLayer)       [(None, 9)]                  0         []                            
                                                                                                  
 input_35 (InputLayer)       [(None, 9)]                  0         []                            
                                                                                                  
 model_10 (Functional)       (None, 256)                  3300864   ['input_34[0][0]']            
                                                                                                  
 model_11 (Functional)       (None, 256)                  3366656   ['input_35[0][0]']            
                                                                                           

In [63]:
earlyStoping = callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

In [76]:
train_x.shape

(890939, 9)

In [93]:
batch_size=32
history = model.fit(
    [train_x, train_y], labels,
    epochs = 10,
    shuffle = True,
    validation_split = 0.2
    batch_size = batch_size,
    callbacks = [earlyStoping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e5a5c7aee60>

In [95]:
from google.colab import drive
drive.mount('/content/mdrive/')

Mounted at /content/mdrive/


In [96]:
model.save('/content/mdrive/MyDrive/Colab Notebooks/doodle_draw.keras')

In [101]:
train_y[0]

array([104,  83,  10,   0,   0,   8,  48,  57,  66])

In [153]:
len(train_x)

890939

In [158]:
i = 890900
x = np.array([train_x[i]])
y = np.array([train_y[i]])

In [159]:
prediction = model.predict([x, y])



In [160]:
np.argmax(prediction[0])

4

In [161]:
labels[i]

array([0, 0, 0, 0, 1])

In [None]:
for key, value in classes.items():
  if value == np.argmax(labels[i]):
    print("True: ", key)

for key, value in classes.items():
  if value == np.argmax(prediction[0]):
    print("Predicted: ", key)

In [165]:
from tensorflow.keras.models import load_model

In [163]:
class Predict():
  def __init__(self, model_path):
    self.model = load_model(str(model_path))

  def recognize(self, x, y):
    x = np.array([x])
    y = np.array([y])
    prediction = model.predict([x, y])
    for key, value in classes.items():
      if value == np.argmax(prediction[0]):
        prediction = key

    return prediction

True:  dishwasher
Predicted:  dishwasher


In [166]:
import numpy as np
import cv2 as cv
import threading
import os

os.system('mkdir liveRecognition')

t1 = threading.Thread(target=captureFrames)
t2 = threading.Thread(target=preprocess_image, args=path)

t1.start()
t2.start()

t1.join()
t2.join()


def captureFrames():
  #save only last 3 images
  i = 0
  cap = cv.VideoCapture(0)
  if not cap.isOpened():
      print("Cannot open camera")
      exit()
  while True:
      ret, frame = cap.read()
      if not ret:
          print("Can't receive frame (stream end?). Exiting ...")
          break
      cv.imshow('frame', frame)
      cv.imwrite(f'image_{i}.jpg', frame)
      i = i + 1
      if i == 3:
        i = 0
      if cv.waitKey(1) == ord('q'):
          break
  cap.release()
  cv.destroyAllWindows()

def preprocess_image(path):
  paths = list(Path(path).glob('**/*.jpg'))
  for path in paths:
    ndjson_data = image_to_ndjson(path)


Cannot open camera
Can't receive frame (stream end?). Exiting ...


In [5]:
from PIL import Image
import json
import sys

def image_to_ndjson(image_path):


# Convert features to NDJSON format
ndjson_data = json.dumps(features)

# Write NDJSON data to a file
with open('image.ndjson', 'w') as file:
    file.write(ndjson_data + '\n')

    try:
        # Open the image file
        img = Image.open(image_path)
    except IOError:
        print("Unable to open image file")
        return
    features = {
      "width": img.width,
      "height": img.height,
      "format": img.format,
      "drawing": []
    }

    pixels = img.load()
    for y in range(img.height):
      for x in range(img.width):
        features["drawing"].append([[x], [y]]))

    # Convert features to NDJSON format
    ndjson_data = ""
    for coordinate in features["drawing"]:
        ndjson_data += json.dumps({**features, "drawing": coordinate}) + '\n'


    # Serialize pixel data to NDJSON format
    ndjson_data = '\n'.join(json.dumps(entry) for entry in pixel_data)
    return ndjson_data

In [10]:
ndjson_data = image_to_ndjson('/content/20230122_180144.jpg')