In [1]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from os import walk

In [2]:
batch_size = 128
img_rows, img_cols = 28, 28

In [3]:
data_path = "data_files/"
for (dir_path, dirnames, filenames) in walk(data_path):
    pass
print(filenames)

['full_numpy_bitmap_airplane.npy', 'full_numpy_bitmap_angel.npy', 'full_numpy_bitmap_backpack.npy', 'full_numpy_bitmap_bird.npy', 'full_numpy_bitmap_blackberry.npy', 'full_numpy_bitmap_drums.npy', 'full_numpy_bitmap_dumbbell.npy', 'full_numpy_bitmap_feather.npy', 'full_numpy_bitmap_guitar.npy', 'full_numpy_bitmap_The Mona Lisa.npy']


In [6]:
num_images = 100000
num_files = len(filenames)
images_per_category = num_images//num_files
seed = np.random.randint(1, 10e7)

In [7]:
for i in range(10):
    print('i = %d'%i)
    for file in filenames:
        file_path = data_path + file
        x = np.load(file_path)
        x = x.astype('float32')
        x /= 255.

        y = [i]*len(x) #Creating a numerical label for this image.
        x = x[:images_per_category]
        y = y[:images_per_category]

        if i == 0:
            x_all = x
            y_all = y
        else:
            x_all = np.concatenate((x, x_all), axis = 0)
            y_all = np.concatenate((y, y_all), axis = 0)


i = 0
i = 1
i = 2
i = 3
i = 4
i = 5
i = 6
i = 7
i = 8
i = 9


In [10]:
print(np.shape(x), np.shape(y), np.shape(x_all), np.shape(y_all))
print(y_all[:4])

(10000, 784) (10000,) (910000, 784) (910000,)
[9 9 9 9]


In [11]:
x_train, x_test, y_train, y_test = train_test_split(x_all, y_all, test_size = 0.2, random_state = 42)

In [12]:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

In [13]:
y_train = tf.keras.utils.to_categorical(y_train, num_files)
y_test = tf.keras.utils.to_categorical(y_test, num_files)

In [18]:
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size = 0.1, random_state = 42)

In [19]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape = input_shape))
model.add(tf.keras.layers.MaxPooling2D(pool_size = (2, 2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size = (2, 2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(num_files, activation = 'softmax'))
print('Compiling...')
model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer = tf.keras.optimizers.Adam(), metrics = ['accuracy'])

model.summary()

Compiling...


In [22]:
epochs = 25
callbacks = [tf.keras.callbacks.TensorBoard(log_dir = ".\\tb_log_dir")]
model.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, callbacks = callbacks, verbose = 1, validation_data = (x_valid, y_valid))

Train on 64800 samples, validate on 7200 samples
Epoch 1/25
  128/64800 [..............................] - ETA: 32:02

InvalidArgumentError:  logits and labels must be broadcastable: logits_size=[128,10] labels_size=[12800,10]
	 [[node loss/dense_1_loss/softmax_cross_entropy_with_logits (defined at <ipython-input-22-c83482d0ba71>:3) ]] [Op:__inference_distributed_function_1226]

Function call stack:
distributed_function


In [None]:
score = model.evaluate(x_test, y_test, verbose = 1)
print("test loss : %.3f"%score[0])
print("test accuracy : %3f"%score[1])

In [None]:
import os
labels = [os.path.splitex(file)[0] for file in filenames]
print(labels)
print("for each pair, the first label is predicted, the second is the actual.")
for i in range(20):
    t = np.random.randint(len(x_test))
    x1 = x_test[t]
    x1 = x1.reshape(1, 28, 28, 1)
    p = model.predict(x1)
    print('___________________')
    print(labels[np.argmax(p)])
    print(labels[np.argmax(y_test[t])])
    print('___________________')

## Saving, Loading and retesting the model.

In [None]:
model.save("./QDrawModel.h5")
del model

In [None]:
from tensorflow.keras.models import load_model
model = load_model('.QDrawModel.h5')

In [None]:
model.summary()

## Saving the training and testd data

In [None]:
import h5py

with h5py.File('x_train.h5', 'w') as hf:
    hf.create_dataset("QuickDraw", data = x_train)
with h5py.File('y_train.h5', 'w') as hf:
    hf.create_dataset("QuickDraw", data=y_train)
with h5py.File('x_test.h5', 'w') as hf:
    hf.create_dataset("QuickDraw", data = x_test)
with h5py.File('y_test.h5', 'w') as hf:
    hf.create_dataset("QuickDraw", data=y_test)

In [None]:
hf = h5py.File('x_train.h5', 'r')
x_train = np.array(hf['QuickDraw'][:])
hf = h5py.File('y_train.h5', 'r')
y_train = np.array(hf['QuickDraw'][:])
hf = h5py.File('x_test.h5', 'r')
x_test = np.array(hf['QuickDraw'][:])
hf = h5py.File('x_test.h5', 'r')
y_test = np.array(hf['QuickDraw'][:])

## Loading and inference with a pre-trained model

In [None]:
from tensorflow.keras.models import load_model
model = load_model('.QDrawModel.h5')
model.summary()

In [None]:
import tensorflow as tf
with tf.device('/cpu:0'):
    for i in range(10):
        t = np.random.randint(len(x_test))
        x1 = x_test[t]
        x1 = x1.reshape(1, 28, 28, 1)
        p = model.predict(x1)
        y1 = y_test[t]
        print('___________________')
        print(labels[np.argmax(p)])
        print(labels[np.argmax(y1])
        print('___________________')