In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import h5py
from sklearn.model_selection import train_test_split
from os import walk

## Acquire The Data

In [2]:
batch_size = 128
img_rows, img_cols = 28, 28       # image dims

In [3]:
#load npy arrays 


In [4]:
data_path = "data_files/" # folder for image files
for (dirpath, dirnames, filenames) in walk(data_path):
    pass # file names accumulate in list  'filenames'
print(filenames)

['broom.npy', 'aircraft_carrier.npy', 'alarm_clock.npy', 'ant.npy', 'cell_phone.npy', 'baseball.npy', 'asparagus.npy', 'dolphin.npy', 'crocodile.npy', 'bee.npy']


In [5]:
num_images = 1000000 ### was 100000, reduce this number if memory issues.
num_files = len(filenames) # *** we have 10 files ***
images_per_category = num_images//num_files
seed = np.random.randint(1, 10e7)
i=0
print(images_per_category)

100000


In [6]:
for file in filenames:
    file_path = data_path + file
    x = np.load(file_path)
    x = x.astype('float32')    ##normalise images
    x /= 255.0
    y = [i] * len(x) # create numeric label for this image

    x = x[:images_per_category] # get our sample of images
    y = y[:images_per_category] # get our sample of labels

    if i == 0:
        x_all = x
        y_all = y
    else:
        x_all = np.concatenate((x,x_all), axis=0)
        y_all = np.concatenate((y,y_all), axis=0)
    i += 1

In [7]:
#split data arrays into  train and test segments
x_train, x_test, y_train, y_test = train_test_split(x_all, y_all, test_size=0.2, random_state=42)

In [8]:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

In [9]:
y_train = tf.keras.utils.to_categorical(y_train, num_files)
y_test = tf.keras.utils.to_categorical(y_test, num_files)

In [10]:
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

x_train shape: (800000, 28, 28, 1)
800000 train samples
200000 test samples


## Create the model

In [11]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.25))

model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.25))

model.add(tf.keras.layers.Flatten())

model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))

model.add(tf.keras.layers.Dense(num_files, activation='softmax'))
print("Compiling...........")

Compiling...........


In [12]:
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(),
              metrics=['accuracy'])

## Train the model

In [13]:
epochs=1 # for testing, for training use 25
callbacks=[tf.keras.callbacks.TensorBoard(log_dir = "./tb_log_dir", histogram_freq = 0)]
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=callbacks,
          verbose=1,
          validation_data=(x_valid, y_valid))

Train on 720000 samples, validate on 80000 samples


<tensorflow.python.keras.callbacks.History at 0x7efc56963780>

In [14]:
score = model.evaluate(x_test, y_test, verbose=1)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 2.068418756465912
Test accuracy: 0.39709


## Test The Model 

In [15]:
#_test

import os
labels = [os.path.splitext(file)[0] for file in filenames]
print(labels)
print("\nFor each pair in the following, the first label is predicted, second is actual\n")
for i in range(20):
    t = np.random.randint(len(x_test) )
    x1= x_test[t]
    x1 = x1.reshape(1,28,28,1)
    p = model.predict(x1)
    print("-------------------------")
    print(labels[np.argmax(p)])
    print(labels[np.argmax(y_test[t])])
    print("-------------------------")




['broom', 'aircraft_carrier', 'alarm_clock', 'ant', 'cell_phone', 'baseball', 'asparagus', 'dolphin', 'crocodile', 'bee']

For each pair in the following, the first label is predicted, second is actual

-------------------------
cell_phone
alarm_clock
-------------------------
-------------------------
baseball
baseball
-------------------------
-------------------------
asparagus
broom
-------------------------
-------------------------
bee
cell_phone
-------------------------
-------------------------
bee
bee
-------------------------
-------------------------
alarm_clock
cell_phone
-------------------------
-------------------------
cell_phone
cell_phone
-------------------------
-------------------------
asparagus
broom
-------------------------
-------------------------
cell_phone
baseball
-------------------------
-------------------------
aircraft_carrier
aircraft_carrier
-------------------------
-------------------------
cell_phone
cell_phone
-------------------------
--------

## Save, Reload and Retest the Model

In [16]:
model.save("./QDrawModel.h5")

In [17]:
del model

In [18]:
from tensorflow.keras.models import load_model


In [19]:
import numpy as np

In [20]:
model = load_model('./QDrawModel.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0

In [21]:
print("For each pair, first is predicted, second is actual")
for i in range(20):
    t = np.random.randint(len(x_test))
    x1= x_test[t]
    x1 = x1.reshape(1,28,28,1)
    p = model.predict(x1)
    print("-------------------------")
    print(labels[np.argmax(p)])
    print(labels[np.argmax(y_test[t])])
    print("-------------------------")

For each pair, first is predicted, second is actual
-------------------------
broom
broom
-------------------------
-------------------------
cell_phone
alarm_clock
-------------------------
-------------------------
crocodile
dolphin
-------------------------
-------------------------
alarm_clock
alarm_clock
-------------------------
-------------------------
cell_phone
aircraft_carrier
-------------------------
-------------------------
bee
crocodile
-------------------------
-------------------------
cell_phone
alarm_clock
-------------------------
-------------------------
cell_phone
cell_phone
-------------------------
-------------------------
bee
crocodile
-------------------------
-------------------------
cell_phone
asparagus
-------------------------
-------------------------
broom
broom
-------------------------
-------------------------
cell_phone
alarm_clock
-------------------------
-------------------------
aircraft_carrier
crocodile
-------------------------
-----------