In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import os

#### Load the dataset

In [13]:
dataset_dir = "quick_draw_dataset"
max_size_per_cl = 1500  # maximum number of drawings kept per class
shuffle_seed = 42       # fixed seed so the shuffled order is repeatable

# Keep only files whose name really ends with ".npy", and sort them
# (case-insensitively) so that the class index -> class name mapping does not
# depend on the arbitrary order returned by os.listdir.
files = sorted(
    (name for name in os.listdir(dataset_dir) if name.endswith(".npy")),
    key=str.lower,
)
if not files:
    raise FileNotFoundError("No .npy file found in '{}'".format(dataset_dir))

draw_class = []
image_chunks = []
target_chunks = []
for t, name in enumerate(files):
    # The class name is the file name without its prefix and extension
    draw_class.append(name.replace("full_numpy_bitmap_", "").replace(".npy", ""))
    # Each file is read once; only the first max_size_per_cl drawings are kept
    draws = np.load(os.path.join(dataset_dir, name))[:max_size_per_cl]
    # Rows are reshaped to 28x28 bitmaps and bitwise-inverted
    image_chunks.append(np.invert(draws.reshape(-1, 28, 28)))
    # Every drawing of this file gets the class index t as its label
    target_chunks.append(np.full(draws.shape[0], t, dtype=np.float64))

images = np.concatenate(image_chunks).astype(np.float32)
targets = np.concatenate(target_chunks)
size = images.shape[0]

# Shuffle images and targets with the same permutation so they stay aligned.
# A dedicated RandomState keeps the result reproducible without touching
# NumPy's global random state.
indexes = np.arange(size)
np.random.RandomState(shuffle_seed).shuffle(indexes)
images = images[indexes]
targets = targets[indexes]

print(draw_class)

['airplane', 'apple', 'book', 'brain', 'car', 'chair', 'dog', 'eye', 'face', 'The Eiffel Tower']


#### Normalization

In [14]:
# Standardize each pixel position across the dataset. StandardScaler works on
# 2-D input, so every 28x28 drawing is first flattened to 784 features.
# NOTE(review): the scaler is fitted on all images before the train/test
# split below, so test-set statistics leak into preprocessing - consider
# fitting on the training split only.
scaler = StandardScaler()
flat_images = images.reshape(-1, 28 * 28)
scaled_images = scaler.fit_transform(flat_images)

# The convolutional network expects a 4th dimension (colour channels)
images = scaled_images.reshape(-1, 28, 28, 1)

print(images.shape)

(15000, 28, 28, 1)


#### Splitting the dataset into train_set and test_set

In [15]:
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split repeatable for a given input order.
(images_train, images_test,
 targets_train, targets_test) = train_test_split(
    images,
    targets,
    test_size=0.2,
    random_state=42,
)

print(images_train.shape, targets_train.shape)
print(images_test.shape, targets_test.shape)

(12000, 28, 28, 1) (12000,)
(3000, 28, 28, 1) (3000,)


#### Creating a model

In [16]:
# One output unit per drawing class, derived from the loaded class list so the
# classifier stays correct if class files are added or removed.
n_classes = len(draw_class)

# Sequential model: three convolutions, then a flatten and dense classifier
model = tf.keras.models.Sequential([
    # Convolutions
    tf.keras.layers.Conv2D(32, 4, activation='relu'),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    # Flatten the convolution feature maps into one vector per sample
    tf.keras.layers.Flatten(name='flatten'),
    # Dense layers
    tf.keras.layers.Dense(128, activation='relu'),
    # NOTE(review): this 10-unit ReLU layer is a narrow bottleneck right before
    # the classifier - confirm it is intentional.
    tf.keras.layers.Dense(10, activation='relu'),
    # Softmax turns the final activations into one probability per class
    tf.keras.layers.Dense(n_classes, activation='softmax'),
])

# No input shape is declared, so the model is called once on a single image to
# build it; the (untrained) prediction is printed next to the true label.
model_output = model(images[0:1])
print(model_output, targets[0:1])

tf.Tensor(
[[0.1082781  0.1057253  0.08073453 0.11335583 0.09393829 0.10881192
  0.10205632 0.08900727 0.09339555 0.10469681]], shape=(1, 10), dtype=float32) [3.]


In [17]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (1, 25, 25, 32)           544       
_________________________________________________________________
conv2d_7 (Conv2D)            (1, 23, 23, 64)           18496     
_________________________________________________________________
conv2d_8 (Conv2D)            (1, 21, 21, 128)          73856     
_________________________________________________________________
flatten (Flatten)            (1, 56448)                0         
_________________________________________________________________
dense_6 (Dense)              (1, 128)                  7225472   
_________________________________________________________________
dense_7 (Dense)              (1, 10)                   1290      
_________________________________________________________________
dense_8 (Dense)              (1, 10)                  

#### Compile the model

In [18]:
# Targets are class indices rather than one-hot vectors, hence the "sparse"
# variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

#### Train the model on the train_set

In [20]:
# Train for 10 epochs in mini-batches of 32; 20% of the training samples are
# held out for validation. The returned History object is kept in `history`.
history = model.fit(
    images_train,
    targets_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
