In [1]:
from PIL import Image
import json 
import numpy as np
import os 
from model.model import get_model
import pickle

from sklearn.model_selection import train_test_split

In [2]:
def read_data(folder, n_per_class = 10_000):
    """Load per-class ``.npy`` bitmap files from ``folder`` into one labelled dataset.

    Every regular file in ``folder`` whose name ends in ``.npy`` is treated as
    one class. Files are processed in sorted name order, so the integer label
    assigned to each class is the same on every run and every machine.

    Args:
        folder: Directory containing one ``.npy`` file per class. Each row of a
            file must hold 784 values (it is reshaped to 28x28).
        n_per_class: Maximum number of rows taken from the start of each file.

    Returns:
        A tuple ``(data, labels, label_mapping)`` where ``data`` is an array of
        28x28 images, ``labels`` is the array of integer class ids aligned with
        ``data``, and ``label_mapping`` maps each class id to the class name
        taken from the file name (text after the last ``_``, up to the first
        ``.``).
    """
    data = []
    labels = []
    label_mapping = {}

    # os.listdir() yields entries in arbitrary order, which would make the
    # label ids differ between runs. Sort them, and skip anything that is not a
    # regular .npy file (e.g. .ipynb_checkpoints, .DS_Store) so stray entries
    # neither consume a label id nor crash np.load.
    files = sorted(
        name
        for name in os.listdir(folder)
        if name.endswith(".npy") and os.path.isfile(os.path.join(folder, name))
    )

    for file in files:
        label = len(label_mapping)
        print(file)
        label_mapping[label] = file.split("_")[-1].split(".")[0]
        # NOTE(security): allow_pickle=True lets a crafted file execute
        # arbitrary code when loaded. Kept for compatibility with the original
        # behaviour; only load files from a trusted source.
        tmp_data = np.load(os.path.join(folder, file), encoding='latin1', allow_pickle=True)
        samples = tmp_data[:n_per_class]
        data.extend(np.array(data_point).reshape((28, 28)) for data_point in samples)
        labels.extend([label] * len(samples))

    return np.array(data), np.array(labels), label_mapping


In [3]:
# The QuickDraw "numpy_bitmap" files can be downloaded from the bucket linked
# below; put the .npy files for the classes you want into the "Data" folder.
# https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/numpy_bitmap;tab=objects?prefix=&forceOnObjectsSortingFiltering=false

# Load at most 80,000 drawings from each class file.
data, labels, label_mapping = read_data(
    "Data",
    n_per_class=80_000,
)

# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

full_numpy_bitmap_airplane.npy
full_numpy_bitmap_broccoli.npy
full_numpy_bitmap_campfire.npy
full_numpy_bitmap_dumbbell.npy
full_numpy_bitmap_elephant.npy
full_numpy_bitmap_mushroom.npy
full_numpy_bitmap_postcard.npy
full_numpy_bitmap_sailboat.npy
full_numpy_bitmap_suitcase.npy


In [4]:
from collections import Counter

# Tally how many samples carry each label id.
element_counts = Counter(labels)

# Report one line per label id, in first-seen order.
for element in element_counts:
    count = element_counts[element]
    print(f"Element: {element}, Count: {count}")


Element: 0, Count: 80000
Element: 1, Count: 80000
Element: 2, Count: 80000
Element: 3, Count: 80000
Element: 4, Count: 80000
Element: 5, Count: 80000
Element: 6, Count: 80000
Element: 7, Count: 80000
Element: 8, Count: 80000


In [5]:
# Drop the references to the un-split arrays; the cells below only use the
# train/validation splits.
del data, labels

In [6]:
# Build the model with one class per entry in label_mapping; dropout and the
# L2 coefficient are both passed as 0.
model = get_model(
    len(label_mapping),
    dropout_rate=0,
    l2_lambda=0,
)

In [7]:
def _to_unit_range(images):
    """Return ``images`` as a new float32 array with every value divided by 255.

    Assumes pixel values lie in 0..255, so the result is in [0, 1].
    """
    # Cast once to float32 and divide in place. A plain `images / 255.0` on an
    # integer array allocates float64, which doubles memory for arrays this
    # large. NOTE(review): assumes the model computes in float32 (the Keras
    # default float type), so no precision is lost in practice; confirm.
    scaled = images.astype(np.float32)
    scaled /= 255.0
    return scaled


# Add a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
x_train_reshaped = x_train.reshape(-1, 28, 28, 1)
x_val_reshaped = x_val.reshape(-1, 28, 28, 1)

# Normalize the data values to range [0, 1], identically for both splits.
x_train_normalized = _to_unit_range(x_train_reshaped)
x_val_normalized = _to_unit_range(x_val_reshaped)




In [8]:
# Train one epoch per fit() call so a weights checkpoint is written after
# every epoch; an interrupted run still leaves the finished epochs on disk.
# NOTE(review): assumes get_model returns a Keras-style model whose fit()
# returns an object with a `.history` dict; confirm.
histories = []  # per-epoch metrics; `history` alone only holds the latest epoch
for i in range(5):
    history = model.fit(x_train_normalized, y_train, epochs=1, batch_size=64, validation_data=(x_val_normalized, y_val))
    histories.append(history.history)
    model.save_weights(f'model/model_epoch_{i}.h5')



KeyboardInterrupt: 

In [9]:
from sklearn.metrics import confusion_matrix

# Predicted class = index of the highest score for each validation sample.
y_pred = model.predict(x_val_normalized)
y_pred_classes = np.argmax(y_pred, axis=1)

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_val, y_pred_classes)

# Express each row as counts per 10,000 samples of that true class
# (fractions are truncated by the int cast).
row_totals = cm.sum(axis=1, keepdims=True)
((cm / row_totals) * 10000).astype(int)



array([[9514,   62,   77,  130,   98,   12,   34,   50,   18],
       [  43, 9451,   73,  107,  176,  126,    5,    3,   13],
       [  70,   95, 9521,  174,   62,    6,   23,   29,   16],
       [ 106,   76,  158, 9239,  186,   38,   58,   35,  101],
       [  79,  120,   60,  154, 9509,   33,   13,   13,   14],
       [  35,  321,   25,   91,  102, 9376,   10,   15,   20],
       [  18,   21,   28,   94,   23,    5, 9356,    7,  444],
       [  74,    9,   63,   73,   22,    4,   17, 9710,   22],
       [  16,   27,   27,  109,   38,    9,  171,   15, 9583]])

In [20]:


directory = "model"

# Write the label-id -> class-name dictionary with pickle, into the same
# folder the weight checkpoints are saved to.
label_mapping_path = os.path.join(directory, 'label_mapping.pkl')
with open(label_mapping_path, 'wb') as f:
    pickle.dump(label_mapping, f)
