In [1]:
import json
import os
import pickle
from collections import Counter

import numpy as np
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from model.model import get_model

In [2]:
def read_data(folder, n_per_class = 10_000, image_shape = (28, 28)):
    """Load per-class ``.npy`` bitmap files from ``folder`` into one labelled dataset.

    Every entry of ``folder`` whose name ends in ``.npy`` is treated as one
    class; other entries are ignored. Files are visited in sorted filename
    order so the integer label assigned to each class is the same on every
    run (``os.listdir`` alone returns names in arbitrary order).

    Parameters
    ----------
    folder : str
        Directory containing one ``.npy`` file per class. Each file is expected
        to hold a numeric array with one flattened image per row.
    n_per_class : int, optional
        Maximum number of leading rows taken from each file.
    image_shape : tuple of int, optional
        Shape each flattened row is reshaped to. Defaults to ``(28, 28)``.

    Returns
    -------
    data : np.ndarray
        Array of shape ``(n_samples, *image_shape)``.
    labels : np.ndarray
        Integer class label of every sample, shape ``(n_samples,)``.
    label_mapping : dict
        Maps each integer label to the class name, taken as the text between
        the last ``_`` and the first following ``.`` of the file name.

    Raises
    ------
    ValueError
        If a file's rows cannot be reshaped to ``image_shape``.
    """
    class_files = sorted(
        name for name in os.listdir(folder) if name.lower().endswith(".npy")
    )

    data_chunks = []
    label_chunks = []
    label_mapping = {}
    for label, file in enumerate(class_files):
        print(file)
        label_mapping[label] = file.split("_")[-1].split(".")[0]
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code; only point this at trusted files.
        tmp_data = np.load(os.path.join(folder, file), encoding='latin1', allow_pickle=True)
        samples = tmp_data[:n_per_class]
        # Reshape the whole slice at once instead of row by row. The copy
        # detaches the kept rows from the full file array so it can be freed.
        data_chunks.append(samples.reshape((len(samples), *image_shape)).copy())
        label_chunks.append(np.full(len(samples), label))

    if not data_chunks:
        # No class files found: return empty arrays that still have the
        # shapes downstream code expects.
        return np.empty((0, *image_shape)), np.empty((0,), dtype=int), label_mapping

    return np.concatenate(data_chunks), np.concatenate(label_chunks), label_mapping


In [3]:
# To get the data, download the class files you want from the bucket below
# and put them in the "Data" folder (the path passed to read_data in this cell):
#https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/numpy_bitmap;tab=objects?prefix=&forceOnObjectsSortingFiltering=false

# Read up to 70,000 samples from every class file found in "Data".
data, labels, label_mapping = read_data("Data", n_per_class=70_000)

# Hold out 20% of the samples for validation; the fixed random_state makes
# the shuffle-and-split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

full_numpy_bitmap_ant.npy
full_numpy_bitmap_arm.npy
full_numpy_bitmap_axe.npy
full_numpy_bitmap_bee.npy
full_numpy_bitmap_car.npy
full_numpy_bitmap_cat.npy
full_numpy_bitmap_cup.npy
full_numpy_bitmap_ear.npy
full_numpy_bitmap_key.npy
full_numpy_bitmap_pig.npy
full_numpy_bitmap_sun.npy


In [4]:
# Count how many samples carry each integer label.
# (Counter is imported in the notebook's top import cell.)
element_counts = Counter(labels)

# Print the counts
for element, count in element_counts.items():
    print(f"Element: {element}, Count: {count}")


Element: 0, Count: 70000
Element: 1, Count: 70000
Element: 2, Count: 70000
Element: 3, Count: 70000
Element: 4, Count: 70000
Element: 5, Count: 70000
Element: 6, Count: 70000
Element: 7, Count: 70000
Element: 8, Count: 70000
Element: 9, Count: 70000
Element: 10, Count: 70000


In [5]:
# The train/validation splits are what the rest of the notebook uses, so drop
# the names bound to the full arrays to let their memory be reclaimed.
del data, labels

In [9]:
# Build the network with one output per class in label_mapping.
# dropout_rate and l2_lambda are both passed as 0 -- presumably this disables
# those regularisers; confirm against get_model in model/model.py.
model = get_model(
    len(label_mapping),
    dropout_rate=0,
    l2_lambda=0,
)

In [7]:
# Add a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
x_train_reshaped = x_train.reshape(-1, 28, 28, 1)
x_val_reshaped = x_val.reshape(-1, 28, 28, 1)

# Normalize the data values to range [0, 1] (assumes 8-bit pixel values, 0-255).
# A plain `array / 255.0` promotes integer input to float64; writing the result
# as float32 directly halves the memory of the normalized arrays without
# creating an intermediate copy.
x_train_normalized = np.divide(x_train_reshaped, 255.0, dtype=np.float32)
x_val_normalized = np.divide(x_val_reshaped, 255.0, dtype=np.float32)




In [None]:

# Train for 10 epochs with batches of 128 samples; the validation split is
# passed as validation_data. The returned value is kept in `history`.
history = model.fit(
    x_train_normalized,
    y_train,
    epochs=10,
    batch_size=128,
    validation_data=(x_val_normalized, y_val),
)


In [None]:
# (confusion_matrix is imported in the notebook's top import cell.)
# Predicted class = index of the largest score in each prediction row.
y_pred = model.predict(x_val_normalized)
y_pred_classes = np.argmax(y_pred, axis=1)
cm = confusion_matrix(y_val, y_pred_classes)

# Show each row as a share of that true class, scaled so 10000 == 100%.
# Rounding (instead of truncating) avoids showing e.g. 5699 for 5699.999...,
# and clamping the divisor to 1 keeps a row of zeros at 0 instead of NaN when
# a class was predicted but never occurs in y_val.
row_totals = cm.sum(axis=1, keepdims=True)
np.rint(cm / np.maximum(row_totals, 1) * 10000).astype(int)

In [20]:
# Write the model weights to disk.
model.save_weights('model/model.h5')

# Pickle the label -> class-name dictionary into the same "model" directory so
# predicted indices can be translated back to names later.
directory = "model"
mapping_path = os.path.join(directory, 'label_mapping.pkl')
with open(mapping_path, 'wb') as mapping_file:
    pickle.dump(label_mapping, mapping_file)
