# Imports

In [6]:
# misc
import sys
import os

# load/save files
import zipfile
import json

# plot
import matplotlib.pyplot as plt
from PIL import Image

# datascience libs
import numpy as np
import pandas as pd


try: # python
    path_ = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
except NameError: # jupyter notebook
    path_ = os.path.dirname(os.getcwd())

dataset_dir = os.path.join(path_, "datasets")
model_dir = os.path.join(path_, "models")


2023-07-06 21:04:52.491617: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-06 21:04:54.533370: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-07-06 21:04:54.533502: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-07-06 21:05:04.196949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

# Helpers

In [3]:
from time import time
from sys import argv

def _time(f):
    def wrapper(*args):
        start = time()
        r = f(*args)
        end = time()
        print("%s(): timed %fs" % (f.__name__, end-start))
        return r
    return wrapper

# Load exemple

In [34]:
try:
    from types import SimpleNamespace as Namespace
except ImportError:
    from argparse import Namespace

@_time
def emnist_load_data(dir_path:str, return_mapping=False):
    # https://towardsdatascience.com/efficiently-splitting-an-image-into-tiles-in-python-using-numpy-d1bf0dd7b6f7
    def array_to_tiled_array(img:np.ndarray, kernel_size:tuple):
        if len(img.shape) == 2:
            img = np.expand_dims(img, axis=-1)
        img_height, img_width, channels = img.shape
        tile_height, tile_width = kernel_size
        tiles = img.reshape(img_height // tile_height,
                            tile_height,
                            img_width // tile_width,
                            tile_width,
                            channels)
        return tiles.swapaxes(1,2).reshape(-1, tile_height,tile_width, 1)

    def load_data_X(path:str):
        with open(path, 'r', encoding='utf-8') as f:
            obj = json.loads(f.read(), object_hook = lambda d: Namespace(**d))
        X = np.zeros((0, 28,28,1), dtype="uint8")
        for s in obj.files:
            img_path = os.path.join(os.path.dirname(path), s)
            im = Image.open(img_path).convert('L')
            data = array_to_tiled_array(np.array(im,dtype="uint8"), (28,28))
            X = np.append(X, data, axis=0)
        return X

    def load_data_y(path:str):
        with open(path, 'r', encoding='utf-8') as f:
            obj = json.loads(f.read(), object_hook = lambda d: Namespace(**d))
        return [np.array(obj.id, dtype="uint8"),
                np.array(obj.bbox, dtype="uint8")], np.array(obj.mapping, dtype="uint8")
    
    path = os.path.join(dir_path, "test.json")
    X_test = load_data_X(path)
    y_test, y_mapping = load_data_y(path)
    path = os.path.join(dir_path, "train.json")
    X_train = load_data_X(path)
    y_train, y_mapping = load_data_y(path)
    if return_mapping:
        return (X_train, y_train), (X_test, y_test),  list(map(lambda x: chr(x), y_mapping))
    else:
        return (X_train, y_train), (X_test, y_test)
    


# Exemple

In [7]:
# datascience libs
import numpy as np
import pandas as pd

# tensorflow
from tensorflow import keras
import tensorflow as tf
import tensorflowjs as tfjs

from keras import layers, models, optimizers, regularizers
from keras import datasets
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

from keras.backend import expand_dims
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [45]:
#(X_train, y_train), (X_test, y_test) = datasets.mnist.load_data(path="mnist.npz")
(X_train, y_train), (X_test, y_test), y_mapping = emnist_load_data(os.path.join(dataset_dir, "origin-emnist-mnist"), True)

# convert target[0] to categorical
num_classes = max(y_train[0])+1 # len(y_mapping)
y_train[0] = to_categorical(y_train[0], num_classes=num_classes, dtype ="int8")
y_test[0] = to_categorical(y_test[0], num_classes=num_classes, dtype ="int8")

# add validation sets
y_val = [None,None]
(X_train, X_val) = train_test_split(X_train, test_size=0.25, random_state=1)
(y_train[0], y_val[0]) = train_test_split(y_train[0], test_size=0.25, random_state=1)
(y_train[1], y_val[1]) = train_test_split(y_train[1], test_size=0.25, random_state=1)

print("")
print("X_train:", X_train.shape)
print("y_train_id:", y_train[0].shape)
print("y_train_bbox:", y_train[1].shape)
print("")
print("X_val:", X_val.shape)
print("y_val_id:", y_val[0].shape)
print("y_val_bbox:", y_val[1].shape)
print("")
print("X_test:", X_test.shape)
print("y_test_id:", y_test[0].shape)
print("y_test_bbox:", y_test[1].shape)
print("\nMapping:")
print(y_mapping)


emnist_load_data(): timed 0.039964s

X_train: (150, 28, 28, 1)
y_train_id: (150, 10)
y_train_bbox: (150, 4)

X_val: (51, 28, 28, 1)
y_val_id: (51, 10)
y_val_bbox: (51, 4)

X_test: (201, 28, 28, 1)
y_test_id: (201, 10)
y_test_bbox: (201, 4)

Mapping:
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [9]:

def set_model(X):
    model = models.Sequential()
    model.add(layers.Conv2D(32, kernel_size=(5, 5),
                            activation='relu', padding='same', input_shape = X[0].shape))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(strides=(2,2)))
    model.add(layers.Dropout(0.25))

    model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(strides=(2,2)))
    model.add(layers.Dropout(0.25))
    
    model.add(layers.Flatten())
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(10, activation='softmax'))
    return model

In [10]:
model = set_model(X_train)
model.summary()

2023-07-06 21:06:34.924572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-07-06 21:06:34.929894: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-07-06 21:06:34.929999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-DIV5ILM): /proc/driver/nvidia/version does not exist
2023-07-06 21:06:34.943377: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        832       
                                                                 
 batch_normalization (BatchN  (None, 28, 28, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 32)        25632     
                                                                 
 batch_normalization_1 (Batc  (None, 28, 28, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                        

In [11]:
model.compile(optimizer = optimizers.Adam(lr=0.0001),
              loss = 'categorical_crossentropy',
              metrics = ["accuracy"])

  super().__init__(name, **kwargs)


In [19]:
"""history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=32),
                              validation_data=(X_val, Y_val)
                              steps_per_epoch=1000,
                              epochs=25,
                              verbose=1)"""
callbacks = []
es = EarlyStopping(patience = 10, restore_best_weights = True)
callbacks.append(es)

start_time = timer()
history = model.fit(X_train, y_train,
                    validation_data = (X_val, y_val),
                    batch_size = 32,
                    epochs = 3,
                    callbacks = callbacks,
                    verbose = 1)
training_time = timer() - start_time

NameError: name 'timer' is not defined