# CNN - Hiragana
*Matis CAFFIAUX*

**8INF911 - Apprentissage Profond - Hiver 2022**

In [1]:
import tensorflow as tf

from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

import numpy as np

## Dictionnaire de labels

In [2]:
# Romanised label -> class index for the 49 classes (48 kana plus the
# iteration mark). The row layout follows the usual kana table.
labelToIndex = {
    "A": 0, "I": 1, "U": 2, "E": 3, "O": 4,
    "KA": 5, "KI": 6, "KU": 7, "KE": 8, "KO": 9,
    "SA": 10, "SHI": 11, "SU": 12, "SE": 13, "SO": 14,
    "TA": 15, "CHI": 16, "TSU": 17, "TE": 18, "TO": 19,
    "NA": 20, "NI": 21, "NU": 22, "NE": 23, "NO": 24,
    "HA": 25, "HI": 26, "FU": 27, "HE": 28, "HO": 29,
    "MA": 30, "MI": 31, "MU": 32, "ME": 33, "MO": 34,
    "YA": 35, "YU": 36, "YO": 37,
    "RA": 38, "RI": 39, "RU": 40, "RE": 41, "RO": 42,
    "WA": 43, "WI": 44, "WE": 45, "WO": 46,
    "N": 47, "Itération": 48,
}

# Class index -> romanised label. Derived from labelToIndex so the two
# mappings cannot drift apart when one of them is edited.
indexToLabel = {index: label for label, index in labelToIndex.items()}

# Class index -> hiragana character.
# Every value is exactly one character: the entries for 4 and 5 used to carry
# a stray TAB inside the string literal, which is removed here.
indexToHiragana = {
    0: "あ", 1: "い", 2: "う", 3: "え", 4: "お",
    5: "か", 6: "き", 7: "く", 8: "け", 9: "こ",
    10: "さ", 11: "し", 12: "す", 13: "せ", 14: "そ",
    15: "た", 16: "ち", 17: "つ", 18: "て", 19: "と",
    20: "な", 21: "に", 22: "ぬ", 23: "ね", 24: "の",
    25: "は", 26: "ひ", 27: "ふ", 28: "へ", 29: "ほ",
    30: "ま", 31: "み", 32: "む", 33: "め", 34: "も",
    35: "や", 36: "ゆ", 37: "よ",
    38: "ら", 39: "り", 40: "る", 41: "れ", 42: "ろ",
    43: "わ", 44: "ゐ", 45: "ゑ", 46: "を",
    47: "ん", 48: "ゝ",
}

# Sanity check: the three mappings must describe the same set of class indices.
assert len(labelToIndex) == len(indexToLabel) == len(indexToHiragana) == 49

## Chargement du dataset
Source du jeu de données : *https://github.com/rois-codh/kmnist*

In [3]:
def _load_npz_array(path, key="arr_0"):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened with a context manager so its file handle is closed
    as soon as the array has been read. `mmap_mode` is intentionally not
    passed: np.load does not memory-map the members of a .npz archive, so the
    array is read fully into memory either way.
    """
    with np.load(path) as archive:
        return archive[key]


# Images and integer class labels of the training split.
X_train = _load_npz_array("dataset/k49-train-imgs.npz")
Y_train = _load_npz_array("dataset/k49-train-labels.npz")

# Images and integer class labels of the test split.
X_test = _load_npz_array("dataset/k49-test-imgs.npz")
Y_test = _load_npz_array("dataset/k49-test-labels.npz")

# Per-sample input shape given to the first layer of the network.
input_shape = (28, 28, 1)
# Number of output classes (size of the softmax layer and of the one-hot vectors).
num_classes = 49

In [9]:
# Number of samples in the training and test splits (first axis of each image array).
(X_train.shape[0], X_test.shape[0])

(232365, 38547)

Vectorisation des labels en one-hot

In [5]:
# Lookup table: row i is the one-hot encoding of class i.
oneHotvector = to_categorical(range(num_classes))

# Encode each split with a single vectorised row lookup instead of a Python
# loop over every label; the result is one 2-D array per split.
# The ndim guard keeps this cell idempotent: labels that are already one-hot
# encoded (2-D) are left untouched when the cell is run a second time.
if np.ndim(Y_train) == 1:
    Y_train = oneHotvector[Y_train]  # training labels

if np.ndim(Y_test) == 1:
    Y_test = oneHotvector[Y_test]  # test labels


## Modèle CNN

In [6]:
def _conv_stage(filters, **first_layer_kwargs):
    """Return the layers of one convolutional stage, in order.

    A stage is two 3x3 convolutions followed by a 5x5 convolution with
    stride 2 and 'same' padding; each convolution uses ReLU and is followed
    by batch normalization. The stage ends with a dropout of 0.4.
    `first_layer_kwargs` are forwarded to the first convolution only
    (used to declare the input shape of the network).
    """
    return [
        layers.Conv2D(filters, kernel_size=3, activation='relu', **first_layer_kwargs),
        layers.BatchNormalization(),
        layers.Conv2D(filters, kernel_size=3, activation='relu'),
        layers.BatchNormalization(),
        layers.Conv2D(filters, kernel_size=5, strides=2, padding='same', activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
    ]


# Classification head: flatten, one hidden dense layer, softmax over the classes.
_classifier_head = [
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    layers.Dense(num_classes, activation='softmax'),
]

# Two convolutional stages (32 then 64 filters) followed by the dense head.
model = models.Sequential(
    _conv_stage(32, input_shape=input_shape)
    + _conv_stage(64)
    + _classifier_head
)

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

Metal device set to: Apple M1 Max

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



2022-03-23 08:14:28.959452: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-23 08:14:28.959587: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


### Entraînement

In [8]:
# Training hyper-parameters.
epochs = 20
batch_size = 20000

# Convert the training images and one-hot labels to tensors before fitting.
X, Y = tf.convert_to_tensor(X_train), tf.convert_to_tensor(Y_train)

# validation_split=0.3 reserves 30% of the provided samples for validation.
model.fit(
    x=X,
    y=Y,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.3,
    verbose=True,
)

### Test

In [None]:

# Convert the held-out test split to tensors.
X_t, Y_t = tf.convert_to_tensor(X_test), tf.convert_to_tensor(Y_test)

# Evaluate the model trained above on the test split.
model.evaluate(x=X_t, y=Y_t, batch_size=batch_size)

## Test du modèle CNN - *Hiragana_CNN_v1*

In [7]:
# Batch size used for evaluation.
batch_size = 20000

# Replace `model` with the previously saved network, restored from disk.
model = load_model('Hiragana_CNN_v1')

# Convert the held-out test split to tensors.
X_t, Y_t = tf.convert_to_tensor(X_test), tf.convert_to_tensor(Y_test)

# Evaluate the restored model on the test split.
model.evaluate(x=X_t, y=Y_t, batch_size=batch_size)

2022-03-23 08:14:37.518379: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-03-23 08:14:37.580511: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




[0.24792486429214478, 0.9365190267562866]