In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.ft2font as ft
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Use a CJK-capable font family for every figure (presumably so the class
# characters placed in plot titles render instead of showing missing glyphs).
plt.rc('font', family='Noto Sans CJK JP')

In [None]:
# Switch into the dataset directory so the bare file names used by later cells resolve.
# The guard keeps this cell idempotent: re-running it after the kernel has already
# moved into datasets/kmnist would otherwise raise FileNotFoundError, because the
# relative path no longer exists from inside that directory.
kmnist_dir = os.path.join('datasets', 'kmnist')
if not os.path.normpath(os.getcwd()).endswith(os.sep + kmnist_dir):
    os.chdir(kmnist_dir)

In [None]:
# Show the entries of the current working directory as a sanity check
# before loading files by bare name.
os.listdir()

In [None]:
# Open the four .npz archives (images and labels for the train and test splits).
# Each name is bound to the archive object itself; later cells pull the actual
# array out of it with the 'arr_0' key.
X_train, X_test = np.load('kmnist-train-imgs.npz'), np.load('kmnist-test-imgs.npz')

y_train, y_test = np.load('kmnist-train-labels.npz'), np.load('kmnist-test-labels.npz')

In [None]:
# Inspect the dimensions of a single training image.
# The first axis of the image is bound to `width` and the second to `height`;
# later cells reuse the two names in this same order.
first_image = X_train['arr_0'][0]
width, height = first_image.shape

print("width: {} - height: {}".format(width, height))

In [None]:
# Compare the number of images with the number of labels in each split.
n_train_images, n_train_labels = len(X_train['arr_0']), len(y_train['arr_0'])
print("train samples: {} - train labels: {}".format(n_train_images, n_train_labels))

n_test_images, n_test_labels = len(X_test['arr_0']), len(y_test['arr_0'])
print("test samples: {} - test labels: {}".format(n_test_images, n_test_labels))

In [None]:
# Load the label lookup table, using its 'index' column as the row index,
# and preview the first rows.
classmap_file = 'kmnist_classmap.csv'
classmap = pd.read_csv(classmap_file, index_col='index')
classmap.head()

In [None]:
def label_to_char(label):
    """Return the character associated with a class label.

    Reads the 'char' column of the notebook-level ``classmap`` frame at the
    row whose index equals ``label``.
    """
    char = classmap.loc[label, 'char']
    return char

In [None]:
# Peek at the first six training labels as stored in the archive.
y_train['arr_0'][:6]

In [None]:
# Distinct label values that occur in the training set (sorted by np.unique).
np.unique(y_train['arr_0'])

In [None]:
# Character for each distinct training label, in the order np.unique returns them.
list(map(label_to_char, np.unique(y_train['arr_0'])))

In [None]:
# Preview the first few training images, one per row, each titled with its character.
n_preview = 6

# Indexing an .npz archive reads the whole array from the file on every access,
# so fetch the images and labels once up front instead of re-reading the label
# array inside the loop.
preview_images = X_train['arr_0'][:n_preview]
preview_labels = y_train['arr_0'][:n_preview]

# figsize is interpreted in inches; the image's pixel dimensions are reused
# here as the canvas size.
fig, ax = plt.subplots(n_preview, 1, figsize=(width, height))

for axis, sample, label in zip(ax, preview_images, preview_labels):
    axis.title.set_text(label_to_char(label))
    axis.imshow(sample, cmap='binary')

In [None]:
# One-hot encode the targets.
# The categories are learned from the training labels only and then reused
# for the test labels.
# NOTE: y_train / y_test are rebound here from the loaded archives to the
# encoded matrices.
from sklearn.preprocessing import OneHotEncoder

one_hot = OneHotEncoder()

# The encoder is given 2-D input, so each label vector is reshaped into a single column.
train_label_column = y_train['arr_0'].reshape(-1, 1)
y_train = one_hot.fit_transform(train_label_column)

test_label_column = y_test['arr_0'].reshape(-1, 1)
y_test = one_hot.transform(test_label_column)

In [None]:
# Show the first six encoded rows as a dense array for inspection.
y_train[:6].toarray()

In [None]:
# Conv2D is fed inputs with a trailing channel axis, so append a single
# channel to every image.
# NOTE: X_train / X_test are rebound here from the loaded archives to the
# reshaped arrays.
conv_input_shape = (-1, width, height, 1)
X_train = X_train['arr_0'].reshape(conv_input_shape)
X_test = X_test['arr_0'].reshape(conv_input_shape)

In [None]:
# Let's try a simple Net
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout

In [None]:
from keras.models import load_model

# Reload a previously saved model when one is available.
# The file is only written by the final model.save cell, so on a fresh run it
# does not exist yet; loading unconditionally would abort Restart & Run All.
# When the file is missing, `model` is left as None until the next cell builds
# a new network.
saved_model_path = 'kmnist-conv.h5'
model = load_model(saved_model_path) if os.path.isfile(saved_model_path) else None

In [None]:
# A small convolutional classifier, declared as a single ordered layer list.
model = Sequential([
    # First convolution: one 3x3 filter over the single-channel input image.
    Conv2D(1, (3, 3), input_shape=(width, height, 1), activation='relu'),
    Dropout(0.1),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    # Hidden dense layer sized to the pixel count; no activation argument is passed.
    Dense(width * height),
    # 10-way softmax output (presumably one unit per label class).
    Dense(10, activation='softmax'),
])

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (matching the one-hot targets), and accuracy reported as a metric.
compile_options = dict(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train the network on the training split, holding back 20% of it for validation.
fit_options = dict(batch_size=150, epochs=10, validation_split=0.2)
model.fit(X_train, y_train, **fit_options)

In [None]:
# Score the trained model on the test split; the result is the loss followed
# by the metrics configured in compile (accuracy).
model.evaluate(X_test, y_test)

In [None]:
# Persist the trained model to the current working directory; this is the
# same file name the earlier load_model cell reads.
model.save("kmnist-conv.h5")