Kuzushiji-MNIST is a drop-in replacement for the MNIST dataset (28x28 grayscale, 70,000 images), provided in the original MNIST format as well as a NumPy format. Since MNIST restricts us to 10 classes, we chose one character to represent each of the 10 rows of Hiragana when creating Kuzushiji-MNIST.

Kuzushiji-49, as the name suggests, has 49 classes (28x28 grayscale, 270,912 images). It is a much larger but imbalanced dataset containing 48 Hiragana characters and one Hiragana iteration mark.

Kuzushiji-Kanji is an imbalanced dataset with a total of 3,832 Kanji characters (64x64 grayscale, 140,424 images), ranging from 1,766 examples to only a single example per class.


In [1]:
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import os

In [2]:
# Switch the working directory to the project folder; the data files and the
# saved model in this notebook are addressed by bare file names.
project_dir = r'C:\Users\dic_l\Desktop\japanese handwritten ocr'
os.chdir(project_dir)

Download the Kuzushiji-49 (K49) dataset files from https://github.com/rois-codh/kmnist

In [3]:
# Kuzushiji-49 archives downloaded from KMNIST; each one is read via its 'arr_0' entry.
def _load_npz_array(path, key='arr_0'):
    """Return the array stored under *key* in the .npz archive at *path*.

    np.load on an .npz file returns an NpzFile that keeps the underlying file
    open until it is closed, so the entry is read inside a ``with`` block and
    the handle is released as soon as the array is in memory.
    """
    with np.load(path) as archive:
        return archive[key]


train_images = _load_npz_array("k49-train-imgs.npz")
train_labels = _load_npz_array("k49-train-labels.npz")
test_images = _load_npz_array("k49-test-imgs.npz")
test_labels = _load_npz_array("k49-test-labels.npz")

In [4]:
# Add an explicit single-channel axis, placed where Keras' configured image
# data format wants it: (1, 28, 28) for channels_first, (28, 28, 1) otherwise.
channels_first = K.image_data_format() == "channels_first"
shape = (1, 28, 28) if channels_first else (28, 28, 1)

# The sample count (axis 0) is kept; only the per-image shape changes.
train_images = train_images.reshape((train_images.shape[0],) + shape)
test_images = test_images.reshape((test_images.shape[0],) + shape)

In [5]:
# Divide every pixel by 255 in both splits (presumably mapping 8-bit
# intensities into [0, 1] — confirm against the dtype of the loaded arrays).
pixel_scale = 255.0
train_images = np.true_divide(train_images, pixel_scale)
test_images = np.true_divide(test_images, pixel_scale)

In [6]:
# Augmentation applied on the fly to training batches: random rotations
# (rotation_range=15) and random zooms (zoom_range=0.2).
datagen = ImageDataGenerator(rotation_range=15, zoom_range=0.2)
# datagen.fit(...) is intentionally not called: fitting is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is, and the call would copy the whole training array.

# Two conv/pool stages followed by a dense classifier with one softmax output
# per Kuzushiji-49 class.
model = keras.Sequential([
    # Declare the input explicitly; passing input_shape= to the first layer of
    # a Sequential model is discouraged by current Keras and emits a warning.
    keras.Input(shape=shape),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Flatten(),
    # Dropout on the flattened features, before the dense layers.
    keras.layers.Dropout(0.5),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(49, activation="softmax"),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
#model.summary()

# Sparse categorical cross-entropy is given the label arrays exactly as loaded
# (no one-hot encoding step happens in this notebook).
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

# Training batches come from the augmenting generator, reshuffled each epoch.
augmented_batches = datagen.flow(train_images, train_labels, shuffle=True)

# Both callbacks use their default monitored quantity:
#  - stop after 8 epochs without improvement and restore the best weights
#  - halve the learning rate after 3 epochs without improvement
early_stopping = keras.callbacks.EarlyStopping(
    patience=8, verbose=1, restore_best_weights=True
)
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, verbose=1)

# NOTE(review): the test split doubles as validation data here, so it also
# drives early stopping and the learning-rate schedule.
model.fit(
    augmented_batches,
    epochs=50,
    validation_data=(test_images, test_labels),
    callbacks=[early_stopping, lr_on_plateau],
)

# Final evaluation is currently disabled (test_images2 is not defined in the
# cells shown above):
#test_loss, test_acc = model.evaluate(test_images2, test_labels)
#print("Accuracy: ", test_acc)

model.save("kuzushiji.h5")

Epoch 1/50


  self._warn_if_super_not_called()


7262/7262 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - accuracy: 0.7205 - loss: 1.0420 - val_accuracy: 0.8875 - val_loss: 0.4206 - learning_rate: 0.0010
Epoch 2/50
7262/7262 ━━━━━━━━━━━━━━━━━━━━ 40s 6ms/step - accuracy: 0.8892 - loss: 0.3857 - val_accuracy: 0.9135 - val_loss: 0.3362 - learning_rate: 0.0010
Epoch 3/50
7262/7262 ━━━━━━━━━━━━━━━━━━━━ 39s 5ms/step - accuracy: 0.9101 - loss: 0.3094 - val_accuracy: 0.9191 - val_loss: 0.3135 - learning_rate: 0.0010
Epoch 4/50
7262/7262 ━━━━━━━━━━━━━━━━━━━━ 39s 5ms/step - accuracy: 0.9185 - loss: 0.2797 - val_accuracy: 0.9263 - val_loss: 0.2878 - learning_rate: 0.0010
Epoch 5/50
7262/7262 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - accuracy: 0.9248 - loss: 0.2595 - val_accuracy: 0.9340 - val_loss: 0.2723 - learning_rate: 0.0010
Epoch 6/50
[1m7262/7262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

