# 한글 손글씨 분류 어플리케이션을 위한 Keras 모델 생성
## Building a Keras Model for a Handwritten Korean Character Classification App
### with Keras, Tensorflow, CoreML, iOS

1. 한글 손글씨 이미지 데이터 호출 및 데이터 전처리
2. Keras 모델 생성 및 훈련
3. CoreML 모델로 변환

## Part1. 한글 손글씨 이미지 데이터 호출 및 전처리


In [8]:
## 1-1. Load each image path and that image's label from the csv file

import pandas as pd
import io 

csv_file_path = "labels-map.csv"
lable_file = "980_common_character.txt"

# Read both files inside `with` blocks so the handles are closed even when
# parsing fails further down. The csv is opened first, as before, so a
# missing csv is still the first error reported.
with io.open(csv_file_path, 'r', encoding='utf-8') as csv_file:
    csv_rows = list(csv_file)

with io.open(lable_file, 'r', encoding='utf-8') as label_source:
    labels_file = label_source.read().splitlines()

# Map characters to indices: the position of a line in the label file is
# used as the integer class index of the character on that line.
label_dict = {character: index for index, character in enumerate(labels_file)}

# Build the lists. `filenames[k]` and `labels[k]` describe the same sample.
filenames = []
labels = []

for row in csv_rows:
    row = row.strip()
    if not row:
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file) instead of failing on the tuple unpacking below.
        continue
    path, label = row.split(',')
    filenames.append(path)
    # Raises KeyError if the csv names a character missing from the label file.
    labels.append(label_dict[label])


FileNotFoundError: [Errno 2] No such file or directory: 'labels-map.csv'

In [None]:
## 1-2. Shuffle the two lists randomly, using one shared permutation

from subprocess import check_output
import random

seed = 999

# Seed the module-level RNG so the same permutation is produced on every run.
random.seed(seed)
shuffled_indices = list(range(len(filenames)))
random.shuffle(shuffled_indices)

# Index both lists with the same permutation so every path keeps its label.
image_paths = list(map(filenames.__getitem__, shuffled_indices))
labels = list(map(labels.__getitem__, shuffled_indices))
print("csv finished")

In [None]:
# Preview a handful of samples next to their numeric class labels
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2

# Shows the samples at positions 1..19 of the shuffled lists
# (position 0 is not part of the preview).
for sample_index in range(1, 20):
    preview = mpimg.imread(image_paths[sample_index])
    plt.figure(figsize=(2, 2))
    plt.imshow(preview, cmap='gray')

    # Printed value is the integer class index, not the character itself.
    print(labels[sample_index])

In [None]:
## Read every image file and collect the decoded arrays in a list

X_data = []

total_count = 0
prev_count = 0

for image_path in image_paths:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread does not raise for a missing or undecodable file, it
        # returns None. Appending that None would only surface later as a
        # confusing error when the list is turned into an array, and
        # skipping it would misalign X_data with `labels`, so fail here.
        raise IOError('Could not read image: {}'.format(image_path))
    X_data.append(image)
    total_count += 1

    # Report progress once more than 5000 images were added since the
    # previous report.
    if total_count - prev_count > 5000:
        prev_count = total_count
        print('{} images added...'.format(total_count))

print("finished")

In [None]:
## Split the data into a training set and a validation set

# numpy is used below but is not imported by any earlier cell.
import numpy as np
from sklearn.model_selection import train_test_split

X = np.array(X_data)
y = np.array(labels)

print(X.shape)
print(y.shape)


# 25% of the samples are held out for validation. The split reuses the
# `seed` defined in the shuffle cell so that it is reproducible across
# runs, just like the shuffle itself.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25,
                                                  random_state=seed)

print("\nX_train:")
print(X_train.shape)

print("\nX_val")
print(X_val.shape)

print("\ny_train")
print(y_train.shape)

print("\ny_val")
print(y_val.shape)

In [None]:
from keras import backend as K

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from sklearn.preprocessing import LabelEncoder,OneHotEncoder

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import os
import keras
import keras.utils as utils

# Checkpoint files go into this folder; the file name carries the epoch
# number and the validation loss of that epoch.
MODEL_SAVE_FOLDER_PATH = "./model/"
model_path = MODEL_SAVE_FOLDER_PATH + '{epoch:02d}-{val_loss:.4f}.hdf5'

if not os.path.exists(MODEL_SAVE_FOLDER_PATH):
    os.mkdir(MODEL_SAVE_FOLDER_PATH)

batch_size = 128
epochs = 500
# NOTE(review): presumably the number of lines in the label file — confirm.
num_classes = 980

# Turn the integer class indices into one-hot rows of width num_classes.
y_train = utils.to_categorical(y_train, num_classes)
y_val = utils.to_categorical(y_val, num_classes)

# Write a checkpoint only when val_loss improves on the best value so far.
cb_checkpoint = ModelCheckpoint(filepath=model_path, monitor='val_loss', verbose=1, save_best_only=True)

# Training batches are rescaled by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom).
train_generator = ImageDataGenerator(rescale=1./255, 
                                     rotation_range=15, 
                                     width_shift_range=0.10, 
                                     height_shift_range=0.10, 
                                     shear_range=0.3, 
                                     zoom_range=[0.8, 1.2])

# Validation batches are only rescaled, never augmented.
val_generator = ImageDataGenerator(rescale=1./255)

train_data_flow = train_generator.flow(X_train, y_train, 
                                       batch_size=batch_size)
val_data_flow = val_generator.flow(X_val, y_val,
                                   batch_size=batch_size)

# NOTE(review): assumes every loaded image is 32x32 with 3 channels — confirm.
img_rows, img_cols = 32, 32
input_shape = (img_rows, img_cols, 3)

# Three convolution + max-pooling stages, followed by a dense classifier.
model = Sequential()

model.add(Convolution2D(32, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Dropout(0.25))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))


model.add(Flatten())
model.add(Dense(516, activation='relu'))
model.add(Dropout(0.5))

# One softmax output per character class.
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', 
              optimizer=keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0),
              metrics=['accuracy'])


# 4. Train the model
# NOTE(review): steps_per_epoch / validation_steps are hard-coded; they
# presumably correspond to dataset size / batch_size — confirm when the
# dataset or batch_size changes.
hist = model.fit_generator(train_data_flow,
                           epochs=epochs,
                           verbose=1,
                           steps_per_epoch = 282,
                           validation_data=val_data_flow,
                           validation_steps = 70,
                           callbacks=[cb_checkpoint])

# The model was fitted on generator output, i.e. on pixel values multiplied
# by 1/255. X_val still holds the raw pixel values, so it must get the same
# scaling here; evaluating on the raw array reports meaningless numbers.
score = model.evaluate(X_val.astype('float32') / 255, y_val, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Save the trained model to 'hangul.h5' in the current working directory.
model.save('hangul.h5')

In [None]:
# 5. Plot the training history

# Per-epoch metrics recorded by the training call.
acc, val_acc = hist.history['acc'], hist.history['val_acc']
loss, val_loss = hist.history['loss'], hist.history['val_loss']

epochs = range(len(acc))

# One figure per metric: dots for the training curve, a line for validation.
history_figures = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)

for figure_number, (train_curve, val_curve, train_label, val_label,
                    figure_title) in enumerate(history_figures):
    if figure_number > 0:
        # The first plot goes onto the implicit current figure; every
        # later metric gets a fresh one.
        plt.figure()

    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(figure_title)
    plt.legend()

plt.show()



In [None]:
# Classify one image file with the trained model.

# numpy is used below but is not imported by any earlier cell.
import numpy as np

test_x = []
newimage = cv2.imread('./image-data/IMG_4543.png')
if newimage is None:
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded; the division below would then fail with an
    # unrelated-looking TypeError.
    raise IOError('Could not read image: ./image-data/IMG_4543.png')
test_x.append(newimage)

# Build a batch holding the single image and apply the same 1/255 scaling
# the training generator used.
# NOTE(review): the image presumably has to match the model's 32x32x3
# input shape already — confirm, no resizing is done here.
testx = np.array(test_x)
testx = testx / 255

print(model.predict_classes(testx))