In [1]:
import pandas as pd
import numpy as np
import os
import time
import cv2
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import random

In [2]:
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Input, Dense
from keras.layers import Flatten, Reshape, Activation, Concatenate, Dropout
from keras.losses import categorical_crossentropy
from keras.optimizers import adam
from sklearn.preprocessing import  OneHotEncoder

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
FOLDER = '../../../data_handson/kuzushiji/'
IMAGES = FOLDER + 'train_images/'
CHARS = FOLDER + 'chars/'
MODELS = FOLDER + 'models/'
print(os.listdir(FOLDER))

['.DS_Store', 'chars', 'unicode_translation.csv', 'image', 'train.csv', 'train_images', 'sample_submission.csv']


In [4]:
# os.mkdir(CHARS)

In [16]:
df_train = pd.read_csv(FOLDER + 'train.csv')
df_sub = pd.read_csv(FOLDER + 'sample_submission.csv')
unicode_map = {codepoint: char for codepoint, char in pd.read_csv(FOLDER + 'unicode_translation.csv').values}

In [17]:
len(unicode_map)

4787

In [6]:
df_sub_idx = df_sub.set_index("image_id")
df_train_idx = df_train.set_index("image_id")
idx_train = df_train['image_id']

In [7]:
def label_reader(label):
    try:
        code_arr = np.array(label['labels'].split(' ')).reshape(-1, 5)
    except:
        return
    return code_arr

In [8]:
CHAR_SIZE = (100, 100)

In [9]:
for idx in tqdm(idx_train[:10]):
    code = df_train_idx.loc[idx]
    try:
        code_arr = np.array(code['labels'].split(' ')).reshape(-1, 5)
    except:
        continue
    df_char = pd.DataFrame(code_arr, columns=['unicode', 'x', 'y', 'w', 'h'])
    df_char[['x','y','w','h']] = df_char[['x','y','w','h']].astype('int')

    path = IMAGES + idx + '.jpg'
    img = cv2.imread(path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    codes = df_char['unicode']
    positions = df_char[['x','y', 'w','h']].values
    char_arr = np.zeros([0, CHAR_SIZE[0], CHAR_SIZE[1]])
    for pos in positions :
        x, y, w, h = pos
        img_char = img_gray[y:y+h, x:x+w]
        w = img_char.shape[1]
        h = img_char.shape[0]
        if w > h:
            delta = (w - h) // 2
            pad = (np.ones([delta, w]) * 255).astype('int')
            img_pad = (255 - np.vstack([pad, img_char, pad])).astype('float32')        
            img_pad_resize = cv2.resize(img_pad, CHAR_SIZE).astype('int')
            char_arr = np.vstack([char_arr, img_pad_resize.reshape([1, CHAR_SIZE[0], CHAR_SIZE[1]])])
        else:
            delta = (h - w) // 2
            pad = (np.ones([h, delta]) * 255).astype('int')
            img_pad = (255 - np.hstack([pad, img_char, pad])).astype('float32')
            img_pad_resize = cv2.resize(img_pad, CHAR_SIZE).astype('int')        
            char_arr = np.vstack([char_arr, img_pad_resize.reshape([1, CHAR_SIZE[0], CHAR_SIZE[1]])])

    with open(CHARS + idx + '.pickle', 'wb') as f:
        pickle.dump(char_arr, f)
        pickle.dump(codes, f)

100%|██████████| 10/10 [00:06<00:00,  1.33it/s]


In [18]:
img_size = (CHAR_SIZE[0], CHAR_SIZE[1], 1)
inputs = Input(shape=img_size, name='cnn_input')

## Block 1
conv1_1 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv1_1')(inputs)
conv1_2 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv1_2')(conv1_1)
pool1 = MaxPooling2D((2,2),strides=(2,2),padding='same',name='pool1')(conv1_2)

conv2_1 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv2_1')(pool1)
conv2_2 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv2_2')(conv2_1)
pool2 = MaxPooling2D((2,2),strides=(2,2),padding='same',name='pool2')(conv2_2)

conv3_1 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv3_1')(pool2)
conv3_2 = Conv2D(64, (3, 3),activation='relu',padding='same',name='conv3_2')(conv3_1)
pool3 = MaxPooling2D((2,2),strides=(2,2),padding='same',name='pool3')(conv3_2)
flat3 = Flatten(name='flat3')(pool3)


dense1 = Dense(4787,activation='relu', name='dense1')(flat3)
dense2 = Dense(4787, activation='softmax', name='dense2')(dense1)
model =  Model(inputs, dense2)

In [19]:
model.compile(optimizer=adam(),
              loss=categorical_crossentropy)

ohe = OneHotEncoder()
ohe.fit(pd.Series(list(unicode_map.keys())).unique().reshape(-1,1))

OneHotEncoder(categorical_features=None, categories=None, drop=None,
              dtype=<class 'numpy.float64'>, handle_unknown='error',
              n_values=None, sparse=True)

In [20]:
def data_flow(CHARS, idx, ohe, batch_size=32):
    while True:
        image_id = random.sample(list(idx),1)[0]
        
        try:
            with open(CHARS + image_id + '.pickle', 'rb') as f:
                char_arr = pickle.load(f) / 255
                codes = pickle.load(f)
        except:
            continue
            
        num = len(codes)
        try:
            pickup = random.sample(list(range(num)), batch_size)
        except:
            continue
            
        inputs = char_arr[pickup][:, :, :, np.newaxis]
        targets = ohe.transform(codes[pickup].values[:, np.newaxis]).toarray()
        
        yield inputs, targets

In [21]:
len_train = len(idx_train)
idx_rand = random.sample(list(idx_train), len_train)
idx_dev = idx_rand[:int(len_train*0.8)]
idx_val = idx_rand[int(len_train*0.8):]

gen_train = data_flow(CHARS, idx_dev, ohe, batch_size=32)
gen_val = data_flow(CHARS, idx_val, ohe, batch_size=32)

num_steps_dev = len(idx_dev)
num_steps_val = len(idx_val)

In [22]:
model.fit_generator(gen_train, steps_per_epoch=num_steps_dev, epochs=1, verbose=1, 
                    validation_data=gen_val, 
                    validation_steps=num_steps_val)

Epoch 1/1
  10/3104 [..............................] - ETA: 2:54:12 - loss: 7.3611

KeyboardInterrupt: 

In [23]:
df_pred = pd.DataFrame()
df_pred['y_true'] = codes.replace(unicode_map)
df_pred['y_pred'] = pd.Series(ohe.inverse_transform(model.predict(char_arr[:,:,:,np.newaxis])).reshape(-1)).replace(unicode_map)

In [24]:
(df_pred['y_true'] == df_pred['y_pred']).mean()

0.05333333333333334

In [25]:
df_pred

Unnamed: 0,y_true,y_pred
0,へ,ば
1,つ,て
2,は,ば
3,は,い
4,べ,い
5,つ,て
6,▲,め
7,つ,て
8,か,て
9,四,め


In [26]:
model.save(MODELS + 'model.hdf5', include_optimizer=False)