In [1]:
import pandas as pd
import numpy as np
import os
import time
import cv2
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import random

In [2]:
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Input, Dense
from keras.layers import Flatten, Reshape, Activation, Concatenate, Dropout
from keras.losses import categorical_crossentropy
from keras.optimizers import adam
from sklearn.preprocessing import  OneHotEncoder

Using TensorFlow backend.


In [3]:
from ssd_utils.ssd_cnn import SSD_CNN

In [4]:
FOLDER = '../../data_kaggle/kuzushiji/'
IMAGES = FOLDER + 'train_images/'
CHARS = FOLDER + 'chars/'
MODELS = FOLDER + 'models/'
print(os.listdir(FOLDER))

['unicode_translation.csv', 'train_images', 'sample_submission.csv', 'train.csv', 'kuzushiji-recognition.zip', 'output', 'test_images', 'chars', 'models']


In [5]:
# os.mkdir(CHARS)

In [6]:
df_train = pd.read_csv(FOLDER + 'train.csv')
df_sub = pd.read_csv(FOLDER + 'sample_submission.csv')
unicode_map = {codepoint: char for codepoint, char in pd.read_csv(FOLDER + 'unicode_translation.csv').values}

In [7]:
len(unicode_map)

4787

In [8]:
df_sub_idx = df_sub.set_index("image_id")
df_train_idx = df_train.set_index("image_id")
idx_train = df_train['image_id']

In [9]:
def label_reader(label):
    try:
        code_arr = np.array(label['labels'].split(' ')).reshape(-1, 5)
    except:
        return
    return code_arr

In [10]:
CHAR_SIZE = (100, 100)

In [11]:
for idx in tqdm(idx_train[:2]):
    code = df_train_idx.loc[idx]
    try:
        code_arr = np.array(code['labels'].split(' ')).reshape(-1, 5)
    except:
        continue
    df_char = pd.DataFrame(code_arr, columns=['unicode', 'x', 'y', 'w', 'h'])
    df_char[['x','y','w','h']] = df_char[['x','y','w','h']].astype('int')

    path = IMAGES + idx + '.jpg'
    img = cv2.imread(path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    codes = df_char['unicode']
    positions = df_char[['x','y', 'w','h']].values
    char_arr = np.zeros([0, CHAR_SIZE[0], CHAR_SIZE[1]])
    for pos in positions :
        x, y, w, h = pos
        img_char = img_gray[y:y+h, x:x+w]
        w = img_char.shape[1]
        h = img_char.shape[0]
        if w > h:
            delta = (w - h) // 2
            pad = (np.ones([delta, w]) * 255).astype('int')
            img_pad = (255 - np.vstack([pad, img_char, pad])).astype('float32')        
            img_pad_resize = cv2.resize(img_pad, CHAR_SIZE).astype('int')
            char_arr = np.vstack([char_arr, img_pad_resize.reshape([1, CHAR_SIZE[0], CHAR_SIZE[1]])])
        else:
            delta = (h - w) // 2
            pad = (np.ones([h, delta]) * 255).astype('int')
            img_pad = (255 - np.hstack([pad, img_char, pad])).astype('float32')
            img_pad_resize = cv2.resize(img_pad, CHAR_SIZE).astype('int')        
            char_arr = np.vstack([char_arr, img_pad_resize.reshape([1, CHAR_SIZE[0], CHAR_SIZE[1]])])

    with open(CHARS + idx + '.pickle', 'wb') as f:
        pickle.dump(char_arr, f)
        pickle.dump(codes, f)

100%|██████████| 2/2 [00:00<00:00, 11.03it/s]


In [12]:
num_classes = len(unicode_map)
cnn_size = (100, 100, 1)
ssd_size = (300, 300, 1)
ssd_obj = SSD_CNN(num_classes, cnn_size, ssd_size)

In [13]:
model = ssd_obj.build_cnn()

In [14]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cnn_input (InputLayer)       (None, 100, 100, 1)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 100, 100, 32)      320       
_________________________________________________________________
conv1_2 (Conv2D)             (None, 100, 100, 32)      9248      
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 50, 50, 32)        0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 50, 50, 32)        9248      
_________________________________________________________________
conv2_2 (Conv2D)             (None, 50, 50, 32)        9248      
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 25, 25, 32)        0         
__________

In [15]:
model.compile(optimizer=adam(),
              loss=categorical_crossentropy)

ohe = OneHotEncoder()
ohe.fit(pd.Series(list(unicode_map.keys())).unique().reshape(-1,1))
with open(MODELS + 'ohe.pickle', 'wb') as f:
    pickle.dump(ohe, f)

In [16]:
def data_flow(CHARS, idx, ohe, batch_size=32):
    while True:
        image_id = random.sample(list(idx),1)[0]
        
        try:
            with open(CHARS + image_id + '.pickle', 'rb') as f:
                char_arr = pickle.load(f) / 255
                codes = pickle.load(f)
        except:
            continue
            
        num = len(codes)
        try:
            pickup = random.sample(list(range(num)), batch_size)
        except:
            continue
            
        inputs = char_arr[pickup][:, :, :, np.newaxis]
        targets = ohe.transform(codes[pickup].values[:, np.newaxis]).toarray()
        
        yield inputs, targets

In [17]:
len_train = len(idx_train)
idx_rand = random.sample(list(idx_train), len_train)
idx_dev = idx_rand[:int(len_train*0.8)]
idx_val = idx_rand[int(len_train*0.8):]

gen_train = data_flow(CHARS, idx_dev, ohe, batch_size=32)
gen_val = data_flow(CHARS, idx_val, ohe, batch_size=32)

num_steps_dev = len(idx_dev)
num_steps_val = len(idx_val)

In [18]:
model.fit_generator(gen_train, steps_per_epoch=num_steps_dev, epochs=10, verbose=1, 
                    validation_data=gen_val, 
                    validation_steps=num_steps_val)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

In [19]:
df_pred = pd.DataFrame()
df_pred['y_true'] = codes.replace(unicode_map)
df_pred['y_pred'] = pd.Series(ohe.inverse_transform(model.predict(char_arr[:,:,:,np.newaxis])).reshape(-1)).replace(unicode_map)

In [20]:
(df_pred['y_true'] == df_pred['y_pred']).mean()

0.6875

In [21]:
df_pred

Unnamed: 0,y_true,y_pred
0,は,は
1,か,ら
2,か,つ
3,い,い
4,に,ほ
5,に,に
6,へ,へ
7,が,が
8,か,か
9,ぬ,が


In [22]:
model.save(MODELS + 'model.hdf5', include_optimizer=False)