In [None]:
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm

In [None]:
from ssd_utils.ssd_gen import Generator
from ssd_utils.ssd_box import BBoxUtility, DefaultBox
from ssd_utils.ssd_training import MultiboxLoss

## Definition of folders

In [None]:
# Dataset root, relative to this notebook. The trailing slash matters: every
# path below is built by plain string concatenation.
FOLDER = '../../../data_kaggle/kuzushiji/'
# Sub-folder holding the training images (read as IMAGES + image_id + '.jpg').
IMAGES = FOLDER + 'train_images/'
# Quick check that the dataset folder resolves and what it contains.
print(os.listdir(FOLDER))

In [None]:
# Annotation table, plus a view indexed by image_id for per-image lookups.
df_train = pd.read_csv(FOLDER + 'train.csv')
df_train_idx = df_train.set_index("image_id")
idx_train = df_train['image_id']

# Two-column translation table turned into a {codepoint: character} lookup.
unicode_map = dict(pd.read_csv(FOLDER + 'unicode_translation.csv').values)

In [None]:
def label_reader(label):
    """Parse the whitespace-separated ``labels`` entry of one annotation row.

    Parameters
    ----------
    label : mapping or pandas.Series
        Row exposing a ``'labels'`` entry made of repeated groups of five
        tokens (used by the caller as ``char, x, y, w, h``).

    Returns
    -------
    numpy.ndarray
        String array of shape ``(n, 5)``, one row per group. An empty
        ``(0, 5)`` array is returned when the entry is missing (not a string,
        e.g. NaN) or blank.

    Raises
    ------
    ValueError
        If the number of tokens is not a multiple of five.
    """
    raw = label['labels']
    # Rows without annotations come through as NaN (a float), not a string.
    if not isinstance(raw, str) or not raw.strip():
        return np.empty((0, 5), dtype=str)

    tokens = raw.split()
    try:
        return np.array(tokens).reshape(-1, 5)
    except ValueError as e:
        # Previously the error was printed and execution fell through to an
        # unbound local; fail loudly with a usable message instead.
        raise ValueError(
            'labels field has {} tokens, expected a multiple of 5'.format(len(tokens))
        ) from e

In [None]:
def get_center(coord):
    """Return the centre point of every box.

    ``coord`` is a 2-D array whose first four columns are read as
    ``x, y, w, h``. The result has one ``(center_x, center_y)`` row per input
    row, computed as the origin plus the floor-halved extent.
    """
    half_w = coord[:, 2] // 2
    half_h = coord[:, 3] // 2
    center_x = coord[:, 0] + half_w
    center_y = coord[:, 1] + half_h
    return np.column_stack((center_x, center_y))

In [None]:
from sklearn.cluster import KMeans

def get_cluster_n(centers, min_n=3, max_n=10, random_state=None):
    """Estimate the number of text columns from the box centres.

    KMeans is fitted for every cluster count in ``range(min_n, max_n)`` on the
    centres with the y coordinate divided by 100 (so x dominates the distance).
    For each fit the mean within-cluster standard deviation of x is recorded,
    and the count that follows the largest drop in ``log(std_x)`` is returned.

    Parameters
    ----------
    centers : array-like of shape (n_points, 2)
        ``(center_x, center_y)`` per character.
    min_n, max_n : int
        Candidate cluster counts are ``min_n .. max_n - 1``.
    random_state : int or None
        Forwarded to ``KMeans`` so the estimate can be made reproducible.

    Returns
    -------
    int
        Selected number of clusters.
    """
    centers = np.asarray(centers)

    # Scale on a float copy: assigning y/100 back into an integer array would
    # truncate it, which disagrees with the float scaling used by the caller.
    X = centers.astype('float')
    X[:, 1] = X[:, 1] / 100

    # KMeans cannot produce more clusters than there are samples.
    candidates = range(min_n, min(max_n, len(X) + 1))
    if len(candidates) < 2:
        # Not enough candidates to compare consecutive fits.
        return max(1, min(min_n, len(X)))

    stds_list = []
    for n in candidates:
        labels = KMeans(n_clusters=n, random_state=random_state).fit(X).labels_
        # Spread is measured on the unscaled centres, as before.
        stds_list.append(pd.DataFrame(centers).groupby(labels).std().mean().values)

    stds = np.array(stds_list)
    xsm = np.log(stds[:, 0])
    # Index (into the candidate list) reached by the steepest decrease.
    n_xsm = np.argmin(xsm[1:] - xsm[:-1]) + 1

    return int(n_xsm + min_n)

In [None]:
def gen_df_code(df_idx, idx):
    """Build the per-character table for one image, with text-column numbers.

    Parameters
    ----------
    df_idx : pandas.DataFrame
        Annotation table indexed by image id; the row must be readable by
        ``label_reader``.
    idx : str
        Image id; the image is read from ``IMAGES + idx + '.jpg'``.

    Returns
    -------
    pandas.DataFrame
        One row per character with columns ``char, x, y, w, h, image_id,
        center_x, center_y, col_n, x_len, y_len``. ``x, w, center_x`` are
        divided by the image width and ``y, h, center_y`` by the image height;
        ``x_len``/``y_len`` hold the width/height in pixels. ``col_n`` numbers
        the KMeans clusters from 0 in order of decreasing ``center_x``, and
        rows are sorted by ``col_n``.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    df_code = pd.DataFrame(label_reader(df_idx.loc[idx]), columns=['char', 'x', 'y', 'w', 'h'])
    df_code['image_id'] = idx
    df_code[['x', 'y', 'w', 'h']] = df_code[['x', 'y', 'w', 'h']].astype('int')

    centers = get_center(df_code[['x', 'y', 'w', 'h']].values)
    df_code['center_x'] = centers[:, 0]
    df_code['center_y'] = centers[:, 1]

    # Shrink y so that clustering is driven by the x position.
    X = centers.astype('float')
    X[:, 1] = X[:, 1] / 100
    df_code['col_n'] = KMeans(n_clusters=get_cluster_n(centers)).fit(X).labels_

    # Renumber clusters by first appearance when walking centres from largest
    # to smallest x. A direct label -> rank map replaces the previous
    # float-index merge.
    ordered_cols = df_code.sort_values('center_x', ascending=False)['col_n'].unique()
    rank = {col: i for i, col in enumerate(ordered_cols)}
    df_code['col_n'] = df_code['col_n'].map(rank).astype('int')
    # Stable sort keeps the original label order inside each column.
    df_code = df_code.sort_values('col_n', kind='mergesort').reset_index(drop=True)

    image_path = IMAGES + idx + '.jpg'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('could not read image: {}'.format(image_path))
    df_code['x_len'] = img.shape[1]
    df_code['y_len'] = img.shape[0]

    # Normalise horizontal quantities by width and vertical ones by height.
    df_code[['x', 'w', 'center_x']] = df_code[['x', 'w', 'center_x']] / img.shape[1]
    df_code[['y', 'h', 'center_y']] = df_code[['y', 'h', 'center_y']] / img.shape[0]

    return df_code

In [None]:
def gen_string(df_code):
    """Print the characters of ``df_code`` as one line of text.

    Columns are visited in order of first appearance of ``col_n``; inside a
    column the characters are ordered by ``center_y`` and translated through
    ``unicode_map``. Every column is followed by a single space. Nothing is
    returned.
    """
    pieces = []
    for col in df_code['col_n'].unique():
        column_chars = (
            df_code[df_code['col_n'] == col]
            .sort_values('center_y')['char']
            .replace(unicode_map)
        )
        pieces.append(''.join(column_chars) + ' ')

    print(''.join(pieces))

In [None]:
# Preview the table for the first training image. The bare name `idx` is only
# assigned by a loop further down, so using it here fails on a fresh kernel.
gen_df_code(df_train_idx, idx_train.iloc[0])

In [None]:
# For a handful of images: print the decoded text and overlay the character
# centres on the page, one scatter colour per detected column.
for idx in idx_train[200:210]:
    df_code = gen_df_code(df_train_idx, idx)
    gen_string(df_code)

    image_path = IMAGES + idx + '.jpg'
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cols = df_code['col_n'].unique()
    for col in cols:
        centers = df_code.query('col_n == {}'.format(col))[['center_x','center_y']].values
        # gen_df_code stores centres as fractions of the image size, while
        # imshow draws in pixel coordinates: scale back before plotting.
        plt.scatter(centers[:,0] * img.shape[1], centers[:,1] * img.shape[0])
    plt.imshow(img)
    plt.show()

In [None]:
# Show only the first rows of the last table built by the loop above.
df_code.head()

## preparation for SSD training

In [None]:
# NOTE(review): TRUE_FILES and FALSE_FILES are not assigned anywhere in the
# visible part of this notebook; this cell fails on a fresh kernel unless they
# are defined elsewhere — confirm.
len(TRUE_FILES) + len(FALSE_FILES)

In [None]:
# Load the annotation CSV used for SSD training.
# NOTE(review): CSV_FOLDER is not assigned anywhere in the visible part of this
# notebook — confirm where it is defined.
df_raw = pd.read_csv(CSV_FOLDER+'ext_face_flg_size.csv')

In [None]:
# Split the rows on the `flg` column: 1 -> df_pos, 0 -> df_neg.
df_pos = df_raw.loc[df_raw['flg'].eq(1)]
df_neg = df_raw.loc[df_raw['flg'].eq(0)]

In [None]:
def calc_loc(df):
    """Return box corners scaled by the per-row image size.

    Reads the columns ``x0, x1, y0, y1, xsize, ysize`` of ``df`` and returns an
    array of shape ``(len(df), 4)`` whose columns are
    ``y0/ysize, x0/xsize, y1/ysize, x1/xsize``.
    """
    widths = df['xsize'].values
    heights = df['ysize'].values
    columns = [
        df['y0'].values / heights,
        df['x0'].values / widths,
        df['y1'].values / heights,
        df['x1'].values / widths,
    ]
    return np.column_stack(columns)

In [None]:
# Normalised [y0, x0, y1, x1] boxes for the positive and the negative rows.
loc_pos, loc_neg = calc_loc(df_pos), calc_loc(df_neg)

In [None]:
# Stack row-wise: positives first, then negatives.
locs = np.concatenate([loc_pos, loc_neg], axis=0)

In [None]:
# Shape check: one row per annotation (positives + negatives), four columns.
locs.shape

In [None]:
# One-hot encode the `name` column of the positive rows.
ohe = OneHotEncoder()
names = df_pos['name'].values.reshape(-1, 1)
label_pos = ohe.fit_transform(names).toarray()

# Prepend an all-zero column (index 0) in front of the one-hot columns.
label_bg_pos = np.zeros((label_pos.shape[0], 1))
label_pos = np.concatenate([label_bg_pos, label_pos], axis=1)

# Negative rows get an all-zero vector of the same width. The former
# `label_neg[:, 0] = 1` assignment is disabled, so column 0 stays 0 here too.
label_neg = np.zeros((loc_neg.shape[0], label_pos.shape[1]))

print(label_pos.shape)
print(label_neg.shape)

In [None]:
# Same row order as `locs`: positives first, then negatives.
labels = np.concatenate([label_pos, label_neg], axis=0)
labels.shape

In [None]:
# Each row: four box coordinates followed by the label vector.
loc_label_arr = np.concatenate([locs, labels], axis=1)
loc_label_arr.shape

In [None]:
def gen_file_list(df):
    """Return ``'<name>/<file name>'`` for every row of ``df``.

    ``<name>`` comes from the ``name`` column and ``<file name>`` is the part
    of ``file_path`` after its last ``'/'``. Rows are processed in positional
    order.
    """
    return [
        name + '/' + path.split('/')[-1]
        for name, path in zip(df['name'].values, df['file_path'])
    ]

In [None]:
# File keys in the same order as the stacked arrays: positives, then negatives.
fname_list_pos = gen_file_list(df_pos)
fname_list_neg = gen_file_list(df_neg)
fname_list = fname_list_pos + fname_list_neg
len(fname_list)

In [None]:
# Ground truth per file: {file key: rows of loc_label_arr belonging to it}.
se = pd.Series(fname_list)
idx = se.value_counts().index
# Row positions of every file key, computed in one pass instead of building a
# full boolean mask over the series for each unique key.
row_positions = se.groupby(se).indices
gt = {i: loc_label_arr[row_positions[i]] for i in idx}

In [None]:
# 90/10 train/validation split over the file keys.
# A dedicated, seeded generator makes the split identical on every run.
SPLIT_SEED = 42
keys = gt.keys()
num = len(keys)
num_train = round(num * 0.9)
# Sampling all `num` positions without replacement is a shuffle.
pickup = random.Random(SPLIT_SEED).sample(range(num), num)
pickup_train = pickup[:num_train]
pickup_val = pickup[num_train:]
keys_arr = np.array(list(keys))
keys_train = list(keys_arr[pickup_train])
keys_val = list(keys_arr[pickup_val])

In [None]:
# Sanity check: the two parts together should cover every key (equals `num`).
len(pickup_train) + len(pickup_val)

## pretraining model

In [None]:
# Disabled one-off step: builds the Keras VGG16 with 'imagenet' weights and
# saves them to 'vgg16_original.hdf5'. Un-comment to regenerate that file.
# from keras.applications.vgg16 import VGG16
# vgg16_original = VGG16(include_top=True,
#                                weights='imagenet',
#                                input_tensor=None, 
#                                input_shape=None, 
#                                pooling=None, 
#                                classes=1000)
# vgg16_original.save_weights('vgg16_original.hdf5')

In [None]:
# Number of classes = width of the label vectors (one-hot columns plus the
# extra column 0 prepended to them).
NUM_CLASSES = label_pos.shape[1]
input_shape = (300, 300, 3) # (y, x, c)
# Four values, later copied into the last four columns of every prior box
# (used there in the order y, x, h, w).
variances = [0.1, 0.1, 0.2, 0.2]
# NOTE(review): SSD_VGG16 is not imported in the visible part of this
# notebook — confirm which module provides it.
model_obj = SSD_VGG16(num_classes=NUM_CLASSES, img_size=input_shape, variances=variances)

In [None]:
model = model_obj.SSD()
# Freeze the first 19 layers so training does not update them.
for layer in model.layers[:19]:
    layer.trainable = False

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Detector outputs of the model; their static shapes (axes 1 and 2) are read
# below as the feature-map grid sizes when the prior boxes are built.
det_list = model_obj.get_detector()

In [None]:
# Build the prior (default) boxes for every detector feature map.
# Each row is [ymin, xmin, ymax, xmax, var_y, var_x, var_h, var_w]; the four
# box coordinates are normalised by the input size at the end.
img_size = input_shape[:2]
aspects = [1.0, 1.0, 2, 3, 1/2, 1/3]
prior_chunks = []
for i in range(len(det_list)):
    # Grid size read from the detector's static shape.
    # NOTE(review): `.value` is the TF1 Dimension API — confirm the TF version.
    det_grid = np.array([det_list[i].shape[1].value, det_list[i].shape[2].value])
    # Cell size in input pixels. The `np.int` alias no longer exists in
    # current NumPy, so the builtin `int` is used.
    steps = (img_size / det_grid).astype(int)
    # One prior per grid cell (also correct for non-square grids).
    n_cells = det_grid[0] * det_grid[1]

    y_cent_arr = (np.linspace(steps[0]/2, img_size[0]-steps[0]/2, det_grid[0]))
    x_cent_arr = (np.linspace(steps[1]/2, img_size[1]-steps[1]/2, det_grid[1]))
    # NOTE(review): with meshgrid's default 'xy' indexing the flattened centres
    # run through all y values for each x; confirm this matches the order in
    # which the model flattens its detector outputs.
    y_cent, x_cent = np.meshgrid(y_cent_arr, x_cent_arr)
    y_cent = y_cent.reshape(-1,1)
    x_cent = x_cent.reshape(-1,1)

    y_var = np.ones_like(y_cent).reshape(-1,1) * variances[0]
    x_var = np.ones_like(x_cent).reshape(-1,1) * variances[1]
    h_var = np.ones_like(y_cent).reshape(-1,1) * variances[2]
    w_var = np.ones_like(x_cent).reshape(-1,1) * variances[3]
    priors_var = np.hstack([y_var, x_var, h_var, w_var])

    for asp in aspects:
        h_arr = np.ones(n_cells).reshape(-1,1) * steps[0] * asp
        w_arr = np.ones(n_cells).reshape(-1,1) * steps[1] / asp
        y_mins = np.clip(y_cent - h_arr // 2, 0, img_size[0])
        # The left edge must use the box width (it previously used h_arr).
        x_mins = np.clip(x_cent - w_arr // 2, 0, img_size[1])
        y_maxs = np.clip(y_cent + h_arr // 2, 0, img_size[0])
        x_maxs = np.clip(x_cent + w_arr // 2, 0, img_size[1])
        priors_pos = np.hstack([y_mins, x_mins, y_maxs, x_maxs])
        prior_chunks.append(np.hstack([priors_pos, priors_var]))

# Stack once at the end instead of growing the array inside the loop.
priors = np.vstack(prior_chunks) if prior_chunks else np.zeros((0, 8))
priors[:, 0] = priors[:, 0] / img_size[0]
priors[:, 1] = priors[:, 1] / img_size[1]
priors[:, 2] = priors[:, 2] / img_size[0]
priors[:, 3] = priors[:, 3] / img_size[1]
priors.shape # (total number of priors, 8)

In [None]:
# Box helper built from the class count and the prior boxes; it is passed to
# the batch generator and used for `detection_out` on predictions below.
bbox_util = BBoxUtility(NUM_CLASSES, priors)

In [None]:
batch_size = 16
# Batch generator over the ground-truth dict, split by the train/val key lists.
# NOTE(review): SCR_FOLDER is not assigned anywhere in the visible part of this
# notebook — presumably the image root that the `gt` keys are relative to;
# confirm.
gen = Generator(gt, bbox_util, batch_size, SCR_FOLDER,
                keys_train, keys_val,
                input_shape, do_crop=True)

In [None]:
base_lr = 0.01
# NOTE(review): `keras` is not imported in the visible part of this notebook
# (the only keras import is inside a commented-out cell) — confirm.
adm = keras.optimizers.Adam(lr=base_lr)
# Compile with the multibox loss from ssd_utils.
model.compile(optimizer=adm,
              loss=MultiboxLoss(NUM_CLASSES, alpha=1.0).compute_loss)

In [None]:
epochs = 100
# Half of the available batches per epoch, but never zero steps.
steps_per_epoch = max(1, (len(keys_train) // batch_size) // 2)
validation_steps = max(1, (len(keys_val) // batch_size) // 2)
# The `// 2` for validation used to sit outside the call's closing
# parenthesis, so it was applied to the object returned by fit_generator and
# raised only after training had finished, leaving `history` unassigned.
history = model.fit_generator(gen.generate(True),
                              steps_per_epoch=steps_per_epoch,
                              verbose=1,
                              epochs=epochs,
                              validation_data=gen.generate(False),
                              validation_steps=validation_steps)

In [None]:
# Visual check on one sample: ground-truth boxes (white) and predicted boxes
# (coloured) drawn over the original image.
# NOTE(review): `image` and `SCR_FOLDER` are not defined in the visible part of
# this notebook (`image` looks like keras.preprocessing.image) — confirm.
# j = 1693
j = 2212
img_path = SCR_FOLDER + fname_list[j]
# Load at the network input size and scale to [0, 1] for prediction.
img = image.load_img(img_path, target_size=input_shape[:2])
img = image.img_to_array(img)
x = img.reshape((-1,)+input_shape) / 255
y_pred = model.predict(x) 
# (sample, defaultbox, (4(output: cy, cx, h, w), 2(num_class), 8(default box: ymin, xmin, ymax, xmax, varyc, varxc, varh, varw)))
results = bbox_util.detection_out(y_pred, confidence_threshold=0.001)

# Reload at the original resolution for display.
img = image.load_img(img_path)
img = image.img_to_array(img)

# Ground-truth rows for this file: the first four columns are the normalised
# [ymin, xmin, ymax, xmax].
gt_ = gt[fname_list[j]]
det_ymin = gt_[:, 0]
det_xmin = gt_[:, 1]
det_ymax = gt_[:, 2]
det_xmax = gt_[:, 3]

plt.imshow(img / 255.)
currentAxis = plt.gca()
for i in range(len(gt_)):
    # Back to pixel coordinates of the displayed image.
    ymin = int(round(det_ymin[i] * img.shape[0]))
    xmin = int(round(det_xmin[i] * img.shape[1]))
    ymax = int(round(det_ymax[i] * img.shape[0]))
    xmax = int(round(det_xmax[i] * img.shape[1]))

    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1

    currentAxis.add_patch(plt.Rectangle(*coords, fill=False,  edgecolor='white', linewidth=2))

# Detections for the first (only) sample; columns are read as
# [label, confidence, ymin, xmin, ymax, xmax]. The det_* names are reused here.
det_label = results[0][:, 0]
det_conf = results[0][:, 1]
det_ymin = results[0][:, 2]
det_xmin = results[0][:, 3]
det_ymax = results[0][:, 4]
det_xmax = results[0][:, 5]

# Keep detections whose confidence is at least 0.007.
top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.007]

top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
top_ymin = det_ymin[top_indices]
top_xmin = det_xmin[top_indices]
top_ymax = det_ymax[top_indices]
top_xmax = det_xmax[top_indices]

colors = plt.cm.hsv(np.linspace(0, 1, 4)).tolist()

plt.imshow(img / 255.)
currentAxis = plt.gca()

for i in range(top_conf.shape[0]):
    ymin = int(round(top_ymin[i] * img.shape[0]))
    xmin = int(round(top_xmin[i] * img.shape[1]))
    ymax = int(round(top_ymax[i] * img.shape[0]))
    xmax = int(round(top_xmax[i] * img.shape[1]))
    score = top_conf[i]
    label = int(top_label_indices[i])
    # Only used by the disabled text annotation below.
    display_txt = '{:0.2f}, {}'.format(score, label)
    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
    # Every predicted box is drawn in the first colour, whatever its label.
    color = colors[0]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
#     currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

plt.show()

In [None]:
# Confidence values of all detections returned for the sample shown above.
det_conf

In [None]:
# Same visual check for a second sample (different `j` and display threshold):
# ground-truth boxes in white, predicted boxes in colour.
# NOTE(review): `image` and `SCR_FOLDER` are not defined in the visible part of
# this notebook (`image` looks like keras.preprocessing.image) — confirm.
# j = 1693
j = 2634
img_path = SCR_FOLDER + fname_list[j]
# Load at the network input size and scale to [0, 1] for prediction.
img = image.load_img(img_path, target_size=input_shape[:2])
img = image.img_to_array(img)
x = img.reshape((-1,)+input_shape) / 255
y_pred = model.predict(x) 
# (sample, defaultbox, (4(output: cy, cx, h, w), 2(num_class), 8(default box: ymin, xmin, ymax, xmax, varyc, varxc, varh, varw)))
results = bbox_util.detection_out(y_pred, confidence_threshold=0.001)

# Reload at the original resolution for display.
img = image.load_img(img_path)
img = image.img_to_array(img)

# Ground-truth rows for this file: the first four columns are the normalised
# [ymin, xmin, ymax, xmax].
gt_ = gt[fname_list[j]]
det_ymin = gt_[:, 0]
det_xmin = gt_[:, 1]
det_ymax = gt_[:, 2]
det_xmax = gt_[:, 3]

plt.imshow(img / 255.)
currentAxis = plt.gca()
for i in range(len(gt_)):
    # Back to pixel coordinates of the displayed image.
    ymin = int(round(det_ymin[i] * img.shape[0]))
    xmin = int(round(det_xmin[i] * img.shape[1]))
    ymax = int(round(det_ymax[i] * img.shape[0]))
    xmax = int(round(det_xmax[i] * img.shape[1]))

    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1

    currentAxis.add_patch(plt.Rectangle(*coords, fill=False,  edgecolor='white', linewidth=2))

# Detections for the first (only) sample; columns are read as
# [label, confidence, ymin, xmin, ymax, xmax]. The det_* names are reused here.
det_label = results[0][:, 0]
det_conf = results[0][:, 1]
det_ymin = results[0][:, 2]
det_xmin = results[0][:, 3]
det_ymax = results[0][:, 4]
det_xmax = results[0][:, 5]

# Keep detections whose confidence is at least 0.006.
top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.006]

top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
top_ymin = det_ymin[top_indices]
top_xmin = det_xmin[top_indices]
top_ymax = det_ymax[top_indices]
top_xmax = det_xmax[top_indices]

colors = plt.cm.hsv(np.linspace(0, 1, 4)).tolist()

plt.imshow(img / 255.)
currentAxis = plt.gca()

for i in range(top_conf.shape[0]):
    ymin = int(round(top_ymin[i] * img.shape[0]))
    xmin = int(round(top_xmin[i] * img.shape[1]))
    ymax = int(round(top_ymax[i] * img.shape[0]))
    xmax = int(round(top_xmax[i] * img.shape[1]))
    score = top_conf[i]
    label = int(top_label_indices[i])
    # Only used by the disabled text annotation below.
    display_txt = '{:0.2f}, {}'.format(score, label)
    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
    # Every predicted box is drawn in the first colour, whatever its label.
    color = colors[0]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
#     currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

plt.show()