In [1]:
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image
import pickle
import os
import pandas as pd
import PIL
import random
import keras
from sklearn.preprocessing import OneHotEncoder
import importlib

Using TensorFlow backend.


In [2]:
from ssd_cnn import SSD_CNN
from ssd_utils import BBoxUtility
from gen import Generator
from ssd_training import MultiboxLoss
from ssd_layer import DefaultBox

## Definition of folders

In [3]:
# Project-relative directory layout. Every folder path ends with '/' because
# later cells build file paths by plain string concatenation.
FOLDER = '../'
CSV_FOLDER = os.path.join(FOLDER, 'csv/')
EXT_FOLDER = os.path.join(FOLDER, 'extracted/')
EXT_TRUE = os.path.join(EXT_FOLDER, 'face_true/')
EXT_FALSE = os.path.join(EXT_FOLDER, 'face_false/')
SCR_FOLDER = os.path.join(FOLDER, 'scraped/images_best_titles/')

# Directory listings of the two extracted-crop folders.
FALSE_FILES = os.listdir(EXT_FALSE)
TRUE_FILES = os.listdir(EXT_TRUE)

## preparation for SSD training

In [4]:
df_raw = pd.read_csv(CSV_FOLDER+'ext_face_flg_size.csv')

In [5]:
df_raw.head()

Unnamed: 0,name,file_path,x0,x1,y0,y1,file,flg,colors,ysize,xsize
0,三上悠亜,images_best_titles/三上悠亜/ssni00409jp-9.jpg,450,544,96,190,face_true/ssni00409jp-9_0.jpg,1,3,534,800
1,三上悠亜,images_best_titles/三上悠亜/ssni00409jp-10.jpg,426,507,130,211,face_true/ssni00409jp-10_0.jpg,1,3,526,643
2,三上悠亜,images_best_titles/三上悠亜/sivr00016jp-1.jpg,359,472,65,178,face_true/sivr00016jp-1_0.jpg,1,3,532,800
3,三上悠亜,images_best_titles/三上悠亜/sivr00016jp-4.jpg,245,483,110,348,face_true/sivr00016jp-4_0.jpg,1,3,532,800
4,三上悠亜,images_best_titles/三上悠亜/sivr00016jp-6.jpg,369,476,54,161,face_true/sivr00016jp-6_0.jpg,1,3,534,800


In [6]:
df_raw.shape

(37450, 11)

In [7]:
df_pos = df_raw[df_raw['flg']==1]
df_neg = df_raw[df_raw['flg']==0]

In [8]:
def calc_loc(df):
    """Return the boxes of ``df`` normalised by image size.

    x coordinates are divided by the ``xsize`` column and y coordinates by the
    ``ysize`` column.  The result is an ``(n_rows, 4)`` array whose columns
    are ordered ``(y0, x0, y1, x1)``.
    """
    width = df['xsize'].values
    height = df['ysize'].values
    normalised_columns = [
        df['y0'].values / height,
        df['x0'].values / width,
        df['y1'].values / height,
        df['x1'].values / width,
    ]
    # Each 1-D array becomes one column of the output.
    return np.column_stack(normalised_columns)

In [9]:
loc_pos = calc_loc(df_pos)
loc_neg = calc_loc(df_neg)

In [10]:
locs = loc_pos #np.vstack([loc_pos, loc_neg])

In [11]:
locs.shape

(8441, 4)

In [12]:
# One-hot encode the ``name`` column of the positive rows; the encoder expects
# a 2-D (n_samples, 1) input, hence the reshape.
ohe = OneHotEncoder()
names = df_pos['name'].values.reshape(-1, 1)
label_pos = ohe.fit_transform(names).toarray()

# Negative rows get an all-zero label vector of the same width.
n_neg, n_label_cols = loc_neg.shape[0], label_pos.shape[1]
label_neg = np.zeros((n_neg, n_label_cols))

print(label_pos.shape)
print(label_neg.shape)

(8441, 100)
(29009, 100)


In [13]:
labels = label_pos #np.vstack([label_pos, label_neg])
labels.shape

(8441, 100)

In [14]:
# neg_mask = df['flg'] == 0
# labels[neg_mask] = np.zeros(labels.shape[1])

In [15]:
loc_label_arr = np.hstack([locs, labels])
loc_label_arr.shape

(8441, 104)

In [16]:
def gen_file_list(df):
    """Build ``'<name>/<file name>'`` strings for every row of ``df``.

    The file name is the last ``/``-separated component of ``file_path``;
    rows are processed in positional order and the result is a plain list.
    """
    person_names = df['name'].values
    return [
        person + '/' + path.rsplit('/', 1)[-1]
        for person, path in zip(person_names, df['file_path'])
    ]

In [17]:
# Relative image paths for positive and negative rows; only the positive ones
# are used for training at the moment.
fname_list_pos = gen_file_list(df_pos)
fname_list_neg = gen_file_list(df_neg)

fname_list = list(fname_list_pos)
#fname_list.extend(fname_list_neg)
len(fname_list)

8441

In [18]:
# shape_list = []
# for f in fname_list:
#     img_path = SCR_FOLDER + f
#     img = image.load_img(img_path, target_size=(300, 300))
#     img = image.img_to_array(img)
#     if img.shape != (300, 300, 3):
#         shape_list.append(img.shape)

In [19]:
# shape_list

In [20]:
# Ground truth per image: map each file name to the rows of loc_label_arr
# (normalised box followed by the one-hot label) that belong to it.
#
# Row positions are collected in a single pass over fname_list instead of
# building one full boolean mask per unique file name, which scaled with
# (number of files) x (number of rows).
rows_by_file = {}
for row, fname in enumerate(fname_list):
    rows_by_file.setdefault(fname, []).append(row)

# Integer-list indexing keeps the rows of each file in their original order.
gt = {fname: loc_label_arr[rows] for fname, rows in rows_by_file.items()}

In [21]:
# Split the image keys 80/20 into training and validation sets.
#
# A dedicated seeded RNG makes the split reproducible across kernel restarts
# without touching the global ``random`` state, and sorting the keys first
# makes the result independent of the insertion order of ``gt``.
SPLIT_SEED = 0

keys = sorted(gt.keys())
num = len(keys)
num_train = int(round(num * 0.8))

pickup = list(range(num))
random.Random(SPLIT_SEED).shuffle(pickup)
pickup_train = pickup[:num_train]
pickup_val = pickup[num_train:]

keys_train = [keys[j] for j in pickup_train]
keys_val = [keys[j] for j in pickup_val]

## pretraining model

In [22]:
NUM_CLASSES = labels.shape[1]
input_shape = (300, 300, 3) # (y, x, c)

In [23]:
model_obj = SSD_CNN(NUM_CLASSES, img_size=input_shape)
base_model = model_obj.CNN()

In [24]:
# base_model.load_weights('vgg16.hdf5')

In [25]:
model = model_obj.SSD()
det_list = model_obj.get_detector()

In [27]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 300, 300, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 300, 300, 64) 1792        input[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 100, 100, 64) 0           conv1[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 100, 100, 128 73856       pool1[0][0]                      
__________________________________________________________________________________________________
pool2 (Max

In [28]:
# for L in model.layers[:19]:
#     L.trainable = False

In [29]:
# model.summary()

In [30]:
# Build the default ("prior") boxes: for every detector feature map, one box
# per grid cell and aspect ratio.  Each row of ``priors`` is
# (cy, cx, h, w, var_cy, var_cx, var_h, var_w), with the geometry expressed
# relative to the input image size.  Rows are ordered by detector, then by
# aspect ratio, then by grid cell.
img_size = input_shape[:2]
aspects = [1.0, 1.0, 2, 1/2, 3, 1/3]
variances = [0.1, 0.1, 0.2, 0.2]  # constant variances for (cy, cx, h, w)

prior_blocks = []
for det in det_list:
    # int() rather than ``.value`` so that the static dimension may be either
    # a TF1 ``Dimension`` object or a plain int.
    det_grid = np.array([int(det.shape[1]), int(det.shape[2])])
    # The ``np.int`` alias was removed from NumPy; builtin ``int`` is equivalent.
    steps = (np.asarray(img_size) / det_grid).astype(int)
    # Number of cells in this feature map; rows * cols is also correct for
    # non-square grids, where squaring one side would not be.
    n_cells = det_grid[0] * det_grid[1]

    # Cell centres, from half a step inside each border, normalised to [0, 1].
    y_cent_arr = (np.linspace(steps[0]/2, img_size[0]-steps[0]/2, det_grid[0])) / img_size[0]
    x_cent_arr = (np.linspace(steps[1]/2, img_size[1]-steps[1]/2, det_grid[1])) / img_size[1]
    # NOTE(review): default 'xy' meshgrid indexing makes x the slow axis after
    # flattening - confirm this matches how the model flattens its outputs.
    y_cent, x_cent = np.meshgrid(y_cent_arr, x_cent_arr)
    centers = np.hstack([y_cent.reshape(-1, 1), x_cent.reshape(-1, 1)])

    priors_var = np.tile(variances, (n_cells, 1))

    for asp in aspects:
        # Height grows and width shrinks with the aspect value.
        h_arr = np.full((n_cells, 1), steps[0] * asp / img_size[0])
        w_arr = np.full((n_cells, 1), steps[1] / asp / img_size[1])
        prior_blocks.append(np.hstack([centers, h_arr, w_arr, priors_var]))

# Stack once at the end instead of growing the array inside the loop.
priors = np.vstack(prior_blocks) if prior_blocks else np.empty((0, 8))
priors.shape

(960, 8)

In [31]:
bbox_util = BBoxUtility(NUM_CLASSES, priors)

In [32]:
gen = Generator(gt, bbox_util, 8, SCR_FOLDER,
                keys_train, keys_val,
                input_shape, do_crop=False)

In [33]:
base_lr = 3e-4
adm = keras.optimizers.Adam(lr=base_lr)
model.compile(optimizer=adm,
              loss=MultiboxLoss(NUM_CLASSES).compute_loss)

In [None]:
epochs = 15
history = model.fit_generator(gen.generate(True), gen.train_batches,verbose=1,
                              validation_data=gen.generate(False),
                              epochs=epochs,
                              validation_steps=gen.val_batches)

Epoch 1/15
1149/6528 [====>.........................] - ETA: 20:41 - loss: 5.0221

In [None]:
# Run the model on one image as a sanity check.
i = 50
img_path = SCR_FOLDER + fname_list[i]
# Take the size from input_shape so this cell stays in sync with the model
# input instead of repeating the literal 300.
img = image.load_img(img_path, target_size=tuple(input_shape[:2]))
img = image.img_to_array(img)
x = img.reshape((-1,) + tuple(input_shape)) / 255
y_pred = model.predict(x)
# Expected layout: (sample, defaultbox, 4 (cy, cx, h, w) + class scores +
# 8 (ymin, xmin, ymax, xmax, varyc, varxc, varh, varw)).
# NOTE(review): the original note said 2 class scores; presumably this is
# NUM_CLASSES-dependent - confirm against the SSD_CNN output definition.
y_pred.shape

In [None]:
results = bbox_util.detection_out(y_pred, confidence_threshold=0.001)
results

In [None]:
# Overlay the ground-truth box and the model's detections on sample ``i``.
# The loop below deliberately does not reuse ``i`` as its index, so the cell
# can be re-run without changing which sample is shown.
CONF_THRESHOLD = 0.022  # detections below this confidence are not drawn

# Ground-truth rows are (ymin, xmin, ymax, xmax, one-hot label...) in
# coordinates relative to the image size.
gt_ = gt[fname_list[i]]

plt.imshow(img / 255.)
currentAxis = plt.gca()

# Only the first ground-truth box of the image is drawn.
ymin = int(round(gt_[0, 0] * img.shape[0]))
xmin = int(round(gt_[0, 1] * img.shape[1]))
ymax = int(round(gt_[0, 2] * img.shape[0]))
xmax = int(round(gt_[0, 3] * img.shape[1]))
coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
currentAxis.add_patch(plt.Rectangle(*coords, fill=False,  linewidth=2))

# Detections for the single image in the batch, read as columns
# (label, confidence, ymin, xmin, ymax, xmax).
detections = np.asarray(results[0])
if detections.ndim != 2:
    # NOTE(review): presumably detection_out yields an empty sequence when
    # nothing passes its threshold - confirm; treat that as "no detections".
    detections = np.empty((0, 6))

det_label = detections[:, 0]
det_conf = detections[:, 1]
det_ymin = detections[:, 2]
det_xmin = detections[:, 3]
det_ymax = detections[:, 4]
det_xmax = detections[:, 5]

# Keep detections with confidence of at least CONF_THRESHOLD.
top_indices = [k for k, conf in enumerate(det_conf) if conf >= CONF_THRESHOLD]

top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
top_ymin = det_ymin[top_indices]
top_xmin = det_xmin[top_indices]
top_ymax = det_ymax[top_indices]
top_xmax = det_xmax[top_indices]

colors = plt.cm.hsv(np.linspace(0, 1, 4)).tolist()
color = colors[0]  # every detection is drawn in the same colour

for k in range(top_conf.shape[0]):
    ymin = int(round(top_ymin[k] * img.shape[0]))
    xmin = int(round(top_xmin[k] * img.shape[1]))
    ymax = int(round(top_ymax[k] * img.shape[0]))
    xmax = int(round(top_xmax[k] * img.shape[1]))
    score = top_conf[k]
    label = int(top_label_indices[k])
    display_txt = '{:0.2f}, {}'.format(score, label)
    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

plt.show()

In [None]:
det_conf