In [1]:
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image
import pickle
import os
import pandas as pd
import PIL
import random
import keras
from sklearn.preprocessing import OneHotEncoder
import importlib

Using TensorFlow backend.


In [2]:
from ssd_vgg16 import SSD_VGG16
from ssd_utils import BBoxUtility
from gen import Generator
from ssd_training import MultiboxLoss
from ssd_layer import DefaultBox

## difinition of folders

In [3]:
FOLDER = '../'
CSV_FOLDER = FOLDER + 'csv/'
EXT_FOLDER = FOLDER + 'extracted/'
EXT_TRUE = EXT_FOLDER + 'face_true/'
EXT_FALSE = EXT_FOLDER + 'face_false/'
SCR_FOLDER = FOLDER + 'scraped/images_best_titles/'

FALSE_FILES = os.listdir(EXT_FALSE)
TRUE_FILES = os.listdir(EXT_TRUE)

## preparation for SSD training

In [4]:
df_raw = pd.read_csv(CSV_FOLDER+'ext_face_flg_size.csv')

In [5]:
df = df_raw[df_raw['flg']==1]

In [6]:
xsize_arr = df['xsize'].values
ysize_arr = df['ysize'].values
x0_arr = df['x0'].values / xsize_arr
x1_arr = df['x1'].values / xsize_arr 
y0_arr = df['y0'].values / ysize_arr
y1_arr = df['y1'].values / ysize_arr
locs = np.vstack([x0_arr, y0_arr,x1_arr,y1_arr]).T
locs.shape

(8441, 4)

In [7]:
ohe = OneHotEncoder()
names = df['name'].values.reshape(-1, 1)
labels = ohe.fit_transform(names).toarray()
labels.shape

(8441, 100)

In [8]:
# neg_mask = df['flg'] == 0
# labels[neg_mask] = np.zeros(labels.shape[1])

In [9]:
loc_label_arr = np.hstack([locs, labels])
loc_label_arr.shape

(8441, 104)

In [10]:
fname_list = []
files = df['file_path']
for i, f in enumerate(files):
    fname = df['name'].values[i] + '/' + f.split('/')[-1]
    fname_list.append(fname)
len(fname_list)

8441

In [11]:
gt = {}
se = pd.Series(fname_list)
idx = se.value_counts().index
for i in idx:
    mask = se == i
    gt[i] = loc_label_arr[mask]

In [12]:
keys = gt.keys()
num = len(keys)
num_train = int(round(num * 0.8))
pickup = random.sample(range(num), num)
pickup_train = pickup[:num_train]
pickup_val = pickup[num_train:]
keys_train = list(np.array(list(keys))[pickup_train])
keys_val = list(np.array(list(keys))[pickup_val])

## pretraining model

In [13]:
NUM_CLASSES = labels.shape[1]
input_shape = (224, 224, 3)

In [14]:
model_obj = SSD_VGG16(NUM_CLASSES, img_size=input_shape)
base_model = model_obj.vgg16()

In [15]:
base_model.load_weights('vgg16.hdf5')

In [16]:
model = model_obj.SSD()
det_list = model_obj.get_detector()

In [17]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_1 (Conv2D)                (None, 224, 224, 64) 1792        input[0][0]                      
__________________________________________________________________________________________________
conv1_2 (Conv2D)                (None, 224, 224, 64) 36928       conv1_1[0][0]                    
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 112, 112, 64) 0           conv1_2[0][0]                    
__________________________________________________________________________________________________
conv2_1 (C

In [18]:
for L in model.layers[:18]:
    L.trainable = False

In [19]:
img_size = input_shape[:2]
priors = np.zeros(8).reshape(1,-1)
aspects = [1.0, 1.0, 2, 1/2, 3, 1/3]
for i in range(len(det_list)):
    det_grid = np.array([det_list[i].shape[1].value, det_list[i].shape[2].value])
    steps = (img_size / det_grid).astype(np.int)
    
    x_cent_arr = (np.linspace(steps[0]/2, img_size[0]-steps[0]/2, det_grid[0])) / img_size[0]
    y_cent_arr = (np.linspace(steps[1]/2, img_size[1]-steps[1]/2, det_grid[1])) / img_size[1]
    x_cent, y_cent = np.meshgrid(x_cent_arr, y_cent_arr)
    x_cent = x_cent.reshape(-1,1)
    y_cent = y_cent.reshape(-1,1)

    w_arr = np.ones(det_grid[0]**2).reshape(-1,1) * steps[0]
    h_arr = np.ones(det_grid[1]**2).reshape(-1,1) * steps[1]

    x_var = np.ones_like(x_cent).reshape(-1,1) * 0.1
    y_var = np.ones_like(y_cent).reshape(-1,1) * 0.1
    w_var = np.ones_like(w_arr).reshape(-1,1) * 0.2
    h_var = np.ones_like(h_arr).reshape(-1,1) * 0.2
    
    for asp in aspects:
        w_arr = np.ones(det_grid[0]**2).reshape(-1,1) * steps[0] * asp
        h_arr = np.ones(det_grid[1]**2).reshape(-1,1) * steps[1] / asp
        priors_pos = np.hstack([x_cent, y_cent, w_arr, h_arr])
        priors_var = np.hstack([x_var, y_var, w_var, h_var])
        priors_dum = np.hstack([priors_pos, priors_var])
        priors = np.vstack([priors, priors_dum])

priors = priors[1:]
priors.shape

(2352, 8)

In [20]:
bbox_util = BBoxUtility(NUM_CLASSES, priors)

In [21]:
def schedule(epoch, decay=0.9):
    return base_lr * decay**(epoch)

callbacks = [keras.callbacks.ModelCheckpoint('./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                                             verbose=1,
                                             save_weights_only=True),
             keras.callbacks.LearningRateScheduler(schedule)]

In [22]:
gen = Generator(gt, bbox_util, 16, SCR_FOLDER,
                keys_train, keys_val,
                input_shape[:2], do_crop=False)

In [23]:
base_lr = 3e-4
adm = keras.optimizers.Adam(lr=base_lr)
model.compile(optimizer=adm,
              loss=MultiboxLoss(NUM_CLASSES).compute_loss)

In [24]:
epochs = 30
history = model.fit_generator(gen.generate(True), gen.train_batches,
                              epochs, verbose=1,
                              callbacks=callbacks,
                              validation_data=gen.generate(False),
                              nb_val_samples=gen.val_batches,
                              nb_worker=1)

  import sys
  import sys
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/30
   3/6528 [..............................] - ETA: 2:02:15 - loss: 82.6004

ResourceExhaustedError: OOM when allocating tensor with shape[16,64,224,224] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: conv1_2/convolution = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv1_1/Relu, conv1_2/kernel/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: conv5_2__mbox_conf_flat/Shape/_301 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_219_conv5_2__mbox_conf_flat/Shape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [None]:
img_path = SCR_FOLDER + fname_list[0]
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
x = img.reshape(-1, 300, 300, 3) / 255

In [None]:
y_pred = model.predict(x) 
# (sample, defaultbox, (4(cx, cy, w, h), 2(num_class), 8(xmin, ymin, xmax, ymax, varxc, varyc, varw, varh)))
y_pred[0, 0, :]

In [None]:
results = bbox_util.detection_out(y_pred)

In [None]:
results[0][:, 0]

In [None]:
det_xmin

In [None]:
i = 2001
gt_ = gt[fname_list[i]]
img_path = SCR_FOLDER + fname_list[i]
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
x = img.reshape(-1, 300, 300, 3) / 255

In [None]:
det_xmin= gt_[:, 0]
det_ymin = gt_[:, 1]
det_xmax = gt_[:, 2]
det_ymax = gt_[:, 3]

plt.imshow(img / 255.)
currentAxis = plt.gca()

xmin = int(round(det_xmin[0] * img.shape[1]))
ymin = int(round(det_ymin[0] * img.shape[0]))
xmax = int(round(det_xmax[0] * img.shape[1]))
ymax = int(round(det_ymax[0] * img.shape[0]))

coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1

currentAxis.add_patch(plt.Rectangle(*coords, fill=False,  linewidth=2))

plt.show()

In [None]:
i = 0
det_label = results[i][:, 0]
det_conf = results[i][:, 1]
det_xmin = results[i][:, 2]
det_ymin = results[i][:, 3]
det_xmax = results[i][:, 4]
det_ymax = results[i][:, 5]

# Get detections with confidence higher than 0.6.
top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.05]

top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
top_xmin = det_xmin[top_indices]
top_ymin = det_ymin[top_indices]
top_xmax = det_xmax[top_indices]
top_ymax = det_ymax[top_indices]

colors = plt.cm.hsv(np.linspace(0, 1, 4)).tolist()

plt.imshow(img / 255.)
currentAxis = plt.gca()

for i in range(top_conf.shape[0]):
    xmin = int(round(top_xmin[i] * img.shape[1]))
    ymin = int(round(top_ymin[i] * img.shape[0]))
    xmax = int(round(top_xmax[i] * img.shape[1]))
    ymax = int(round(top_ymax[i] * img.shape[0]))
    score = top_conf[i]
    label = int(top_label_indices[i])
#         label_name = voc_classes[label - 1]
    display_txt = '{:0.2f}, {}'.format(score, label)
    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
    color = colors[label]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

plt.show()