In [15]:
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import pickle
from random import shuffle
from scipy.misc import imread
from scipy.misc import imresize
from keras.callbacks import TensorBoard
from keras.callbacks import EarlyStopping
from time import gmtime, strftime
import os

import tensorflow as tf

from ssd_v2 import SSD300v2
from ssd_512 import SSD512
from ssd_training import MultiboxLoss
from ssd_utils import BBoxUtility

%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True)

# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.9
# set_session(tf.Session(config=config))

In [2]:
# some constants
# number of class
# Note that it contains background
NUM_CLASSES = 2
# (300, 300, 3) or (512, 512, 3)
# this constant decides used model
# if you use SSD300, then you should set (300, 300, 3)
input_shape = (300, 300, 3)
ssd_300_shape = (300, 300, 3)
ssd_512_shape = (512, 512, 3)

In [3]:
priors = pickle.load(open('prior_boxes_ssd300.pkl'if input_shape == ssd_300_shape else 'prior_boxes_ssd512.pkl', 'rb'))
bbox_util = BBoxUtility(NUM_CLASSES, priors)

In [4]:
gt = pickle.load(open('kitkat300.pkl'if input_shape == ssd_300_shape else 'kitkat500.pkl', 'rb'))
keys = sorted(gt.keys())
num_train = int(round(0.8 * len(keys)))
train_keys = keys[:num_train]
val_keys = keys[num_train:]
num_val = len(val_keys)

In [5]:
class Generator(object):
    def __init__(self, gt, bbox_util,
                 batch_size, path_prefix,
                 train_keys, val_keys, image_size,
                 saturation_var=0.5,
                 brightness_var=0.5,
                 contrast_var=0.5,
                 lighting_std=0.5,
                 hflip_prob=0.5,
                 vflip_prob=0.5,
                 do_crop=True,
                 crop_area_range=[0.75, 1.0],
                 aspect_ratio_range=[3./4., 4./3.]):
        self.gt = gt
        self.bbox_util = bbox_util
        self.batch_size = batch_size
        self.path_prefix = path_prefix
        self.train_keys = train_keys
        self.val_keys = val_keys
        self.train_batches = len(train_keys)
        self.val_batches = len(val_keys)
        self.image_size = image_size
        self.color_jitter = []
        if saturation_var:
            self.saturation_var = saturation_var
            self.color_jitter.append(self.saturation)
        if brightness_var:
            self.brightness_var = brightness_var
            self.color_jitter.append(self.brightness)
        if contrast_var:
            self.contrast_var = contrast_var
            self.color_jitter.append(self.contrast)
        self.lighting_std = lighting_std
        self.hflip_prob = hflip_prob
        self.vflip_prob = vflip_prob
        self.do_crop = do_crop
        self.crop_area_range = crop_area_range
        self.aspect_ratio_range = aspect_ratio_range
        
    def grayscale(self, rgb):
        return rgb.dot([0.299, 0.587, 0.114])

    def saturation(self, rgb):
        gs = self.grayscale(rgb)
        alpha = 2 * np.random.random() * self.saturation_var 
        alpha += 1 - self.saturation_var
        rgb = rgb * alpha + (1 - alpha) * gs[:, :, None]
        return np.clip(rgb, 0, 255)

    def brightness(self, rgb):
        alpha = 2 * np.random.random() * self.brightness_var 
        alpha += 1 - self.saturation_var
        rgb = rgb * alpha
        return np.clip(rgb, 0, 255)

    def contrast(self, rgb):
        gs = self.grayscale(rgb).mean() * np.ones_like(rgb)
        alpha = 2 * np.random.random() * self.contrast_var 
        alpha += 1 - self.contrast_var
        rgb = rgb * alpha + (1 - alpha) * gs
        return np.clip(rgb, 0, 255)

    def lighting(self, img):
        cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)
        eigval, eigvec = np.linalg.eigh(cov)
        noise = np.random.randn(3) * self.lighting_std
        noise = eigvec.dot(eigval * noise) * 255
        img += noise
        return np.clip(img, 0, 255)
    
    def horizontal_flip(self, img, y):
        if np.random.random() < self.hflip_prob:
            img = img[:, ::-1]
            y[:, [0, 2]] = 1 - y[:, [2, 0]]
        return img, y
    
    def vertical_flip(self, img, y):
        if np.random.random() < self.vflip_prob:
            img = img[::-1]
            y[:, [1, 3]] = 1 - y[:, [3, 1]]
        return img, y
    
    def random_sized_crop(self, img, targets):
        img_w = img.shape[1]
        img_h = img.shape[0]
        img_area = img_w * img_h
        random_scale = np.random.random()
        random_scale *= (self.crop_area_range[1] -
                         self.crop_area_range[0])
        random_scale += self.crop_area_range[0]
        target_area = random_scale * img_area
        random_ratio = np.random.random()
        random_ratio *= (self.aspect_ratio_range[1] -
                         self.aspect_ratio_range[0])
        random_ratio += self.aspect_ratio_range[0]
        w = np.round(np.sqrt(target_area * random_ratio))     
        h = np.round(np.sqrt(target_area / random_ratio))
        if np.random.random() < 0.5:
            w, h = h, w
        w = min(w, img_w)
        w_rel = w / img_w
        w = int(w)
        h = min(h, img_h)
        h_rel = h / img_h
        h = int(h)
        x = np.random.random() * (img_w - w)
        x_rel = x / img_w
        x = int(x)
        y = np.random.random() * (img_h - h)
        y_rel = y / img_h
        y = int(y)
        img = img[y:y+h, x:x+w]
        new_targets = []
        for box in targets:
            cx = 0.5 * (box[0] + box[2])
            cy = 0.5 * (box[1] + box[3])
            if (x_rel < cx < x_rel + w_rel and
                y_rel < cy < y_rel + h_rel):
                xmin = (box[0] - x_rel) / w_rel
                ymin = (box[1] - y_rel) / h_rel
                xmax = (box[2] - x_rel) / w_rel
                ymax = (box[3] - y_rel) / h_rel
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = min(1, xmax)
                ymax = min(1, ymax)
                box[:4] = [xmin, ymin, xmax, ymax]
                new_targets.append(box)
        new_targets = np.asarray(new_targets).reshape(-1, targets.shape[1])
        return img, new_targets
    
    def generate(self, train=True):
        while True:
            if train:
                shuffle(self.train_keys)
                keys = self.train_keys
            else:
                shuffle(self.val_keys)
                keys = self.val_keys
            inputs = []
            targets = []
            for key in keys:            
                img_path = self.path_prefix + key
                img = imread(img_path).astype('float32')
                y = self.gt[key].copy()
                if train and self.do_crop:
                    img, y = self.random_sized_crop(img, y)
                img = imresize(img, self.image_size).astype('float32')
                if train:
                    shuffle(self.color_jitter)
                    for jitter in self.color_jitter:
                        img = jitter(img)
                    if self.lighting_std:
                        img = self.lighting(img)
                    if self.hflip_prob > 0:
                        img, y = self.horizontal_flip(img, y)
                    if self.vflip_prob > 0:
                        img, y = self.vertical_flip(img, y)
                y = self.bbox_util.assign_boxes(y)
                inputs.append(img)                
                targets.append(y)
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    yield preprocess_input(tmp_inp), tmp_targets

In [6]:
path_prefix = './training_set300/JPEGImages/' if input_shape == ssd_300_shape else './training_set500/JPEGImages/'
gen = Generator(gt, bbox_util, 4, path_prefix,
                train_keys, val_keys,
                (input_shape[0], input_shape[1]), do_crop=False)

In [7]:
import h5py
model = SSD300v2(input_shape, num_classes=NUM_CLASSES) if input_shape == ssd_300_shape else SSD512(input_shape, num_classes=NUM_CLASSES)
model.load_weights('weights_SSD300.hdf5' if input_shape == ssd_300_shape else 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5', by_name=True)

  name='conv1_1')(net['input'])
  name='conv1_2')(net['conv1_1'])
  name='pool1')(net['conv1_2'])
  name='conv2_1')(net['pool1'])
  name='conv2_2')(net['conv2_1'])
  name='pool2')(net['conv2_2'])
  name='conv3_1')(net['pool2'])
  name='conv3_2')(net['conv3_1'])
  name='conv3_3')(net['conv3_2'])
  name='pool3')(net['conv3_3'])
  name='conv4_1')(net['pool3'])
  name='conv4_2')(net['conv4_1'])
  name='conv4_3')(net['conv4_2'])
  name='pool4')(net['conv4_3'])
  name='conv5_1')(net['pool4'])
  name='conv5_2')(net['conv5_1'])
  name='conv5_3')(net['conv5_2'])
  name='pool5')(net['conv5_3'])
  return Conv2D(*args, **kwargs)
  border_mode='same', name='fc7')(net['fc6'])
  name='conv6_1')(net['fc7'])
  name='conv6_2')(net['conv6_1'])
  name='conv7_1')(net['conv6_2'])
  name='conv7_2')(net['conv7_2'])
  name='conv8_1')(net['conv7_2'])
  name='conv8_2')(net['conv8_1'])
  name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
  name=name)(net['conv4_3_norm'])
  name='fc7_mbox_loc')(net['fc7'])
  name=

In [8]:
freeze = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
          'conv2_1', 'conv2_2', 'pool2',
          'conv3_1', 'conv3_2', 'conv3_3', 'pool3']#,
#           'conv4_1', 'conv4_2', 'conv4_3', 'pool4']

for L in model.layers:
    if L.name in freeze:
        L.trainable = False

In [9]:
def schedule(epoch, decay=0.9):
    return base_lr * decay**(epoch)

early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

callbacks = [keras.callbacks.ModelCheckpoint('./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                                             verbose=1,
                                             save_weights_only=True),
             keras.callbacks.LearningRateScheduler(schedule), early_stopping]

In [10]:
base_lr = 3e-4
optim = keras.optimizers.Adam(lr=base_lr)
# optim = keras.optimizers.RMSprop(lr=base_lr)
# optim = keras.optimizers.SGD(lr=base_lr, momentum=0.9, decay=decay, nesterov=True)

# def calculate_mAP(y_true,y_pred):
#     num_classes = y_true.shape[1]
#     average_precisions = []
#     relevant = K.sum(K.round(K.clip(y_true, 0, 1)))
#     tp_whole = K.round(K.clip(y_true * y_pred, 0, 1))
#     for index in range(num_classes):
#         temp = K.sum(tp_whole[:,:index+1],axis=1)
#         average_precisions.append(temp * (1/(index + 1)))
#     AP = Add()(average_precisions) / relevant
#     mAP = K.mean(AP,axis=0)
#     return mAP


model.compile(optimizer=optim,
              loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0).compute_loss,
              metrics=['sparse_categorical_accuracy'])

In [11]:
tictoc = strftime("%a_%d_%b_%Y_%H_%M_%S", gmtime())                             
directory_name = tictoc
log_dir = directory_name
os.mkdir(log_dir)
tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True,)
callbacks.append(tensorboard)

In [12]:
nb_epoch = 30
history = model.fit_generator(gen.generate(True), gen.train_batches,
                              nb_epoch, verbose=1,
                              callbacks=callbacks,
                              validation_data=gen.generate(False),
                              nb_val_samples=gen.val_batches,
                              nb_worker=1)

  import sys
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Summary name conv1_1/kernel:0 is illegal; using conv1_1/kernel_0 instead.
INFO:tensorflow:Summary name conv1_1/bias:0 is illegal; using conv1_1/bias_0 instead.
INFO:tensorflow:Summary name conv1_2/kernel:0 is illegal; using conv1_2/kernel_0 instead.
INFO:tensorflow:Summary name conv1_2/bias:0 is illegal; using conv1_2/bias_0 instead.
INFO:tensorflow:Summary name conv2_1/kernel:0 is illegal; using conv2_1/kernel_0 instead.
INFO:tensorflow:Summary name conv2_1/bias:0 is illegal; using conv2_1/bias_0 instead.
INFO:tensorflow:Summary name conv2_2/kernel:0 is illegal; using conv2_2/kernel_0 instead.
INFO:tensorflow:Summary name conv2_2/bias:0 is illegal; using conv2_2/bias_0 instead.
INFO:tensorflow:Summary name conv3_1/kernel:0 is illegal; using conv3_1/kernel_0 instead.
INFO:tensorflow:Summary name conv3_1/bias:0 is illegal; using conv3_1/bias_0 instead.
INFO:tensorflow:Summary name conv3_2/kernel:0 is illegal; using conv3_2/kernel_0 instead.
INFO:tensorflow:Summary name c

  assigned_priors_wh)




OSError: Unable to create file (Unable to open file: name = './checkpoints/weights.00-1.20.hdf5', errno = 2, error message = 'no such file or directory', flags = 13, o_flags = 242)