In [1]:
# from keras import backend as K
import keras 
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.models import Model
from keras.layers import Dense, Flatten, GlobalAveragePooling2D
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint, TensorBoard, Callback
import numpy as np
import os
import pickle
import scipy.misc
import matplotlib.pyplot as plt
from tqdm import tqdm
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress tensorflow "wasn't compiled to use" warnings
# print(keras.__version__)


from augment_dets import *
from params import *
from main import *
from read_data_kitti_ssd import *

# GPU number to use (restricts which CUDA device is visible to this process)
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
# os.environ["CUDA_VISIBLE_DEVICES"] = '3'

# folder for checkpoints, logs and prediction files written by this notebook
out_folder = '../out/'
# NOTE(review): data_folder is not referenced by any cell visible here — confirm it is still needed
data_folder = '../data/'
# parent folder of the per-run TensorBoard log directories
tb_logs_dir = out_folder + 'logs/'
# 80 object classes + 1 extra class (index 80) assigned to incorrect detections
num_out_classes = 80+1
# side length (pixels) of the square images fed to the network
img_size = 224


Using TensorFlow backend.


In [2]:
# helper functions

def gray2rgb(image):
    """Turn a single-channel image into a 3-channel uint8 image.

    image: 2-D array (height x width).
    Returns a (height, width, 3) uint8 array in which every channel is a
    copy of `image` (values are cast to uint8).
    """
    # add a trailing channel axis and replicate it three times
    three_channels = np.repeat(image[:, :, np.newaxis], 3, axis=2)
    return three_channels.astype(np.uint8)

def eval_pr(lbl, conf, correct, labels, num_objects, plot_fig=True):
    """Compute the 101-point interpolated average precision (AP) for one class.

    The correctness of every detection is assumed to be known already.

    lbl         - label/type of the objects to evaluate (int)
    conf        - confidence of each detection (list or array)
    correct     - correctness of each detection (list or array of booleans)
    labels      - label assigned to each detection (list or array of ints)
    num_objects - total number of objects of this class in the set
    plot_fig    - when True, plot the interpolated precision-recall curve

    Returns the AP as a float in [0, 1]. The AP is 0.0 when there are no
    detections with label `lbl` or when `num_objects` is not positive.
    """
    # accept plain lists as well as arrays (boolean-mask indexing needs arrays)
    conf = np.asarray(conf)
    correct = np.asarray(correct).astype(bool)
    labels = np.asarray(labels)

    # keep only relevant detections
    relevant = labels == lbl
    conf = conf[relevant]
    correct = correct[relevant]

    # sort detections by decreasing score across the whole set
    order = np.argsort(-conf)
    correct = correct[order]

    # cumulative true / false positives when going down the ranked list
    tp = np.cumsum(correct)
    fp = np.cumsum(np.logical_not(correct))

    # interpolated precision at recall 0.00, 0.01, ..., 1.00
    rec_points = np.arange(0, 1.01, 0.01)
    p_all = np.zeros(len(rec_points))
    if num_objects > 0 and len(tp) > 0:
        rec = tp / num_objects
        # tp + fp is the 1-based rank of each detection, so it is never zero
        prec = tp / (tp + fp)
        for i, t in enumerate(rec_points):
            reached = prec[rec >= t]
            if reached.size > 0:
                # best precision achievable at recall >= t
                p_all[i] = reached.max()

    # average precision = mean of the interpolated precision values
    ap = float(p_all.sum() / len(rec_points))
    ap_str = "{0:.2f}".format(ap)

    if plot_fig:
        plt.plot(rec_points, p_all)
        plt.title('AP=' + ap_str)
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.ylim((0, 1))
        plt.xlim((0, 1))
        plt.grid(True)
        plt.show()

    return ap

def prepare_batch(det_imgs, dets, det_ids, img_size, params):
    """Build one network batch (inputs and one-hot targets) from detections.

    det_imgs - detection images, indexable by detection id
    dets     - detections (dicts with at least 'correct' and 'label'),
               indexable by detection id
    det_ids  - ids of the detections that make up this batch
    img_size - side length of the (square) detection images
    params   - parameters dict; params['object_labels'] lists the class names

    Returns (data, labels): `data` is the image batch after VGG16
    `preprocess_input`, `labels` is the one-hot target matrix with
    `num_out_classes` columns. Correct detections get the index of their
    label in params['object_labels']; incorrect ones get the last class.
    """
    object_labels = params['object_labels']
    # the extra last output unit is the class for incorrect detections
    false_positive_class = num_out_classes - 1

    # load and prepare data
    labels_int = np.empty(len(det_ids))
    data = np.empty([len(det_ids), img_size, img_size, 3])
    for i, di in enumerate(det_ids):
        d = dets[di]
        data[i] = det_imgs[di]
        if d['correct']:
            labels_int[i] = object_labels.index(d['label'])
        else:
            labels_int[i] = false_positive_class

    data = preprocess_input(data)
    labels = np_utils.to_categorical(labels_int, num_out_classes)

    return data, labels

def generate_batches(det_imgs, dets, det_ids, batch_size, img_size, params):
    """Endless batch generator for the training process.

    Cycles over `det_ids` in consecutive chunks of `batch_size` (the last
    chunk of each pass may be smaller) and yields the (data, labels) pair
    built by `prepare_batch` for each chunk. The order of `det_ids` is the
    same in every pass.

    Raises ValueError (on the first `next()`) when `det_ids` is empty or
    `batch_size` is smaller than 1, because the generator could otherwise
    loop forever without ever yielding.
    """
    num_samples = len(det_ids)
    if num_samples == 0:
        raise ValueError('generate_batches: det_ids is empty, there is nothing to yield')
    if batch_size < 1:
        raise ValueError('generate_batches: batch_size must be at least 1')

    # determine the [start, stop) range of every batch
    batches = [(start, min(num_samples, start + batch_size))
               for start in range(0, num_samples, batch_size)]

    while True:  # do epoch
        for start, stop in batches:  # do single iteration/batch
            yield prepare_batch(det_imgs, dets, det_ids[start:stop], img_size, params)
            
def prep_det_img(img, d, img_size):
    """Create the network input image for one detection.

    A copy of the full image is made, the detection rectangle (enlarged by
    10% of its width/height on every side and clamped to the image) is
    filled with the gray value 127, and the result is resized to
    img_size x img_size.

    img      - image array of shape (height, width, channels)
    d        - detection dict with box corners 'x1', 'y1', 'x2', 'y2'
               (ints or floats)
    img_size - side length of the returned square image

    Returns the resized image as a uint8 array. `img` itself is not modified.
    """
    # factor of width and height to add to the margin of the detection
    det_size_factor = 0.1

    img_height = img.shape[0]
    img_width = img.shape[1]
    img_limits = np.array([img_width - 1, img_height - 1, img_width - 1, img_height - 1])

    # enlarge the detection box; float dtype so integer coordinates work too
    det_width = d['x2'] - d['x1']
    det_height = d['y2'] - d['y1']
    det_xy = np.array([d['x1'], d['y1'], d['x2'], d['y2']], dtype=np.float64)
    det_xy += np.array([-det_width, -det_height, det_width, det_height]) * det_size_factor
    # clamp to the image and truncate to integer pixel coordinates
    det_xy = np.clip(det_xy, 0, img_limits).astype(np.int32)

    # paint the gray rectangle (slice ends are exclusive; an empty or inverted
    # box simply paints nothing)
    det_img = np.copy(img)
    det_img[det_xy[1]:det_xy[3], det_xy[0]:det_xy[2]] = 127
    # NOTE(review): scipy.misc.imresize is deprecated since SciPy 1.0 (see the
    # warning in the cell output). Kept so the inputs match the trained
    # weights — replace when upgrading SciPy.
    det_img = scipy.misc.imresize(det_img, [img_size, img_size])
    return det_img.astype(np.uint8)
            
def load_data(train_data, img_size, get_det_imgs, shuffle, params, num_imgs=-1, conf_threshold=None):
    """Load detections, their source images and (optionally) detection images.

    train_data     - True: read the 'train' split and load images from the
                     train2014 folder; False: read the 'both' split and load
                     images from the val2014 folder
    img_size       - side length of the square detection images
    get_det_imgs   - when True, build the detection images with
                     `prep_det_img` and release the full-size images
    shuffle        - when True, shuffle the detections in place with
                     `np.random.shuffle`
    params         - parameters dict; params['imgs_folder'] is the images root
    num_imgs       - passed through to `read_data_coco_faster_rcnn`
    conf_threshold - minimum detection confidence to keep; when None the
                     notebook-level `min_conf` is used (original behavior)

    Returns (dets, imgs, det_imgs, num_positives):
      dets          - kept detections (conf >= threshold and not 'dont_care')
      imgs          - dict img_id -> image, or [] when get_det_imgs is True
      det_imgs      - uint8 array (len(dets), img_size, img_size, 3), or []
                      when get_det_imgs is False
      num_positives - value returned by `augment_dets`
    """
    if conf_threshold is None:
        # backward compatible fallback to the notebook-level global
        conf_threshold = min_conf

    # load detections
    # NOTE(review): non-training mode requests the 'both' split but reads the
    # images from the val2014 folder — confirm this is intended.
    split = 'train' if train_data else 'both'
    data = read_data_coco_faster_rcnn(params, split, num_imgs)
    # add correctness and FP type to detections
    num_positives = augment_dets(data, params)
    # keep only relevant detections
    dets = [d for d in data['dets'] if d['conf'] >= conf_threshold and not d['dont_care']]

    # shuffle detections
    if shuffle:
        np.random.shuffle(dets)

    # load images
    print('loading images')
    if train_data:
        img_prefix = params['imgs_folder'] + 'train/' + 'COCO_train2014_000000'
    else:
        img_prefix = params['imgs_folder'] + 'val/' + 'COCO_val2014_000000'
    imgs = {}
    for img_id in tqdm(data['imgs'].keys()):
        img = plt.imread(img_prefix + str(img_id).rjust(6, '0') + '.jpg')
        if len(img.shape) < 3:  # if grayscale convert to color
            img = gray2rgb(img)
        imgs[img_id] = img
    print('done loading images')

    # prepare detection images
    det_imgs = []
    if get_det_imgs:
        print('creating detection images')
        # allocate as uint8 right away: prep_det_img returns uint8, and a
        # float64 buffer would need 8x the memory before being cast down
        det_imgs = np.zeros([len(dets), img_size, img_size, 3], dtype=np.uint8)
        for i, d in enumerate(tqdm(dets)):
            det_imgs[i] = prep_det_img(imgs[d['img_id']], d, img_size)
        print('done creating detection images')

        # free original images
        imgs = []

    return dets, imgs, det_imgs, num_positives

Prepare data and labels

In [3]:

# minimum confidence detections to employ (read by load_data)
min_conf = 0.5
# min_conf = -1
# min_conf = 0.1
params = get_params()

# fix the RNG so the shuffle inside load_data, and therefore the
# train/val split below, is the same on every run
np.random.seed(0)

# get detections and their images
train_data = True
shuffle = True
get_det_imgs = True
num_imgs = 6000
dets, _, det_imgs, num_positives = load_data(train_data, img_size, get_det_imgs, shuffle, params, num_imgs=num_imgs)

# split to train and val: first 80% of the shuffled detections train, the rest validate
num_train_samples = int(round(len(dets) * 0.8))
train_det_ids = np.arange(num_train_samples)
val_det_ids = np.arange(num_train_samples, len(dets))

# count number of TPs and FPs
num_tps = len([di for di in train_det_ids if dets[di]['correct']])
num_fps = len(train_det_ids) - num_tps

# calculate batch sizes
batch_size = 32
num_train_batches = int(np.ceil(len(train_det_ids) / float(batch_size)))
num_val_batches = int(np.ceil(len(val_det_ids) / float(batch_size)))

print('num train samples = ' + str(len(train_det_ids)))
print('num val samples = ' + str(len(val_det_ids)))

print('num train TPs = ' + str(num_tps))
print('num train FPs = ' + str(num_fps))
# guard against a training set without any true positive
if num_tps > 0:
    print('ratio of FPs to TPs = ' + str(num_fps/num_tps))
else:
    print('ratio of FPs to TPs = undefined (no TPs)')

Loading objects and detections
augmenting detections


100%|██████████| 6000/6000 [00:26<00:00, 227.05it/s]


done augmenting detections


  0%|          | 24/6000 [00:00<00:24, 239.33it/s]

loading images


100%|██████████| 6000/6000 [00:25<00:00, 238.39it/s]
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  0%|          | 10/33984 [00:00<05:51, 96.71it/s]

done loading images
creating detection images


100%|██████████| 33984/33984 [02:52<00:00, 197.54it/s]


done creating detection images
num train samples = 27187
num val samples = 6797
num train TPs = 21379
num train FPs = 5808
ratio of FPs to TPs = 0.27166845970344733


Create model

In [4]:
# convolutional base: VGG16 pre-trained on ImageNet, without its fully connected top
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(img_size, img_size, 3))

# classification head: flatten the base output and map it to the output classes
x = Flatten(name='flatten')(base_model.output)
predictions = Dense(num_out_classes, name='dense_last', activation='softmax')(x)

# full model used for training
model = Model(inputs=base_model.input, outputs=predictions)

# freeze all base layers except the last 8, which stay trainable for fine-tuning
num_trainable_base_layers = 8
for layer in base_model.layers[:-num_trainable_base_layers]:
    layer.trainable = False

# optimizer: plain SGD (no momentum, no decay)
opt = keras.optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False)

# compile the model
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

Train model

In [5]:
# create generators
train_gen = generate_batches(det_imgs, dets, train_det_ids, batch_size, img_size, params)
val_gen = generate_batches(det_imgs, dets, val_det_ids, batch_size, img_size, params)

# make sure the output and log folders exist (os.listdir below and the
# checkpoint callback would otherwise fail on a fresh checkout)
os.makedirs(tb_logs_dir, exist_ok=True)

# prepare callbacks
# weights are saved after every epoch as weights_<epoch>.hdf5
checkpoint_cb = ModelCheckpoint(out_folder+'weights_{epoch:02d}.hdf5', period=1, save_weights_only=True)
# name the TensorBoard run after the number of entries already in the logs folder
num_files = len(os.listdir(tb_logs_dir))
run_name = 'run_' + str(num_files)
tensorboard_cb = TensorBoard(log_dir=tb_logs_dir + run_name)
print('Tensorboard run: ' + run_name)

# start training (epochs is an upper bound; the recorded run was interrupted manually)
hist = model.fit_generator(train_gen, steps_per_epoch=num_train_batches, epochs=1000, verbose=1, validation_data=val_gen, validation_steps=num_val_batches, callbacks=[checkpoint_cb, tensorboard_cb])

Tensorboard run: run_19
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000

KeyboardInterrupt: 

Run model on test set

In [7]:

# minimum confidence detections to employ (read by load_data)
min_conf = 0.5
# min_conf = -1
# min_conf = 0.1
params = get_params()

# load weights (checkpoint written by the training cell for the chosen epoch)
print('Loading weights')
model.load_weights(os.path.join(out_folder, 'weights_31.hdf5'))

# load images and detections
# NOTE: this replaces the training `dets`; re-run the data preparation cell
# before training again
train_data = False
shuffle = False
get_det_imgs = False
num_imgs = -1
dets, imgs, _, num_positives = load_data(train_data, img_size, get_det_imgs, shuffle, params, num_imgs=num_imgs)

# prepare detection images and predict
print('predict detections')
pred_confs = []
for i, d in enumerate(tqdm(dets)):
    det_lbl = params['object_labels'].index(d['label'])
    # prepare detection image
    img = imgs[d['img_id']]
    det_img = prep_det_img(img, d, img_size)
    # prepare image for network; a separate name is used so the training
    # array `det_imgs` is not overwritten
    net_input = np.zeros([1, img_size, img_size, 3])
    net_input[0] = det_img
    net_input = preprocess_input(net_input)
    # predict and keep the network's score for the label the detector assigned
    preds = model.predict(net_input, batch_size=1, verbose=0)
    pred_confs.append(preds[0][det_lbl])
pred_confs = np.array(pred_confs)
print('done predicting detections')


Loading weights
Loading objects and detections
augmenting detections


100%|██████████| 40504/40504 [02:38<00:00, 255.32it/s]


done augmenting detections


  0%|          | 21/40504 [00:00<03:19, 202.97it/s]

loading images


100%|██████████| 40504/40504 [02:49<00:00, 238.69it/s]
  0%|          | 0/224144 [00:00<?, ?it/s]

done loading images
predict detections


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
100%|██████████| 224144/224144 [1:02:56<00:00, 59.35it/s]

done predicting detections





Evaluate model

In [8]:
# per-detection label index, correctness and confidence of the original detector
object_labels = params['object_labels']
test_labels = np.array([object_labels.index(d['label']) for d in dets])
test_corrects = np.array([d['correct'] for d in dets])
test_confs = np.array([d['conf'] for d in dets])

# for every class print its name followed by 100 * (AP of base detector - AP of model)
for lbl_int, lbl in enumerate(object_labels):
    print(lbl)
    num_lbl_objects = num_positives[lbl]

    # AP when detections are ranked by the original detector's confidence
    ap_base = eval_pr(lbl_int, test_confs, test_corrects, test_labels, num_lbl_objects, plot_fig=False)

    # AP when the same detections are ranked by the network's confidence
    ap = eval_pr(lbl_int, pred_confs, test_corrects, test_labels, num_lbl_objects, plot_fig=False)

    print(100*(ap_base-ap))


person
4.17615486122
bicycle
10.0502052986
car
8.73227817738
motorcycle
6.66083477758
airplane
6.86793839023
bus
8.15933597356
train
5.57188270481
truck
9.94085713214
boat
10.1766197922
traffic light
8.19935344617
fire hydrant
5.31655701345
stop sign
7.6039012522
parking meter
7.28622899118
bench
9.95922350727
bird
4.94965286612
cat
8.35081908721
dog
8.44748199345
horse
8.69436928892
sheep
4.89798012348
cow
7.22303870424
elephant
8.35145303127
bear
10.4373560734
zebra
9.38325878632
giraffe
8.07086653272
backpack
6.41371649859
umbrella
8.02788688845
handbag
4.95095464738
tie
6.29489030924
suitcase
6.90540264811
frisbee
6.33509528987
skis
8.14176244785
snowboard
8.48019986342
sports ball
4.18207183324
kite
5.23615778196
baseball bat
6.19937512404
baseball glove
4.10327141821
skateboard
5.62226623787
surfboard
10.5197889762
tennis racket
4.52833161822
bottle
9.15350262394
wine glass
4.92852127295
cup
7.79235091334
fork
6.17876156031
knife
7.13976971211
spoon
4.90775882318
bowl
11.57621878

Save predictions

In [9]:

# one line per detection: <id_in_img>,<img_id>,<predicted confidence>
with open(out_folder + "coco_nn_predictions_bigger_rect.txt", "w") as out_file:
    for i, d in enumerate(dets):
        out_file.write('%d,%d,%f\n' % (d['id_in_img'], d['img_id'], pred_confs[i]))

