In [1]:
# ---------------------------------------------------------------------------------------------
# In this test part
# 1. We test performance of 3 model we train in the TRAINING PART 
# 2. Three Model:
#    Faster RCNN with shared CNN: VGG16, Resnet50, and Inception V4
# 3. Each time we test, we load Config from pickle with stores Config information,
#    and weights of model from h5py file.
#    TWO CRITICAL PARTS: A and B. you can find them below
# ---------------------------------------------------------------------------------------------


In [1]:

import h5py
import os
import cv2
import numpy as np
import sys
import pickle
from optparse import OptionParser
import time
from keras_frcnn import config
from keras import backend as K
from keras.layers import Input
from keras.models import Model
from keras_frcnn import roi_helpers

Using TensorFlow backend.


In [2]:
"""
parse arguments
Used in the test process
Idea from py-faster-rcnn (python + caffe, https://github.com/rbgirshick/py-faster-rcnn)
"""
sys.setrecursionlimit(40000)

parser = OptionParser()
parser.add_option('-f','--fff',dest = 'fff',default='/Users/zehaodong/Desktop/faster-rcnn-keras')
parser.add_option("-p", "--path", dest="test_path", help="Path to test data.",default = '/Users/zehaodong/Desktop/faster-rcnn-keras/VOCdevkit')
parser.add_option("-n", "--num_rois", type="int", dest="num_rois",help="Number of ROIs per iteration. Higher means more memory use.", default=32)
choose = input('new model path? T or F:')
# ------------------------------------------------------------------------
# A: here decide which model to test and visualization
#    choose a pickle name: config_vgg_16.pickle, config_best.pickle
#                          config_resnet.pickle, config_inception.pickle
#    Note: We trained 4 model, since this work is time consuming,
#          only vgg16 is able to trined for about 1000 epoch.
#          others are only trained for about 20 epoch each.
# ------------------------------------------------------------------------
if choose == 'T':
    con = input('filename:')
else:
    con = "config.pickle"
parser.add_option("--config_filename", dest="config_filename", help="Location to read the metadata related to the training (generated when training).",default= con)
parser.add_option("--network", dest="network", help="raw feature extraction net", default= 'inception_v4')
(options, args) = parser.parse_args()

new model path? T or F:T
filename:config_vgg_16.pickle


In [4]:
# ------------------------------------------------------------------------
# In each case, the benchmark of test process are same
# ------------------------------------------------------------------------


## Case 1: Faster Rcnn based on vgg16

### Import trained model

In [3]:
file_name = options.config_filename

In [4]:
cd Built_model

/Users/zehaodong/Desktop/faster-rcnn-keras/Built_model


In [5]:
with open(file_name, 'rb') as f_name:
    C = pickle.load(f_name)

In [6]:
C.network

'vgg'

In [7]:
##### This initilazer are according to data_generator.py #####
'''
Reference: https://github.com/yhenon/keras-frcnn
'''
# turn off any data augmentation at test time
C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

#### import raw feature extraction network from trained model 

In [8]:
if C.network == 'vgg':
    from keras_frcnn import vgg as nn
elif C.network == 'resnet50':
    from keras_frcnn import resnet as nn
else:
    from keras_frcnn import inception_V4 as nn   

#### other features int he trained model

In [9]:
# map of class
class_mapping = C.class_mapping
if 'bg' not in class_mapping:
    class_mapping['bg'] = len(class_mapping)
class_mapping = {v: k for k, v in class_mapping.items()}
print(class_mapping)

{4: 'cow', 15: 'diningtable', 20: 'bg', 18: 'aeroplane', 2: 'chair', 6: 'car', 7: 'motorbike', 16: 'train', 17: 'sofa', 13: 'boat', 10: 'cat', 3: 'tvmonitor', 14: 'bottle', 8: 'dog', 0: 'bus', 19: 'sheep', 5: 'bird', 9: 'horse', 1: 'person', 12: 'pottedplant', 11: 'bicycle'}


In [10]:
class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
C.num_rois = int(options.num_rois)

In [11]:
if C.network == 'resnet50':
    num_features = 1024
elif C.network == 'vgg':
    num_features = 512
elif C.network == 'inception_v4':
    num_features = 512
else:
    print('model error')

In [12]:
# numer of anchors
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)

#### utility functions

In [13]:
def trans_to_real_coordinates(ratio, x1, y1, x2, y2):
    # transform coordinate in the feature map back to the origin
    real_x1 = int(round(x1 // ratio))
    real_y1 = int(round(y1 // ratio))
    real_x2 = int(round(x2 // ratio))
    real_y2 = int(round(y2 // ratio))
    return (real_x1, real_y1, real_x2 ,real_y2)

In [14]:
def format_img(img, C):
    #formats an image for model prediction based on config
    img, ratio = format_img_size(img, C)
    img = format_img_channels(img, C)
    return img, ratio

In [15]:
def format_img_size(img, C):
    # formats the image size based on config
    """
    Referencr: https://github.com/riadhayachi/faster-rcnn-keras
    """
    img_min_side = float(C.im_size)
    (height,width,_) = img.shape

    if width <= height:
        ratio = img_min_side/width
        new_height = int(ratio * height)
        new_width = int(img_min_side)
    else:
        ratio = img_min_side/height
        new_width = int(ratio * width)
        new_height = int(img_min_side)
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    return img, ratio

In [16]:
def format_img_channels(img, C):
    # formats the image channels based on config 
    """
    Referencr: https://github.com/riadhayachi/faster-rcnn-keras
    """
    img = img[:, :, (2, 1, 0)]
    img = img.astype(np.float32)
    img[:, :, 0] -= C.img_channel_mean[0]
    img[:, :, 1] -= C.img_channel_mean[1]
    img[:, :, 2] -= C.img_channel_mean[2]
    img /= C.img_scaling_factor
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=0)
    return img

### Reconstruct faster RCNN model

In [17]:
img_path = options.test_path

#### model input

In [19]:
##### this shape difference based on theano or tensorflow #####
if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
    input_shape_features = (num_features, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)
##### same input as we used to build the model in Frcnn_training
img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

#### frcnn network structure

In [20]:
######### layers in FRCNN #########
# raw feature extraction net, or shared CNN layer
shared_CNN = nn.nn_base(img_input, trainable=True)
# RPN layer
rpn_layers = nn.rpn(shared_CNN, num_anchors)
# classifier layer, from fast RCNN
classifier_layer = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

In [21]:
######### model built by layer #########
# RPN model
model_rpn = Model(img_input, rpn_layers)
# classifier model 
model_classifier = Model([feature_map_input, roi_input], classifier_layer)

In [22]:
# ------------------------------------------------------------------------
# B: here uploads weights for each model
#    choose a h5py file name: 
#          use it to get the weight of each model from traing process
#         
# ------------------------------------------------------------------------

C.model_path = '/Users/zehaodong/Desktop/faster-rcnn-keras/Built_model/model_vgg16_weights.h5'
#C.model_path = '/Users/zehaodong/Desktop/faster-rcnn-keras/Built_model/model_inception_weights.h5'
#C.model_path = '/Users/zehaodong/Desktop/faster-rcnn-keras/Built_model/model_resnet_weights.h5'
#C.model_path = '/Users/zehaodong/Desktop/faster-rcnn-keras/Built_model/model_best.hdf5'

In [23]:
######### import weights for model #########
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

In [24]:
######### add optimizer #########
model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')

### Test Data

In [25]:
######### initializer #########
all_imgs = []
classes = {}
bbox_threshold = 0.8
visualise = True

In [26]:
##### path set #####
img_path = '/Users/zehaodong/Desktop/faster-rcnn-keras/VOCdevkit_2'

In [27]:
img_path=os.path.join(img_path,'VOC2007')
img_path=os.path.join(img_path,'JPEGImages')

In [28]:
dddd = os.listdir(img_path)[:16]

In [29]:
img_name = dddd[0]
img_name

'005770.jpg'

In [30]:
filepath = os.path.join(img_path,img_name)

In [34]:
filepath

'/Users/zehaodong/Desktop/faster-rcnn-keras/VOCdevkit_2/VOC2007/JPEGImages/005770.jpg'

In [64]:
img = cv2.imread(filepath)

In [65]:
cv2.rectangle(img, (98,100), (485,250), (0, 0, 255), 2)
cv2.imwrite('/Users/zehaodong/Desktop/005770.png',img)

True

In [50]:
##### test process through visualization #####
"""
Reference: py-faster-rcnn (python + caffe, https://github.com/rbgirshick/py-faster-rcnn)
choose first 100 to see their performane
for conveniance to generate html file, just use 20
"""

for idx, img_name in enumerate(sorted(os.listdir(img_path)[:2000])):
    print('number of iteration', idx)
    if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
        continue
    print(img_name)
    st = time.time()
    filepath = os.path.join(img_path,img_name)
    img = cv2.imread(filepath)
    X, ratio = format_img(img, C)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}
    for jk in range(R.shape[0]//C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
        if jk == R.shape[0]//C.num_rois:
            #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded
        [P_cls, P_regr] = model_classifier.predict([F, ROIs])
        for ii in range(P_cls.shape[1]):
            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue
            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []
            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))
    all_dets = []
    for key in bboxes:
        bbox = np.array(bboxes[key])
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk,:]
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
            cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)
            textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
            all_dets.append((key,100*new_probs[jk]))
            (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
            textOrg = (real_x1, real_y1-0)
            cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
            cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
    cv2.imwrite('/Users/zehaodong/Desktop/faster-rcnn-keras/results_vgg_imgs/{}.png'.format(idx),img)

number of iteration 0
000008.jpg
number of iteration 1
000018.jpg
number of iteration 2
000022.jpg
number of iteration 3
000025.jpg
number of iteration 4
000027.jpg
number of iteration 5
000031.jpg
number of iteration 6
000037.jpg
number of iteration 7
000043.jpg
number of iteration 8
000045.jpg
number of iteration 9
000053.jpg
number of iteration 10
000057.jpg
number of iteration 11
000079.jpg
number of iteration 12
000080.jpg
number of iteration 13
000084.jpg
number of iteration 14
000085.jpg
number of iteration 15
000086.jpg
number of iteration 16
000087.jpg
number of iteration 17
000090.jpg
number of iteration 18
000092.jpg
number of iteration 19
000094.jpg
number of iteration 20
000119.jpg
number of iteration 21
000124.jpg
number of iteration 22
000126.jpg
number of iteration 23
000127.jpg
number of iteration 24
000137.jpg
number of iteration 25
000144.jpg
number of iteration 26
000145.jpg
number of iteration 27
000151.jpg
number of iteration 28
000152.jpg
number of iteration 29
0

KeyboardInterrupt: 

In [65]:
for jk in range(R.shape[0]//C.num_rois + 1):
    ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
    if ROIs.shape[1] == 0:
        break
    if jk == R.shape[0]//C.num_rois:
        #pad R
        curr_shape = ROIs.shape
        target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
        ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
        ROIs_padded[:, :curr_shape[1], :] = ROIs
        ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
        ROIs = ROIs_padded
    [P_cls, P_regr] = model_classifier.predict([F, ROIs])
    print(P_cls.shape[1])
    for ii in range(P_cls.shape[1]):
        if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
            print('wow')
            continue
        cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
        print(cls_name)
        if cls_name not in bboxes:
            bboxes[cls_name] = []
            probs[cls_name] = []
        (x, y, w, h) = ROIs[0, ii, :]
        cls_num = np.argmax(P_cls[0, ii, :])

        (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
        tx /= C.classifier_regr_std[0]
        ty /= C.classifier_regr_std[1]
        tw /= C.classifier_regr_std[2]
        th /= C.classifier_regr_std[3]
        x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)

        bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
        probs[cls_name].append(np.max(P_cls[0, ii, :]))


32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
32
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow
wow


In [55]:
R.shape[0]//C.num_rois

9

In [54]:
C.num_rois

32

In [60]:
bboxes

{}

In [58]:
class_mapping

{0: 'bus',
 1: 'person',
 2: 'chair',
 3: 'tvmonitor',
 4: 'cow',
 5: 'bird',
 6: 'car',
 7: 'motorbike',
 8: 'dog',
 9: 'horse',
 10: 'cat',
 11: 'bicycle',
 12: 'pottedplant',
 13: 'boat',
 14: 'bottle',
 15: 'diningtable',
 16: 'train',
 17: 'sofa',
 18: 'aeroplane',
 19: 'sheep',
 20: 'bg'}

In [None]:
all_dets = []
for key in bboxes:
    bbox = np.array(bboxes[key])
    new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
    for jk in range(new_boxes.shape[0]):
        (x1, y1, x2, y2) = new_boxes[jk,:]
        (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
        cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)
        textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
        all_dets.append((key,100*new_probs[jk]))
        (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
        textOrg = (real_x1, real_y1-0)
        cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
        cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
        cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
cv2.imwrite('/Users/zehaodong/Desktop/faster-rcnn-keras/results_vgg_imgs/{}.png'.format(idx),img)