In [35]:
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle
import math
import cv2
import copy
from matplotlib import pyplot as plt
import tensorflow as tf
import pandas as pd
import os

from sklearn.metrics import average_precision_score

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed
from keras.engine.topology import get_source_inputs
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.objectives import categorical_crossentropy

from keras.models import Model
from keras.utils import generic_utils
from keras.engine import Layer, InputSpec
from keras import initializers, regularizers

In [36]:
class Config:
    
    def __init__(self):
        
        #Print the process or not
        self.verbose = True
        
        #Name of base network
        self.network = 'vgg'
        
        #Settings for data augmentation
        self.use_horizontal_flips = False
        self.use_vertical_flips = False
        self.rot_90 = False
        
        self.anchor_box_scales = [64, 128, 256]
        self.anchor_box_ratios = [[1,1], [1./math.sqrt(2), 2/math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]]
        
        self.im_size = 300
        self.img_channel_mean = [103.939, 116.779, 123.68]
        self.img_scaling_factor = 1.0
        
        self.num_rois = 4
        self.rpn_stride = 16
        
        self.balanced_classes = False
        
        self.std_scaling = 4.0
        
        self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
        
        self.rpn_min_overlap = 0.3
        self.rpn_max_overlap = 0.7
        
        self.classifier_min_overlap = 0.1
        self.classifier_max_overlap = 0.5
        
        self.class_mapping = None
        
        self.model_path = None
        
        

In [37]:
def get_data(input_path):
    
    found_bg = False
    all_imgs = {}
    
    classes_count = {}
    
    class_mapping = {}
    
    visualise = True
    
    i = 1
    
    with open(input_path, 'r') as f:
        
        print('Parsing annotation files')
        
        for line in f:
            
            #Print process
            
            sys.stdout.write('\r' + 'idx=' + str(i))
            i += 1
            
            line_split = line.strip().split(',')
            
            (filename, x1, y1, x2, y2, class_name) = line_split
            
            if class_name not in classes_count:
                classes_count[class_name] = 1
            else:
                classes_count[class_name] += 1
            
            if class_name not in class_mapping:
                if class_name == 'bg' and found_bg == False:
                    print('Found class name with special name bg. Will be treated as a background region (this is usually for hard negative mining).')
                    found_bg = True
                
                class_mapping[class_name] = len(class_mapping)
            
            if filename not in all_imgs:
                all_imgs[filename] = {}
                img = cv2.imread(filename)
                (rows, cols) = img.shape[:2]
                all_imgs[filename]['filepath'] = filename
                all_imgs[filename]['width'] = cols
                all_imgs[filename]['height'] = rows
                all_imgs[filename]['bboxes'] = []
                
                
            all_imgs[filename]['bboxes'].append({'class': class_name, 'x1': int(x1), 'x2': int(x2), 'y1': int(y1), 'y2': int(y2)})
        
        all_data = []
        for key in all_imgs:
            all_data.append(all_imgs[key])
            
        if found_bg:
            if class_mapping['bg'] != len(class_mapping) - 1:
                key_to_switch = [key for key in class_mapping.keys() if class_mapping[key] == len(class_mapping) -1][0]
                val_to_switch = class_mapping['bg']
                class_mapping['bg'] = len(class_mapping) - 1
                class_mapping[key_to_switch] = val_to_switch
                    
        return all_data, classes_count, class_mapping

In [38]:
def RoiPoolingConv(Layer):
    
    def __init__(self, pool_size, num_rois, **kwargs):
        
        self.dim_ordering = K.image_dim_ordering()
        self.pool_size = pool_size
        self.num_rois = num_rois
        
        super(RoiPoolingConv, self).__init__(**kwargs)
        
    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]
        
    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.nb_channels
    
    def call(self, x, mask=None):
        
        assert(len(x) == 2)
        
        img = x[0]
        
        rois = x[1]
        
        input_shape = K.shape(img)
        
        outputs = []
        
        for roi_idx in range(self.num_rois):
            
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')
            
            rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)
            
        final_output = K.concatenate(outputs, axis = 0)
        
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
        
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
        
        return final_output
    
    def get_config(self):
        config = {'pool_size': self.pool_size,
                 'num_rois': self.num_rois}
        
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
                

In [39]:
def get_img_output_length(width, height):
    
    def get_output_length(input_length):
        return input_length/16
    
    return get_output_length(width), get_output_length(height)

def nn_base(input_tensor = None, trainable = False):
    
    input_shape = (None, None, 3)
    
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor
            
    bn_axis = 3
    
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides = (2, 2), name='block1_pool')(x)
    
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), activation='relu', padding='same', name='block2_conv3')(x)
    
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), activation='relu', padding='same', name='block2_conv4')(x)
    
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), activation='relu', padding='same', name='block4_conv4')(x)
    
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = MaxPooling2D((2, 2), activation='relu', padding='same', name='block5_conv4')(x)
    
    return x

In [40]:
def rpn_layer(base_layers, num_anchors):
    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer = 'normal', name='rpn_conv1')(base_layers)
    
    x_class = Conv2D(num_anchors, (1,1), activation='sigmoid', kernel_initializer='uniform', name='rpn_output_class')(x)
    
    x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_output_regress')(x)
    
    return [x_class, x_regr, base_layers]

In [41]:
def classifier_layer(base_layers, input_rois, num_rois, nb_classes = 4):
    
    input_shape = (num_rois, 7, 7, 512)
    
    pooling_regions = 7
    
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
    
    out = TimeDistributed(Flatten(name = 'flatten'))
    out = TimeDistributed(Dense(4096, activation='relu', name = 'fc1'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name = 'fc2'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    
    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    
    return [out_class, out_regr]

In [42]:
def union(au, bu, area_intersection):
    area_a = (au[2] - au[0]) * (au[3] - au[1])
    area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])
    area_union = area_a + area_b - area_intersection
    return area_union

def intersection(ai, bi):
    x = max(ai[0], bi[0])
    y = max(ai[1], bi[1])
    w = min(ai[2], bi[2]) - x
    h = min(ai[3], bi[3]) - y
    if w < 0 or h < 0:
        return 0
    return w*h

def iou(a, b):
    if a[0] >= 2 or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:
        return 0.0
    
    area_i = intersection(a, b)
    area_u = union(a, b, area_i)
    
    return float(area_i) / float(area_u + 1e-6)

In [43]:
def calc_rpn(C, img_data, width, height, resized_width, resized_height, img_length_calc_function):
    
    downscale = float(C.rpn_stride)
    anchor_sizes = C.anchor_box_scales
    anchor_ratios = C.anchor_box_ratios
    num_anchors = len(anchor_sizes) * len(anchor_ratios)
    
    (output_width, output_height) = img_length_calc_function(resized_width, resized_height)
    
    n_anchratios = len(anchor_ratios)
    
    y_rpn_overlap = np.zeros((output_height, output_width, num_anchors))
    y_is_box_valid = np.zeros((output_height, output_width, num_anchors))
    y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4))
    
    num_bboxes = len(img_data['bboxes'])
    
    num_anchors_for_bbox = np.zeros(num_bboxes).astype(int)
    best_anchor_for_bbox = -1*np.ones((num_bboxes, 4)).astype(int)
    best_iou_for_bbox = np.zeros(num_bboxes).astype(np.float32)
    best_x_for_bbox = np.zeros((num_bboxes, 4)).astype(int)
    best_dx_for_bbox = np.zeros((num_bboxes, 4)).astype(float32)
    
    gta = np.zeros((num_bboxes, 4))
    for bbox_num, bbox in enumerate(img_data['bboxes']):
        gta[bbox_num, 0] = bbox['x1'] * (resized_width / float(width))
        gta[bbox_num, 1] = bbox['x2'] * (resized_width / float(width))
        gta[bbox_num, 2] = bbox['y1'] * (resized_height / float(height))
        gta[bbox_num, 3] = bbox['y2'] * (rresized_height / float(height))
    
    for anchor_size_idx in range(len(anchor_sizes)):
        for anchor_ratio_idx in range(n_anchratios):
            anchor_x = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0]
            anchor_y = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1]
            
            for ix in range(output_width):
                x1_anc = downscale * (ix + 0.5) - anchor_x / 2
                x2_anc = downscale * (ix + 0.5) + anchor_x / 2
                
                if x1_anc < 0 or x2_anc > resized_width:
                    continue
                
                for jy in range(output_height):
                    
                    y1_anc = downscale * (jy + 0.5) - anchor_y / 2
                    y2_anc = downscale * (jy + 0.5) - anchor_y / 2
                    
                    if y1_anc < 0 or y2_anc > resized_height:
                        continue
                    
                    bbox_type = 'neg'
                    
                    best_iou_for_loc = 0.0
                    
                    for bbox_num in range(num_bboxes):
                        
                        curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1_anc, y1_anc, x2_anc, y2_anc])
                        
                        if curr_iou > best_iou_for_bbox[bbox_num] or curr_iou > C.rpn_max_overlap:
                            cx = (gta[bbox_num, 0] + gta[bbox_num, 1]) / 2.0
                            cy = (gta[bbox_num, 2] + gta[bbox_num, 3]) / 2.0
                            cxa = (x1_anc + x2_anc) / 2.0
                            cya = (y1_anc + y2_anc) / 2.0
                            
                            tx = (cx - cxa) / (x2_anc - x1_anc)
                            ty = (cy - cya) / (y2_anc - y1_anc)
                            tw = np.log((gta[bbox_num, 1] - gta[bbox_num, 0]) / (x2_anc - x1_anc))
                            th = np.log((gta[bbox_num, 3] - gta[bbox_num, 2]) / (y2_anc - y1_anc))
                            
                        if img_data['bboxes'][bbox_num]['class'] != 'bg':
                            
                            if curr_iou > best_iou_for_bbox[bbox_num]:
                                best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx]
                                best_iou_for_bbox[bbox_num] = curr_iou
                                best_x_for_bbox[bbox_num,:] = [x1_anc, x2_anc, y1_anc, y2_anc]
                                best_dx_for_bbox[bbox_num,:] = [tx, ty, tw, th]
                                
                            if curr_iou > C.rpn_max_overlap:
                                bbox_type = 'pos'
                                num_anchors_for_bbox[bbox_num] += 1
                                
                                if curr_iou > best_iou_for_loc:
                                    best_iou_for_loc = curr_iou
                                    
                                    best_regr = (tx, ty, tw, th)
                                    
                            if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap:
                                
                                if bbox_type != 'pos':
                                    bbox_type = 'neutral'
                                    
                    if bbox_type == 'neg':
                        y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
                        y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
                        
                    elif bbox_type =='neutral':
                        y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
                        y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
                                
                    elif bbox_type == 'pos':
                        y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
                        y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
                        start = 4 * (anchor_ratio_idx + n_anchor * anchor_size_idx)
                        y_rpn_regr[jy, ix, start:start+4] = best_regr
                        
    
    for idx in range(num_anchors_for_bbox.shape[0]):
        if num_anchor_for_bbox[idx] == 0:
            if best_anchor_for_bbox[idx, 0] == -1:
                continue
            y_is_box_valid[best_anchor_for_bbox[idx, 0], best_anchor_for_bbox[idx, 1], best_anchor_for_bbox[idx, 2] + n_anchratios * best_anchor_for_bbox[idx, 3]] = 1
            y_rpn_overlap[best_anchor_for_bbox[idx, ]]
            
            start = 4 * (best_anchor_for_bbox[idx, 2] + n_anchratios * best_anchor_for_bbox[idx, 3])
            y_rpn_regr[best_anchor_for_bbox[idx, 0], best_anchor_for_bbox[idx, 1],start:start+4] = best_dx_for_bbox[idx, :]
            
    y_rpn_overlap = np.transpose(u_rpn_overlap, (2, 0, 1))
    y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)
    
    y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1))
    y_is_box_valid = np.expand_dims(y_is_box_valid, axis = 0)
    
    y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1))
    y_rpn_regr = np.expand_dims(y_rpn_regr, axis = 0)
    
    pos_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 1, y_is_box_valid[0, :, :, :] == 1))
    neg_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 0, y_is_box_valid[0, :, :, :] == 1))
    
    num_pos = len(pos_loc[0])
    
    num_regions = 256
    
    if len(pos_locs[0]) > num_regions/2:
        val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions/2)
        y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0
        num_pos = num_regions / 2
        
    if len(neg_locs[0]) + num_pos > num_regions:
        val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos)
        y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0
        
    y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis = 1)
    y_rpn_regr = np. concatenate([ng.repeat(y_rpn_overlap, 4, axis = 1), y_rpn_regr], axis = 1)
    
    return np.copy(y_rpn_cls), np.copy(y_rpn_regr), num_pos
            
    

In [44]:
def get_new_img_size(width, height, img_min_side = 300):
    if width <= height:
        f = float(img_min_side) / width
        resized_height = int(f * height)
        resized_width = img_min_side
    else:
        f = float(img_min_side) / height
        resized_width = int(f * width)
        resized_height = img_min_side
        
def augment(img_data, config, augment=True):
    assert 'filepath' in img_data
    assert 'bboxes' in img_data
    assert 'width' in img_data
    assert 'height' in img_data
    
    img_data_aug = copy.deepcopy(img_data)
    
    img = cv2.imread(img_data_aug['filepath'])
    
    if augment:
            rows, cols = img.shape[:2]
            
            if config.use_horizontal_flips and np.random.randint(0, 2) == 0:
                img = cv2.flip(img, 1)
                for bbox in img_data_aug['bboxes']:
                    x1 = bbox['x1']
                    x2 = bbox['x2']
                    bbox['x2'] = cols - x1
                    bbox['x1'] = cols - x2
                    
            if config.use_vertical_flips and np.random.randint(0, 2) == 0:
                img = cv2.flip(img, 0)
                for bbox in img_data_aug['bboxes']:
                    y1 = bbox['y1']
                    y2 = bbox['y2']
                    bbox['y2'] = rows - y1
                    bbox['y1'] = rows - y2
                    
            if config.rot_90:
                angle = np.random.choice([0, 90, 180, 270], 1)[0]
                if angle == 270:
                    img = np.transpose(img, (1, 0, 2))
                    img = cv2.flip(img, 0)
                elif angle == 180:
                    img = cv2.flip(img, -1)
                elif angle == 90:
                    img = np.transpose(img, (1, 0, 2))
                    img = cv2.flip(img, 1)
                elif angle == 0:
                    pass
                
                for bbox in img_data_aug['bboxes']:
                    x1 = bbox['x1']
                    x2 = bbox['x2']
                    y1 = bbox['y1']
                    y2 = bbox['y2']
                if angle == 270:
                    bbox['x1'] = y1
                    bbox['x2'] = y2
                    bbox['y1'] = cols - x2
                    bbox['y2'] = cols - x1
                elif angle == 180:
                    bbox['x2'] = cols - x1
                    bbox['x1'] = cols - x2
                    bbox['y2'] = rows - y1
                    bbox['y1'] = rows - y2
                elif angle == 90:
                    bbox['x1'] = rows - y2
                    bbox['x2'] = rows - y1
                    bbox['y1'] = x1
                    bbox['y2'] = x2        
                elif angle == 0:
                    pass
                        
    img_data_aug['width'] = img.shape[1]
    img_data_aug['height'] = img.shape[0]
    return img_data_aug, img

In [82]:
def get_anchor_gt(all_img_data, C, img_length_calc_function, mode='train'):
    while True:
        
        print(type(all_img_data))
        
        for img_data in all_img_data:
            try:

                print(type(img_data))
                
                if mode == 'train':
                    img_data_aug, x_img = augment(img_data, C, augment=True)
                else:
                    img_data_aug, x_img = augment(img_data, C, augment=False)

                print(type(img_data_aug))
                    
                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape

                assert cols == width
                assert rows == height

                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)

                x_img = cv2.resize(x_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
                debug_img = x_img.copy()

                try:
                    print(" ")
                    y_rpn_cls, y_rpn_regr, num_pos = calc_rpn(C, img_data_aug, width, height, resized_width, resized_height, img_length_calc_function)
                except:
                    continue
                
                print(type(x_img))

                x_img = x_img[:,:, (2, 1, 0)]  # BGR -> RGB
                x_img = x_img.astype(np.float32)
                x_img[:, :, 0] -= C.img_channel_mean[0]
                x_img[:, :, 1] -= C.img_channel_mean[1]
                x_img[:, :, 2] -= C.img_channel_mean[2]
                x_img /= C.img_scaling_factor

                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                print(type(y_rpn_regr))
                y_rpn_regr[:, y_rpn_regr.shape[1]//2:, :, :] *= C.std_scaling

                x_img = np.transpose(x_img, (0, 2, 3, 1))
                print("y_rpn_cls type")
                print(type(y_rpn_cls))
                y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug, debug_img, num_pos

            except Exception as e:
                print("Test")
                print(e)
                #continue

In [46]:
lambda_rpn_regr = 1.0
lambda_rpn_class = 1.0

lambda_cls_regr = 1.0
lambda_cls_class = 1.0

epsilon = 1e-4

In [47]:
def rpn_loss_regr(num_anchors):
    
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        
        x_abs = K.abs(x)
        
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
        
        return lambda_rpn_regr * K.sum(
            y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])
        
    return rpn_loss_regr_fixed_num


def rpn_loss_cls(num_anchors):
    
    def rpn_loss_cls_fixed_num(y_true, y_pred):
        return lambda_rpn_class * K.sum(y_true[:, :, :, :num_anchors] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors])) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
    
    return rpn_loss_cls_fixed_num


def class_loss_regr(num_classes):
    
    def class_loss_regr_fixed_num(y_true, y_pred):
        x = y_true[:, :, 4*num_classes:] - y_pred
        x_abs = K.abs(x)
        x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
        return lambda_cls_regr * K.sum(y_true[:, :, :4*num_classes] * (x_bool * (0.5*x*x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :4*num_classes])
    
    return class_loss_regr_fixed_num

def class_loss_cls(y_true, y_pred):
    return lambda_cls_class * K.mean(categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :]))

    

In [48]:
def non_max_suppression_fast(boxes, probs, overlap_thresh = 0.9, max_boxes = 300):
    
    if len(boxes) == 0:
        return []
    
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    
    np.testing.assert_array_less(x1, x2)
    np.testing.assert_array_less(y1, y2)
    
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
        
    pick = []
    
    area = (x2 - x1) * (y2 - y1)
    
    idxs = np.argsort(probs)
    
    while len(idxs) > 0:
        
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        
        xx1_int = np.maximum(x1[i], x1[idxs[:last]])
        yy1_int = np.maximum(y1[i], y1[idxs[:last]])
        xx2_int = np.minimum(x2[i], x2[idxs[:last]])
        yy2_int = np.minimum(y2[i], y2[idxs[:last]])
        
        ww_int = np.maximum(0, xx2_int - xx1_int)
        hh_int = np.maximum(0, yy2_int - yy1_int)
        
        area_int = ww_int * hh_int
        
        area_union = area[i] + area[idxs[:last]] - area_int
        
        overlap = area_int / (area_union + 1e-6)
        
        idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlap_thresh)[0])))
        
        if len(pick) >= max_boxes:
            break
            
    boxes = boxes[pick].astype("int")
    probs = probs[pick]
    return boxes, probs
    
    
def apply_regr_np(X, T):
    
    try:
        x = X[0, :, :]
        y = X[1, :, :]
        w = X[2, :, :]
        h = X[3, :, :]
        
        tx = T[0, :, :]
        ty = T[1, :, :]
        tw = T[2, :, :]
        th = T[3, :, :]
        
        cx = x + w/2.
        cy = y + h/2.
        cx1 = tx * w + cx
        cy1 = ty * h + cy
        
        w1 = np.exp(tw.astype(np.float64)) * w
        h1 = np.exp(th.astype(np.float64)) * h
        x1 = cx1 - w1/2.
        y1 = cy1 - h1/2.
        
        x1 = np.round(x1)
        y1 = np.round(y1)
        w1 = np.round(w1)
        h1 = np.round(h1)
        
        return np.stack([x1, y1, w1, h1])
    except Exception as e:
        print(e)
        return X
    
    
def apply_regr(x, y, w, h, tx, ty, tw, th):
    try:
        cx = x + w/2.
        cy = y + h/2.
        cx1 = tx * w + cx
        cy1 = ty * h + cy
        w1 = math.exp(tw) * w
        h1 = math.exp(th) * h
        x1 = cx1 - w1/2.
        y1 = cy1 - h1/2.
        x1 = int(round(x1))
        y1 = int(round(y1))
        w1 = int(round(w1))
        h1 = int(round(h1))
        
        return x1, y1, w1, h1
    
    except ValueError:
        return x, y, w, h
    except OverflowError:
        return x, y, w, h
    except Exception as e:
        print(e)
        return x, y, w, h

def calc_iou(R, img_data, C, class_mapping):
    
    bboxes = img_data['bboxes']
    (width, height) = (img_data['width'], img_data['height'])
    (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
    
    gta = np.zeros((len(bboxes), 4))
    
    for bbox_num, bbox in enumerate(bboxes):
        gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))/C.rpn_stride))
        gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))/C.rpn_stride))
        
    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []
    
    for ix in range(R.shape[0]):
        (x1, y1, x2, y2) = R[ix, :]
        x1 = int(round(x1))
        y1 = int(round(y1))
        x2 = int(round(x2))
        y2 = int(round(y2))
        
        best_iou = 0.0
        best_bbox = -1
        
        for bbox_num in range(len(bboxes)):
            curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num
                
        if best_iou < C.classifier_min_overlap:
            continue
            
        else:
            w = x2 - x1
            h = y2 - y1
            
            x_roi.append([x1, y1, w, h])
            IoUs.append(best_iou)
            
            if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
                cls_name = 'bg'
            elif C.classifier_max_overlap <= best_iou:
                cls_name = bboxes[best_bbox]['class']
                cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
                cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0
                
                cx = x1 + w / 2.0
                cy = y1 + h / 2.0
                
                tx = (cxg - cx) / float(w)
                ty = (cyg - cy) / float(h)
                tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
                th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
                
            else:
                print('roi = {}'.format(best_iou))
                raise RuntimeError
        
        class_num = class_mapping[cls_name]
        class_label = len(class_mapping) * [0]
        class_label[class_num] = 1
        y_class_num.append(copy.deepcopy(class_label))
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:4+label_pos] = [sx*tx, sy*ty, sw*tw, sh*th]
            labels[label_pos:4+label_pos] = [1, 1, 1, 1]
        else:
            y_class_regr_coords.append(copy.deepcopy(coords))
            y_class_regr_label.append(copy.deepcopy(labels))
            
    if len(x_roi) == 0:
        return None, None, None, None
    
    X = np.array(x_roi)
    Y1 = np.array(y_class_num)
    Y2 = np.concatenate([np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis = 1)
    
    return np.expand_dims(X, axis = 0), np.expand_dims(Y1, axis = 0), np.expand_dims(Y2, axis = 0), IoUs

In [49]:
def rpn_to_roi(rpn_layer, regr_layer, C, dim_ordering, use_regr=True, max_boxes =300, overlap_thresh = 0.9):
    
    regr_layer = regr_layer / C.std_scaling
    
    anchor_sizes = C.anchor_box_scales
    anchor_ratios = C.anchor_box_ratios
    
    assert rpn_layer.shape[0] == 1
    
    (rows, cols) = rpn_layer.shape[1:3]
    
    curr_layer = 0
    
    A = np.zeros((4, rpn_layer.shape[1], rpn_layer.shape[2], rpn_layer.shape[3]))
    
    for anchor_size in anchor_sizes:
        for anchor_ratio in anchor_ratios:
            anchor_x = (anchor_size * anchor_ratio[0])/C.rpn_stride
            anchor_y = (anchor_size * anchor_ratio[1])/C.rpn_stride
            
            regr = regr_layer[0, :, :, 4 * curr_layer + 4]
            
            regr = np.transpose(regr, (2, 0, 1))
            
            X, Y = np.meshgrid(np.arange(cols), np.arange(rows))
            
            A[0, :, :, curr_layer] = X - anchor_x/2
            A[1, :, :, curr_layer] = Y - anchor_y/2
            A[2, :, :, curr_layer] = anchor_x
            A[3, :, :, curr_layer] = anchor_y
            
            if use_regr:
                A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr)
                
            A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer])
            
            A[2, :, :, curr_layer] += A[0, :, :, curr_layer]
            A[3, :, :, curr_layer] += A[1, :, :, curr_layer]
            
            A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer])
            A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer])
            A[2, :, :, curr_layer] = np.minimum(cols - 1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.minimum(rows - 1, A[3, :, :, curr_layer])
            
            curr_layer += 1
            
    all_boxes = np.reshape(A.transpose((0, 3, 1, 2)), (4, -1)).transpose((1, 0))
    all_probs = rpn_layer.transpose((0, 3, 1, 2)).reshape((-1))
    
    x1 = all_boxes[:, 0]
    y1 = all_boxes[:, 1]
    x2 = all_boxes[:, 2]
    y2 = all_boxes[:, 3]
    
    idxs = np.where((x1 - x2 >= 0) | (y1 - y2 >= 0))
    
    all_boxes = np.delete(all_boxes, idxs, 0)
    all_probs = np.delete(all_probs, idxs, 0)
    
    result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes = max_boxes)[0]
    
    return result

    
    
    
    

In [50]:
base_path = '.\\'

train_path = '.\\Dataset\\Open Images Dataset v4\\annotation.txt'

num_rois = 4

horizontal_flips = True
vertical_flips = True
rot_90 = True

output_weight_path = os.path.join(base_path, 'model\\model_frcnn_vgg.hdf5')

record_path = os.path.join(base_path, 'model\\record.csv')

base_weight_path = os.path.join(base_path, 'model\\vgg16_weights_tf_dim_ordering_tf_kernels.h5')

config_output_filename = os.path.join(base_path, 'model_vgg_config.pickle')

In [51]:
C = Config()

C.use_horizontal_flips = horizontal_flips
C.use_vertical_flips = vertical_flips
C.rot_90 = rot_90

C.record_path = record_path
C.model_path = output_weight_path
C.num_rois = num_rois

C.base_net_weights = base_weight_path

In [52]:
st = time.time()
train_imgs, classes_count, class_mapping = get_data(train_path)
print()
print('Spend %0.2f mins to load the data' % ((time.time()-st)/60) )

Parsing annotation files
idx=7174
Spend 0.33 mins to load the data


In [None]:
train_imgs

In [54]:
if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)
    
C.class_mapping = class_mapping

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))
print(class_mapping)

with open(config_output_filename, 'wb') as config_f:
    pickle.dump(C, config_f)
    print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))

Training images per class:
{'Car': 2235, 'Mobile phone': 1256, 'Person': 3683, 'bg': 0}
Num classes (including bg) = 4
{'Mobile phone': 0, 'Person': 1, 'Car': 2, 'bg': 3}
Config has been written to .\model_vgg_config.pickle, and can be loaded when testing to ensure correct results


In [58]:
random.seed(1)
random.shuffle(train_imgs)

print('Num train samples (images) {}'.format(len(train_imgs)))

Num train samples (images) 2398


In [83]:
data_gen_train = get_anchor_gt(train_imgs, C, get_img_output_length, mode='train')

In [84]:
X, Y, image_data, debug_img, debug_num_pos = next(data_gen_train)

<class 'list'>
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<class 'dict'>
Test
'NoneType' object is not iterable
<class 'dict'>
<cla

KeyboardInterrupt: 