# Intro to Table Detection with Fast RCNN

Starting from an ImageNet-pretrained model such as VGG16, we can add a few more convolutional layers to construct a region proposal network (RPN). This module extracts regions of interest (RoIs) that tell the model where in the image to look for an object.


When the RoIs are applied, we do max pooling only within each region of interest, so as to find an embedding that uniquely identifies that area of the input as well as to build a description of what object might lie in that region. From this description, the model can then classify that region into one of the k categories it was trained to recognize.


In [1]:
# Train Fast RCNN

import logging
import pprint
import mxnet as mx
import numpy as np

from rcnn.config import config, default, generate_config
from rcnn.symbol import *
from rcnn.core import callback, metric
from rcnn.core.loader import AnchorLoader
from rcnn.core.module import MutableModule
from rcnn.utils.load_data import load_gt_roidb, merge_roidb, filter_roidb
from rcnn.utils.load_model import load_param


def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Assemble the training symbol, data loader and module, then call ``fit``.

    Args:
        args: Namespace-like object. The attributes read here are ``network``,
            ``image_set``, ``dataset``, ``root_path``, ``dataset_path``,
            ``no_flip``, ``no_shuffle``, ``work_load_list``, ``resume``,
            ``frequent`` and ``kvstore``.
        ctx: Sequence of device contexts; its length is used as the device
            count.
        pretrained: Checkpoint prefix given to ``load_param`` when not resuming.
        epoch: Checkpoint epoch given to ``load_param`` when not resuming.
        prefix: Prefix used to reload parameters on resume and handed to the
            epoch-end checkpoint callback.
        begin_epoch: First epoch to run; also the epoch reloaded on resume.
        end_epoch: Forwarded to ``fit`` as ``num_epoch``.
        lr: Base learning rate, before the decay steps that already lie at or
            before ``begin_epoch`` are applied.
        lr_step: Comma-separated epoch numbers that are converted to iteration
            counts for ``MultiFactorScheduler`` (factor 0.1).
    """
    # Root logger at INFO level; the same logger is handed to the module below.
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Override the shared training configuration.
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # Look up the symbol factory by name.
    # NOTE(review): eval() on a string built from args.network -- only safe
    # while args.network comes from a trusted source.
    symbol_factory = eval('get_' + args.network + '_train')
    sym = symbol_factory(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # One batch slice per device.
    device_count = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * device_count

    pprint.pprint(config)

    # Load one roidb per '+'-separated image set, then merge and filter them.
    roidbs = []
    for image_set in args.image_set.split('+'):
        roidbs.append(load_gt_roidb(args.dataset, image_set, args.root_path,
                                    args.dataset_path, flip=not args.no_flip))
    roidb = filter_roidb(merge_roidb(roidbs))

    # Training data iterator.
    train_data = AnchorLoader(
        feat_sym, roidb,
        batch_size=input_batch_size,
        shuffle=not args.no_shuffle,
        ctx=ctx,
        work_load_list=args.work_load_list,
        feat_stride=config.RPN_FEAT_STRIDE,
        anchor_scales=config.ANCHOR_SCALES,
        anchor_ratios=config.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # Largest data shape: the per-position maximum over the configured scales.
    max_scale_0 = max(scale[0] for scale in config.SCALES)
    max_scale_1 = max(scale[1] for scale in config.SCALES)
    max_data_shape = [('data', (input_batch_size, 3, max_scale_0, max_scale_1))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # Infer argument / output / auxiliary shapes from what the loader provides.
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # Either resume from our own checkpoint, or start from the pretrained
    # weights and freshly initialise the listed layers.
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # (weight name, bias name, normal-init second argument); the order is
        # kept so the sequence of random draws is unchanged.
        fresh_layers = (
            ('rpn_conv_3x3_weight', 'rpn_conv_3x3_bias', 0.01),
            ('rpn_cls_score_weight', 'rpn_cls_score_bias', 0.01),
            ('rpn_bbox_pred_weight', 'rpn_bbox_pred_bias', 0.01),
            ('cls_score_weight', 'cls_score_bias', 0.01),
            ('bbox_pred_weight', 'bbox_pred_bias', 0.001),
        )
        for weight_name, bias_name, sigma in fresh_layers:
            arg_params[weight_name] = mx.random.normal(0, sigma, shape=arg_shape_dict[weight_name])
            arg_params[bias_name] = mx.nd.zeros(shape=arg_shape_dict[bias_name])

    def _check_param(name, params, inferred):
        # A parameter must be present and match the inferred shape.
        assert name in params, name + ' not initialized'
        assert params[name].shape == inferred[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(inferred[name]) + ' provided ' + str(params[name].shape)

    # Data and label inputs are fed by the loader, so they have no parameter.
    for arg_name in sym.list_arguments():
        if arg_name not in data_shape_dict:
            _check_param(arg_name, arg_params, arg_shape_dict)
    for aux_name in sym.list_auxiliary_states():
        _check_param(aux_name, aux_params, aux_shape_dict)

    # Module wrapping the symbol.
    data_names = [entry[0] for entry in train_data.provide_data]
    label_names = [entry[0] for entry in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=config.FIXED_PARAMS)

    # Composite metric: three RPN metrics followed by three RCNN metrics.
    child_metrics = [
        metric.RPNAccMetric(),
        metric.RPNLogLossMetric(),
        metric.RPNL1LossMetric(),
        metric.RCNNAccMetric(),
        metric.RCNNLogLossMetric(),
        metric.RCNNL1LossMetric(),
    ]
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in child_metrics:
        eval_metrics.add(child_metric)

    # Callbacks: speed logging per batch, checkpoint per epoch.
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # Learning-rate schedule. Steps at or before begin_epoch are folded into
    # the starting rate; the remaining ones become iteration counts.
    lr_factor = 0.1
    step_epochs = [int(token) for token in lr_step.split(',')]
    pending_steps = [step - begin_epoch for step in step_epochs if step > begin_epoch]
    steps_already_taken = len(step_epochs) - len(pending_steps)
    start_lr = lr * (lr_factor ** steps_already_taken)
    # NOTE(review): the divisor is the device count, not input_batch_size; the
    # two are equal only because BATCH_IMAGES is forced to 1 above.
    lr_iters = [int(step * len(roidb) / device_count) for step in pending_steps]
    print('lr', start_lr, 'lr_epoch_diff', pending_steps, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # Optimizer settings.
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': start_lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / device_count),
        'clip_gradient': 5,
    }

    # Run training.
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)



In [None]:
## Training Args
class DictToObject:
    """Expose keyword arguments as attributes of the instance.

    ``DictToObject(**{'a': 1}).a`` evaluates to ``1``.
    """

    def __init__(self, **entries):
        # Copy every keyword straight into the instance namespace.
        vars(self).update(entries)

# Training settings, wrapped so train_net can read them as attributes.
args = DictToObject(**{
    'lr': 0.001,
    'image_set': '2007_trainval',
    'network': 'vgg',
    'resume': False,
    'pretrained': 'model/vgg16',
    'root_path': 'data',
    'dataset': 'PascalVOC',
    'lr_step': '7',
    'prefix': 'model/orge2e',
    'end_epoch': 10,
    'dataset_path': 'data/VOCdevkit',
    'gpus': '0',
    'no_flip': False,
    'no_shuffle': False,
    'begin_epoch': 0,
    'work_load_list': None,
    'pretrained_epoch': 0,
    'kvstore': 'device',
    'frequent': 20,
})

# One GPU context per comma-separated id. Splitting a string without commas
# yields a single-element list, so this also covers the single-GPU case.
ctx = [mx.gpu(int(gpu_id)) for gpu_id in args.gpus.split(',')]

train_net(args, ctx,
          pretrained=args.pretrained,
          epoch=args.pretrained_epoch,
          prefix=args.prefix,
          begin_epoch=args.begin_epoch,
          end_epoch=args.end_epoch,
          lr=args.lr,
          lr_step=args.lr_step)


INFO:root:voc_2007_trainval num_images 5011
INFO:root:voc_2007_trainval gt roidb loaded from data/cache/voc_2007_trainval_gt_roidb.pkl
INFO:root:voc_2007_trainval append flipped images to roidb


{'ANCHOR_RATIOS': [0.5, 1, 2],
 'ANCHOR_SCALES': [8, 16, 32],
 'FIXED_PARAMS': ['conv1', 'conv2'],
 'FIXED_PARAMS_SHARED': ['conv1', 'conv2', 'conv3', 'conv4', 'conv5'],
 'IMAGE_STRIDE': 0,
 'NUM_ANCHORS': 9,
 'NUM_CLASSES': 21,
 'PIXEL_MEANS': array([ 103.939,  116.779,  123.68 ]),
 'RCNN_FEAT_STRIDE': 16,
 'RPN_FEAT_STRIDE': 16,
 'SCALES': [(600, 1000)],
 'TEST': {'BATCH_IMAGES': 1,
          'CXX_PROPOSAL': True,
          'HAS_RPN': False,
          'NMS': 0.3,
          'PROPOSAL_MIN_SIZE': 16,
          'PROPOSAL_NMS_THRESH': 0.7,
          'PROPOSAL_POST_NMS_TOP_N': 2000,
          'PROPOSAL_PRE_NMS_TOP_N': 20000,
          'RPN_MIN_SIZE': 16,
          'RPN_NMS_THRESH': 0.7,
          'RPN_POST_NMS_TOP_N': 300,
          'RPN_PRE_NMS_TOP_N': 6000},
 'TRAIN': {'ASPECT_GROUPING': True,
           'BATCH_IMAGES': 1,
           'BATCH_ROIS': 128,
           'BBOX_MEANS': [0.0, 0.0, 0.0, 0.0],
           'BBOX_NORMALIZATION_PRECOMPUTED': True,
           'BBOX_REGRESSION_THRESH': 0.

[393 498]
data/VOCdevkit/VOC2007/JPEGImages/000141.jpg
[[ 82  52 475 398]]
[82]
[475]
data/VOCdevkit/VOC2007/JPEGImages/000142.jpg
[[ 35  80 474 265]]
[35]
[474]
data/VOCdevkit/VOC2007/JPEGImages/000143.jpg
[[  0  45 330 332]]
[0]
[330]
data/VOCdevkit/VOC2007/JPEGImages/000146.jpg
[[122 207 219 407]]
[122]
[219]
data/VOCdevkit/VOC2007/JPEGImages/000147.jpg
[[435 104 467 145]
 [386 113 423 148]
 [325 124 354 149]
 [ 98 120 127 152]
 [ 35 111  65 148]]
[435 386 325  98  35]
[467 423 354 127  65]
data/VOCdevkit/VOC2007/JPEGImages/000150.jpg
[[181  77 314 263]
 [121  53 147 107]
 [ 64  50  85 108]]
[181 121  64]
[314 147  85]
data/VOCdevkit/VOC2007/JPEGImages/000153.jpg
[[142 146 263 190]]
[142]
[263]
data/VOCdevkit/VOC2007/JPEGImages/000154.jpg
[[133  75 441 265]]
[133]
[441]
data/VOCdevkit/VOC2007/JPEGImages/000156.jpg
[[119  72 459 351]
 [  4 102  68 143]]
[119   4]
[459  68]
data/VOCdevkit/VOC2007/JPEGImages/000158.jpg
[[233  82 304 206]]
[233]
[304]
data/VOCdevkit/VOC2007/JPEGImages/0

data/VOCdevkit/VOC2007/JPEGImages/000411.jpg
[[ 82 189 113 276]
 [  7 193  41 274]
 [ 55 195  73 235]
 [121 192 152 293]
 [185 194 220 296]
 [150 184 175 292]]
[ 82   7  55 121 185 150]
[113  41  73 152 220 175]
data/VOCdevkit/VOC2007/JPEGImages/000416.jpg
[[101 138 170 247]
 [241 170 319 251]
 [155 153 283 243]]
[101 241 155]
[170 319 283]
data/VOCdevkit/VOC2007/JPEGImages/000417.jpg
[[131  81 326 374]]
[131]
[326]
data/VOCdevkit/VOC2007/JPEGImages/000419.jpg
[[ 14  38 477 321]
 [459  15 498 220]
 [391   0 439  61]]
[ 14 459 391]
[477 498 439]
data/VOCdevkit/VOC2007/JPEGImages/000420.jpg
[[ 25  28 422 346]]
[25]
[422]
data/VOCdevkit/VOC2007/JPEGImages/000424.jpg
[[366 158 435 215]
 [216 196 311 253]
 [ 99 208 173 251]
 [ 14 176  89 226]]
[366 216  99  14]
[435 311 173  89]
data/VOCdevkit/VOC2007/JPEGImages/000427.jpg
[[114  23 461 274]]
[114]
[461]
data/VOCdevkit/VOC2007/JPEGImages/000428.jpg
[[124 105 424 403]
 [  0 322 296 411]
 [149 179 479 413]]
[124   0 149]
[424 296 479]
data/VO

[276 226   1]
[482 347 430]
data/VOCdevkit/VOC2007/JPEGImages/000728.jpg
[[  0  38 413 330]]
[0]
[413]
data/VOCdevkit/VOC2007/JPEGImages/000729.jpg
[[  3   5 365 497]]
[3]
[365]
data/VOCdevkit/VOC2007/JPEGImages/000730.jpg
[[  5  51 495 233]]
[5]
[495]
data/VOCdevkit/VOC2007/JPEGImages/000731.jpg
[[ 92 386 191 499]
 [ 29  74 316 372]
 [164   0 328 272]]
[ 92  29 164]
[191 316 328]
data/VOCdevkit/VOC2007/JPEGImages/000733.jpg
[[177  24 402 382]
 [  2 200 347 434]]
[177   2]
[402 347]
data/VOCdevkit/VOC2007/JPEGImages/000738.jpg
[[  4 123 499 234]]
[4]
[499]
data/VOCdevkit/VOC2007/JPEGImages/000739.jpg
[[  0  29  96 373]
 [ 21  82 114 373]
 [ 85  64 228 373]
 [165  54 261 373]
 [248  54 335 373]
 [307  65 403 373]
 [392  25 497 373]]
[  0  21  85 165 248 307 392]
[ 96 114 228 261 335 403 497]
data/VOCdevkit/VOC2007/JPEGImages/000740.jpg
[[ 96  84 210 275]
 [288 103 396 270]]
[ 96 288]
[210 396]
data/VOCdevkit/VOC2007/JPEGImages/000742.jpg
[[ 88 128 109 167]
 [189 144 499 334]
 [204  98 2

data/VOCdevkit/VOC2007/JPEGImages/000977.jpg
[[321 270 351 294]
 [406 261 464 308]
 [237 244 322 318]
 [207 265 242 295]
 [103 252 179 332]
 [ 46 258 173 331]]
[321 406 237 207 103  46]
[351 464 322 242 179 173]
data/VOCdevkit/VOC2007/JPEGImages/000980.jpg
[[  0  45 385 320]]
[0]
[385]
data/VOCdevkit/VOC2007/JPEGImages/000982.jpg
[[155  36 433 344]]
[155]
[433]
data/VOCdevkit/VOC2007/JPEGImages/000987.jpg
[[110  18 301 400]
 [230  14 283 156]
 [ 68  17 107 161]
 [ 42  94 481 442]]
[110 230  68  42]
[301 283 107 481]
data/VOCdevkit/VOC2007/JPEGImages/000989.jpg
[[391 116 448 176]]
[391]
[448]
data/VOCdevkit/VOC2007/JPEGImages/000991.jpg
[[ 83  86 230 321]
 [100 116 205 470]]
[ 83 100]
[230 205]
data/VOCdevkit/VOC2007/JPEGImages/000993.jpg
[[134 136 286 276]
 [ 34  86 104 182]]
[134  34]
[286 104]
data/VOCdevkit/VOC2007/JPEGImages/000996.jpg
[[197 213 361 374]
 [333 210 472 372]
 [424 212 499 322]
 [  0 156 495 374]
 [407   5 481 174]]
[197 333 424   0 407]
[361 472 499 495 481]
data/VOC

[0]
[498]
data/VOCdevkit/VOC2007/JPEGImages/001279.jpg
[[159  31 336 449]
 [111 223 254 440]
 [  2 164 351 499]]
[159 111   2]
[336 254 351]
data/VOCdevkit/VOC2007/JPEGImages/001281.jpg
[[106 120 331 233]]
[106]
[331]
data/VOCdevkit/VOC2007/JPEGImages/001284.jpg
[[265 122 365 374]
 [124 114 262 303]
 [ 74 116 224 359]
 [  0 113 199 374]
 [374 113 499 374]
 [ 52  89  81 138]]
[265 124  74   0 374  52]
[365 262 224 199 499  81]
data/VOCdevkit/VOC2007/JPEGImages/001286.jpg
[[ 11 123 468 330]]
[11]
[468]
data/VOCdevkit/VOC2007/JPEGImages/001287.jpg
[[302 126 498 304]
 [  0 102 322 400]]
[302   0]
[498 322]
data/VOCdevkit/VOC2007/JPEGImages/001288.jpg
[[ 74  75 432 208]
 [  3 271 143 316]
 [184 271 304 313]]
[ 74   3 184]
[432 143 304]
data/VOCdevkit/VOC2007/JPEGImages/001289.jpg
[[145  97 343 187]]
[145]
[343]
data/VOCdevkit/VOC2007/JPEGImages/001290.jpg
[[ 23 169 474 335]]
[23]
[474]
data/VOCdevkit/VOC2007/JPEGImages/001292.jpg
[[ 88 179 137 343]
 [192  62 439 337]]
[ 88 192]
[137 439]
da

[449]
data/VOCdevkit/VOC2007/JPEGImages/001577.jpg
[[ 71  91 320 162]
 [  0 162 396 374]
 [275   0 421 176]
 [184  32 232  73]
 [137  11 187 107]
 [ 73   2 138 123]
 [ 44  35  76  75]]
[ 71   0 275 184 137  73  44]
[320 396 421 232 187 138  76]
data/VOCdevkit/VOC2007/JPEGImages/001579.jpg
[[158 301 374 499]
 [ 51 253 339 440]
 [106 134 261 405]
 [247 156 374 478]]
[158  51 106 247]
[374 339 261 374]
data/VOCdevkit/VOC2007/JPEGImages/001580.jpg
[[ 67  66 291 340]
 [311  95 497 336]
 [290 223 333 374]
 [305 251 362 374]]
[ 67 311 290 305]
[291 497 333 362]
data/VOCdevkit/VOC2007/JPEGImages/001582.jpg
[[  0 175 162 364]
 [376  77 499 167]]
[  0 376]
[162 499]
data/VOCdevkit/VOC2007/JPEGImages/001586.jpg
[[ 51  53 452 328]
 [149  39 269 236]]
[ 51 149]
[452 269]
data/VOCdevkit/VOC2007/JPEGImages/001588.jpg
[[441 134 499 233]
 [249 129 400 216]
 [  0 132 198 285]]
[441 249   0]
[499 400 198]
data/VOCdevkit/VOC2007/JPEGImages/001590.jpg
[[144 222 227 449]
 [  0  39  72 323]]
[144   0]
[227  

[267 261]
data/VOCdevkit/VOC2007/JPEGImages/001861.jpg
[[ 43 189 492 362]
 [ 16   8 434 362]]
[43 16]
[492 434]
data/VOCdevkit/VOC2007/JPEGImages/001862.jpg
[[209  32 255  64]
 [ 64  60 443 374]]
[209  64]
[255 443]
data/VOCdevkit/VOC2007/JPEGImages/001864.jpg
[[  0 213  97 332]
 [119  59 282 374]
 [284  82 409 374]
 [251  54 439 374]]
[  0 119 284 251]
[ 97 282 409 439]
data/VOCdevkit/VOC2007/JPEGImages/001870.jpg
[[  0  15 396 399]]
[0]
[396]
data/VOCdevkit/VOC2007/JPEGImages/001872.jpg
[[145  21 347 341]]
[145]
[347]
data/VOCdevkit/VOC2007/JPEGImages/001875.jpg
[[357 137 464 233]]
[357]
[464]
data/VOCdevkit/VOC2007/JPEGImages/001877.jpg
[[  2  52 499 307]]
[2]
[499]
data/VOCdevkit/VOC2007/JPEGImages/001878.jpg
[[  0   0 498 345]]
[0]
[498]
data/VOCdevkit/VOC2007/JPEGImages/001881.jpg
[[ 44  76 438 356]
 [383  76 499 202]
 [  5  75 119 119]]
[ 44 383   5]
[438 499 119]
data/VOCdevkit/VOC2007/JPEGImages/001882.jpg
[[290 102 495 222]
 [102 162 209 272]]
[290 102]
[495 209]
data/VOCdevk

[[ 32  61 350 492]]
[32]
[350]
data/VOCdevkit/VOC2007/JPEGImages/002151.jpg
[[ 70   0 346 326]]
[70]
[346]
data/VOCdevkit/VOC2007/JPEGImages/002152.jpg
[[100 195 216 283]]
[100]
[216]
data/VOCdevkit/VOC2007/JPEGImages/002153.jpg
[[125 165 417 290]]
[125]
[417]
data/VOCdevkit/VOC2007/JPEGImages/002155.jpg
[[  1  18 462 302]]
[1]
[462]
data/VOCdevkit/VOC2007/JPEGImages/002156.jpg
[[277 212 386 369]
 [177 205 274 348]
 [ 35 192 106 277]
 [382 199 485 354]
 [  0 190  32 237]
 [200 166 467 304]
 [101  99 166 284]]
[277 177  35 382   0 200 101]
[386 274 106 485  32 467 166]
data/VOCdevkit/VOC2007/JPEGImages/002158.jpg
[[207 142 471 245]]
[207]
[471]
data/VOCdevkit/VOC2007/JPEGImages/002163.jpg
[[ 40  90 424 351]]
[40]
[424]
data/VOCdevkit/VOC2007/JPEGImages/002165.jpg
[[ 53  47 289 470]]
[53]
[289]
data/VOCdevkit/VOC2007/JPEGImages/002166.jpg
[[ 19  73 486 316]]
[19]
[486]
data/VOCdevkit/VOC2007/JPEGImages/002169.jpg
[[ 16 101 331 499]]
[16]
[331]
data/VOCdevkit/VOC2007/JPEGImages/002170.jpg

data/VOCdevkit/VOC2007/JPEGImages/002441.jpg
[[  0  63 457 361]
 [  0  87 498 374]]
[0 0]
[457 498]
data/VOCdevkit/VOC2007/JPEGImages/002442.jpg
[[  0  20 372 496]
 [175   0 192  34]
 [166  67 201 197]]
[  0 175 166]
[372 192 201]
data/VOCdevkit/VOC2007/JPEGImages/002443.jpg
[[  0 174 282 392]
 [ 47 134 330 320]]
[ 0 47]
[282 330]
data/VOCdevkit/VOC2007/JPEGImages/002444.jpg
[[ 85  95 212 368]
 [170 119 287 284]
 [336 271 377 374]
 [469 273 498 371]]
[ 85 170 336 469]
[212 287 377 498]
data/VOCdevkit/VOC2007/JPEGImages/002445.jpg
[[ 11  88 347 489]]
[11]
[347]
data/VOCdevkit/VOC2007/JPEGImages/002448.jpg
[[133  92 243 280]
 [ 52 145 371 434]]
[133  52]
[243 371]
data/VOCdevkit/VOC2007/JPEGImages/002450.jpg
[[219 130 453 328]
 [ 28   0 262 317]]
[219  28]
[453 262]
data/VOCdevkit/VOC2007/JPEGImages/002452.jpg
[[  0  21 462 361]]
[0]
[462]
data/VOCdevkit/VOC2007/JPEGImages/002454.jpg
[[237  72 400 199]]
[237]
[400]
data/VOCdevkit/VOC2007/JPEGImages/002456.jpg
[[415   0 495 125]
 [386   0

[0]
[498]
data/VOCdevkit/VOC2007/JPEGImages/002803.jpg
[[125 153 275 311]
 [237 144 393 293]
 [ 57 108 124 182]
 [110 106 188 150]
 [212 108 304 158]
 [292 113 354 169]
 [167  84 198 124]]
[125 237  57 110 212 292 167]
[275 393 124 188 304 354 198]
data/VOCdevkit/VOC2007/JPEGImages/002804.jpg
[[  0 148  29 175]
 [ 51 130 170 173]
 [379 130 449 153]
 [454 134 499 155]
 [ 28 137  46 178]
 [481 155 499 232]
 [236 155 281 310]
 [112 161 138 257]
 [161 150 196 202]
 [142 169 196 245]
 [267 195 311 290]]
[  0  51 379 454  28 481 236 112 161 142 267]
[ 29 170 449 499  46 499 281 138 196 196 311]
data/VOCdevkit/VOC2007/JPEGImages/002807.jpg
[[ 76 113 395 448]
 [ 93  32 345 383]]
[76 93]
[395 345]
data/VOCdevkit/VOC2007/JPEGImages/002810.jpg
[[140  58 416 255]]
[140]
[416]
data/VOCdevkit/VOC2007/JPEGImages/002812.jpg
[[134 197 264 259]
 [273 141 487 260]]
[134 273]
[264 487]
data/VOCdevkit/VOC2007/JPEGImages/002815.jpg
[[  0  32 332 499]]
[0]
[332]
data/VOCdevkit/VOC2007/JPEGImages/002816.jpg
[

[306 392 499 499]
data/VOCdevkit/VOC2007/JPEGImages/003154.jpg
[[ 32 101 338 301]]
[32]
[338]
data/VOCdevkit/VOC2007/JPEGImages/003155.jpg
[[ 20 120 329 418]]
[20]
[329]
data/VOCdevkit/VOC2007/JPEGImages/003157.jpg
[[ 72  56 238 184]]
[72]
[238]
data/VOCdevkit/VOC2007/JPEGImages/003159.jpg
[[ 32  83 150 315]
 [289  96 428 293]
 [439  62 479 297]
 [193 217 214 285]
 [211 199 232 267]
 [261 293 292 359]]
[ 32 289 439 193 211 261]
[150 428 479 214 232 292]
data/VOCdevkit/VOC2007/JPEGImages/003161.jpg
[[ 50 157 106 275]
 [119 176 170 283]
 [228 166 280 279]
 [275 161 326 275]
 [343 160 387 223]]
[ 50 119 228 275 343]
[106 170 280 326 387]
data/VOCdevkit/VOC2007/JPEGImages/003162.jpg
[[ 17  88 475 354]
 [147  10 385 222]]
[ 17 147]
[475 385]
data/VOCdevkit/VOC2007/JPEGImages/003163.jpg
[[ 59  65 444 239]]
[59]
[444]
data/VOCdevkit/VOC2007/JPEGImages/003164.jpg
[[  0  42 474 310]]
[0]
[474]
data/VOCdevkit/VOC2007/JPEGImages/003165.jpg
[[ 50  51 281 300]]
[50]
[281]
data/VOCdevkit/VOC2007/JPE

[245 445]
data/VOCdevkit/VOC2007/JPEGImages/003436.jpg
[[365 137 426 270]
 [209 100 349 357]
 [ 61  91 222 360]
 [ 23 179  57 235]
 [350  91 378 134]
 [256  57 304 115]]
[365 209  61  23 350 256]
[426 349 222  57 378 304]
data/VOCdevkit/VOC2007/JPEGImages/003439.jpg
[[ 80 176 237 294]
 [221  88 414 294]]
[ 80 221]
[237 414]
data/VOCdevkit/VOC2007/JPEGImages/003441.jpg
[[122  97 395 273]]
[122]
[395]
data/VOCdevkit/VOC2007/JPEGImages/003443.jpg
[[271  95 441 232]]
[271]
[441]
data/VOCdevkit/VOC2007/JPEGImages/003444.jpg
[[ 65  90 333 262]]
[65]
[333]
data/VOCdevkit/VOC2007/JPEGImages/003449.jpg
[[379  19 497 311]
 [217 240 310 293]
 [330 244 393 297]
 [  0   1 123  88]]
[379 217 330   0]
[497 310 393 123]
data/VOCdevkit/VOC2007/JPEGImages/003450.jpg
[[234 124 317 353]
 [144 132 262 275]
 [265 167 365 284]
 [ 77 132 478 369]
 [ 20  57 140 194]]
[234 144 265  77  20]
[317 262 365 478 140]
data/VOCdevkit/VOC2007/JPEGImages/003451.jpg
[[ 92 153 265 224]
 [240 166 288 245]
 [ 34 201 110 276]

 [394 185 448 284]]
[ 64 215 394]
[132 296 448]
data/VOCdevkit/VOC2007/JPEGImages/003711.jpg
[[334 277 368 328]
 [ 31  85 338 343]
 [148 172 160 208]
 [133 176 148 209]]
[334  31 148 133]
[368 338 160 148]
data/VOCdevkit/VOC2007/JPEGImages/003713.jpg
[[272 229 324 275]
 [425 216 498 343]
 [176 233 217 254]
 [  4 220 128 286]
 [130 215 185 254]
 [360 203 420 270]]
[272 425 176   4 130 360]
[324 498 217 128 185 420]
data/VOCdevkit/VOC2007/JPEGImages/003714.jpg
[[202 168 323 288]]
[202]
[323]
data/VOCdevkit/VOC2007/JPEGImages/003717.jpg
[[142 171 386 310]
 [  0  87 147 202]
 [256  31 403 169]
 [ 11  11 203 172]
 [214   1 262  92]
 [304   0 393 162]
 [180  90 331 246]
 [382 118 473 318]
 [407 269 490 325]
 [246 266 308 311]
 [384  80 471 296]
 [ 28   1 146 225]
 [ 91   0 145 174]
 [412   0 499 161]]
[142   0 256  11 214 304 180 382 407 246 384  28  91 412]
[386 147 403 203 262 393 331 473 490 308 471 146 145 499]
data/VOCdevkit/VOC2007/JPEGImages/003721.jpg
[[ 56 102 485 247]]
[56]
[485]
d

data/VOCdevkit/VOC2007/JPEGImages/004009.jpg
[[226 105 347 254]]
[226]
[347]
data/VOCdevkit/VOC2007/JPEGImages/004010.jpg
[[  0 133 499 361]
 [ 50   7 382 417]]
[ 0 50]
[499 382]
data/VOCdevkit/VOC2007/JPEGImages/004011.jpg
[[ 85  96 413 266]]
[85]
[413]
data/VOCdevkit/VOC2007/JPEGImages/004012.jpg
[[ 81 119 322 499]
 [146 225 309 471]
 [  0 112 374 499]]
[ 81 146   0]
[322 309 374]
data/VOCdevkit/VOC2007/JPEGImages/004013.jpg
[[113 169 330 342]]
[113]
[330]
data/VOCdevkit/VOC2007/JPEGImages/004014.jpg
[[  1  74 374 499]
 [ 72  47 372 499]]
[ 1 72]
[374 372]
data/VOCdevkit/VOC2007/JPEGImages/004015.jpg
[[198  22 427 374]]
[198]
[427]
data/VOCdevkit/VOC2007/JPEGImages/004016.jpg
[[109  58 389 291]]
[109]
[389]
data/VOCdevkit/VOC2007/JPEGImages/004017.jpg
[[ 50   0 263 216]
 [ 50   3 490 374]]
[50 50]
[263 490]
data/VOCdevkit/VOC2007/JPEGImages/004019.jpg
[[ 26 315 106 359]
 [340 322 416 357]
 [415 320 480 352]
 [117 315 136 366]
 [251 309 266 355]
 [  0 323  26 354]]
[ 26 340 415 117 25

data/VOCdevkit/VOC2007/JPEGImages/004283.jpg
[[  0   0 222 498]]
[0]
[222]
data/VOCdevkit/VOC2007/JPEGImages/004284.jpg
[[288 117 400 184]
 [111 103 249 181]]
[288 111]
[400 249]
data/VOCdevkit/VOC2007/JPEGImages/004286.jpg
[[  8  90 491 225]]
[8]
[491]
data/VOCdevkit/VOC2007/JPEGImages/004287.jpg
[[  0   0 498 399]
 [124  80 425 399]]
[  0 124]
[498 425]
data/VOCdevkit/VOC2007/JPEGImages/004291.jpg
[[  1   0 372 499]]
[1]
[372]
data/VOCdevkit/VOC2007/JPEGImages/004292.jpg
[[191 157 364 411]
 [103  31 218 376]]
[191 103]
[364 218]
data/VOCdevkit/VOC2007/JPEGImages/004293.jpg
[[198 217 380 358]]
[198]
[380]
data/VOCdevkit/VOC2007/JPEGImages/004295.jpg
[[ 47  10 464 350]]
[47]
[464]
data/VOCdevkit/VOC2007/JPEGImages/004296.jpg
[[ 53 234 341 499]
 [ 60  64 261 235]
 [155 102 187 170]
 [212   1 293  97]
 [227  37 330 150]
 [226  85 326 219]
 [231 179 374 383]
 [  0  23  59 259]]
[ 53  60 155 212 227 226 231   0]
[341 261 187 293 330 326 374  59]
data/VOCdevkit/VOC2007/JPEGImages/004298.jpg

[380]
[417]
data/VOCdevkit/VOC2007/JPEGImages/004600.jpg
[[202  39 322 280]]
[202]
[322]
data/VOCdevkit/VOC2007/JPEGImages/004601.jpg
[[234  99 349 316]
 [168  42 300 307]]
[234 168]
[349 300]
data/VOCdevkit/VOC2007/JPEGImages/004604.jpg
[[328  51 498 187]
 [191  57 235 103]
 [ 23  18  89 271]
 [ 70   0 187 314]
 [168 102 318 173]
 [106 131 442 279]]
[328 191  23  70 168 106]
[498 235  89 187 318 442]
data/VOCdevkit/VOC2007/JPEGImages/004605.jpg
[[148 120 407 324]
 [  0   6  89 138]
 [208  28 342 314]
 [  0  35  56 331]]
[148   0 208   0]
[407  89 342  56]
data/VOCdevkit/VOC2007/JPEGImages/004606.jpg
[[149 283 180 316]]
[149]
[180]
data/VOCdevkit/VOC2007/JPEGImages/004607.jpg
[[111 106 407 231]
 [  3 116 152 302]
 [368 127 499 294]]
[111   3 368]
[407 152 499]
data/VOCdevkit/VOC2007/JPEGImages/004609.jpg
[[  0 178 201 411]
 [ 55  99 373 499]]
[ 0 55]
[201 373]
data/VOCdevkit/VOC2007/JPEGImages/004611.jpg
[[ 20 166 145 373]
 [ 92 170 158 265]
 [139 196 237 371]
 [234 178 368 371]
 [365 

data/VOCdevkit/VOC2007/JPEGImages/004896.jpg
[[  0  68 499 242]]
[0]
[499]
data/VOCdevkit/VOC2007/JPEGImages/004897.jpg
[[258 195 319 332]
 [472 187 499 311]
 [221 211 257 332]]
[258 472 221]
[319 499 257]
data/VOCdevkit/VOC2007/JPEGImages/004898.jpg
[[248  16 354 302]
 [340 158 407 290]
 [193 172 227 212]
 [ 71 196 117 261]]
[248 340 193  71]
[354 407 227 117]
data/VOCdevkit/VOC2007/JPEGImages/004902.jpg
[[308 140 499 373]
 [235 123 421 363]
 [111 117 283 374]
 [  0  67 305 374]]
[308 235 111   0]
[499 421 283 305]
data/VOCdevkit/VOC2007/JPEGImages/004903.jpg
[[126 125 463 300]
 [ 28  54 165 158]
 [  0  94  60 246]
 [  0  52  59  80]]
[126  28   0   0]
[463 165  60  59]
data/VOCdevkit/VOC2007/JPEGImages/004905.jpg
[[303   7 498 279]]
[303]
[498]
data/VOCdevkit/VOC2007/JPEGImages/004907.jpg
[[ 60   0 323 328]]
[60]
[323]
data/VOCdevkit/VOC2007/JPEGImages/004910.jpg
[[ 27 181 311 393]]
[27]
[311]
data/VOCdevkit/VOC2007/JPEGImages/004911.jpg
[[401 119 499 240]
 [104  36 430 342]]
[401 10

[[  0 178 370 362]]
[0]
[370]
data/VOCdevkit/VOC2007/JPEGImages/005179.jpg
[[  3  21 356 269]]
[3]
[356]
data/VOCdevkit/VOC2007/JPEGImages/005181.jpg
[[306 222 356 310]]
[306]
[356]
data/VOCdevkit/VOC2007/JPEGImages/005183.jpg
[[140 306 350 447]
 [ 14 299 162 416]
 [  0   9 374 499]]
[140  14   0]
[350 162 374]
data/VOCdevkit/VOC2007/JPEGImages/005185.jpg
[[  0  72 497 342]
 [142 116 262 206]
 [230 138 359 228]]
[  0 142 230]
[497 262 359]
data/VOCdevkit/VOC2007/JPEGImages/005186.jpg
[[290 172 361 292]
 [ 33 180  92 268]
 [371 164 468 300]]
[290  33 371]
[361  92 468]
data/VOCdevkit/VOC2007/JPEGImages/005189.jpg
[[  0  44 361 403]
 [195   0 498 403]]
[  0 195]
[361 498]
data/VOCdevkit/VOC2007/JPEGImages/005190.jpg
[[ 53  24 372 374]
 [374 132 499 306]]
[ 53 374]
[372 499]
data/VOCdevkit/VOC2007/JPEGImages/005191.jpg
[[  0 127 438 284]
 [ 55 205 467 374]
 [415 142 476 222]
 [290 141 340 207]
 [255 125 293 222]
 [232 126 263 229]
 [208 130 246 241]
 [142 141 218 277]
 [ 74 152 153 329]
 

data/VOCdevkit/VOC2007/JPEGImages/005440.jpg
[[ 56 175 499 242]]
[56]
[499]
data/VOCdevkit/VOC2007/JPEGImages/005441.jpg
[[127 290 198 357]
 [423  62 497 374]
 [401  89 439 188]
 [322 111 446 371]
 [246  71 335 373]
 [145  61 240 374]
 [ 50  61 117 116]
 [ 28  92 195 374]]
[127 423 401 322 246 145  50  28]
[198 497 439 446 335 240 117 195]
data/VOCdevkit/VOC2007/JPEGImages/005445.jpg
[[ 22 232 343 496]]
[22]
[343]
data/VOCdevkit/VOC2007/JPEGImages/005448.jpg
[[ 39 112  95 205]
 [145 113 209 217]
 [228 121 282 200]
 [293 116 346 181]
 [354 114 399 192]
 [409 108 463 186]
 [ 33 149 104 218]
 [147 146 216 218]
 [221 149 282 215]
 [287 151 347 211]
 [340 143 404 207]
 [400 137 466 204]]
[ 39 145 228 293 354 409  33 147 221 287 340 400]
[ 95 209 282 346 399 463 104 216 282 347 404 466]
data/VOCdevkit/VOC2007/JPEGImages/005450.jpg
[[ 15 122 350 499]]
[15]
[350]
data/VOCdevkit/VOC2007/JPEGImages/005451.jpg
[[  0 102 137 499]
 [138 356 219 465]
 [238 361 297 478]
 [155 395 295 494]]
[  0 138 2

[ 88 262 457]
data/VOCdevkit/VOC2007/JPEGImages/005728.jpg
[[306  60 368 191]
 [ 94 141 220 247]
 [269 113 351 249]]
[306  94 269]
[368 220 351]
data/VOCdevkit/VOC2007/JPEGImages/005729.jpg
[[ 97 107 475 331]
 [191  33 365 303]]
[ 97 191]
[475 365]
data/VOCdevkit/VOC2007/JPEGImages/005730.jpg
[[137 254 179 306]
 [308 237 372 331]
 [176 254 202 292]]
[137 308 176]
[179 372 202]
data/VOCdevkit/VOC2007/JPEGImages/005731.jpg
[[ 71  33 498 312]
 [ 90 176 119 284]]
[71 90]
[498 119]
data/VOCdevkit/VOC2007/JPEGImages/005732.jpg
[[ 43 231 144 405]
 [157 208 244 407]
 [298 170 455 400]
 [ 40 165 145 384]
 [142 156 264 373]
 [320 114 449 372]]
[ 43 157 298  40 142 320]
[144 244 455 145 264 449]
data/VOCdevkit/VOC2007/JPEGImages/005735.jpg
[[  7  99 499 286]]
[7]
[499]
data/VOCdevkit/VOC2007/JPEGImages/005736.jpg
[[240 141 489 299]
 [ 12 116 189 227]]
[240  12]
[489 189]
data/VOCdevkit/VOC2007/JPEGImages/005738.jpg
[[122  91 442 269]
 [393 102 499 230]
 [253  84 273 101]]
[122 393 253]
[442 499 2

[212]
[419]
data/VOCdevkit/VOC2007/JPEGImages/005996.jpg
[[  0   0 499 289]]
[0]
[499]
data/VOCdevkit/VOC2007/JPEGImages/005998.jpg
[[ 18  28 480 326]]
[18]
[480]
data/VOCdevkit/VOC2007/JPEGImages/006000.jpg
[[145 258 308 499]
 [135 178 293 281]]
[145 135]
[308 293]
data/VOCdevkit/VOC2007/JPEGImages/006001.jpg
[[  0   2 498 314]]
[0]
[498]
data/VOCdevkit/VOC2007/JPEGImages/006004.jpg
[[ 32 163 239 423]
 [149 145 276 467]
 [ 67 168 203 499]
 [232 172 332 499]]
[ 32 149  67 232]
[239 276 203 332]
data/VOCdevkit/VOC2007/JPEGImages/006005.jpg
[[194  80 320 195]
 [ 81  69 197 191]
 [118 329 193 454]]
[194  81 118]
[320 197 193]
data/VOCdevkit/VOC2007/JPEGImages/006009.jpg
[[202 117 366 234]]
[202]
[366]
data/VOCdevkit/VOC2007/JPEGImages/006011.jpg
[[163 178 310 275]
 [154  94 290 163]
 [234  75 261 116]
 [216 141 256 180]]
[163 154 234 216]
[310 290 261 256]
data/VOCdevkit/VOC2007/JPEGImages/006012.jpg
[[183 185 213 209]
 [163 168 188 202]]
[183 163]
[213 188]
data/VOCdevkit/VOC2007/JPEGIma

data/VOCdevkit/VOC2007/JPEGImages/006229.jpg
[[343 116 424 305]
 [126 114 194 247]
 [118 114 153 211]
 [ 38  74 127 374]
 [145 130 332 374]]
[343 126 118  38 145]
[424 194 153 127 332]
data/VOCdevkit/VOC2007/JPEGImages/006230.jpg
[[ 13  32 499 355]]
[13]
[499]
data/VOCdevkit/VOC2007/JPEGImages/006233.jpg
[[ 77 302 114 346]]
[77]
[114]
data/VOCdevkit/VOC2007/JPEGImages/006234.jpg
[[  1  62 485 231]]
[1]
[485]
data/VOCdevkit/VOC2007/JPEGImages/006235.jpg
[[ 19 250 149 307]
 [138 250 278 307]
 [254 256 369 306]
 [343 258 459 304]
 [409 238 490 299]
 [ 36 229  57 261]]
[ 19 138 254 343 409  36]
[149 278 369 459 490  57]
data/VOCdevkit/VOC2007/JPEGImages/006236.jpg
[[ 36 203 371 422]
 [  0 234  73 499]
 [155 114 277 440]]
[ 36   0 155]
[371  73 277]
data/VOCdevkit/VOC2007/JPEGImages/006238.jpg
[[ 96 111 222 223]
 [142   0 242 138]
 [183  10 460 332]]
[ 96 142 183]
[222 242 460]
data/VOCdevkit/VOC2007/JPEGImages/006240.jpg
[[  1  13 350 466]]
[1]
[350]
data/VOCdevkit/VOC2007/JPEGImages/00624

data/VOCdevkit/VOC2007/JPEGImages/006501.jpg
[[  0   0 382 210]]
[0]
[382]
data/VOCdevkit/VOC2007/JPEGImages/006503.jpg
[[ 39  83  65 158]
 [ 84  79 106 136]
 [ 26 108 193 326]
 [150  85 281 269]
 [400  59 499 246]
 [ 21 200 497 374]]
[ 39  84  26 150 400  21]
[ 65 106 193 281 499 497]
data/VOCdevkit/VOC2007/JPEGImages/006506.jpg
[[  0 132 138 280]
 [114 152 182 270]
 [213 141 264 284]
 [285 174 336 270]
 [385 200 497 304]
 [378 169 428 275]
 [ 46 108 101 225]
 [124 123 180 222]
 [209 121 269 232]
 [282 132 337 234]
 [379 141 426 241]
 [418 142 469 259]]
[  0 114 213 285 385 378  46 124 209 282 379 418]
[138 182 264 336 497 428 101 180 269 337 426 469]
data/VOCdevkit/VOC2007/JPEGImages/006507.jpg
[[237  88 340 222]]
[237]
[340]
data/VOCdevkit/VOC2007/JPEGImages/006509.jpg
[[ 50 226 130 316]
 [264 294 332 378]
 [226 230 332 306]]
[ 50 264 226]
[130 332 332]
data/VOCdevkit/VOC2007/JPEGImages/006512.jpg
[[ 85  65 414 374]]
[85]
[414]
data/VOCdevkit/VOC2007/JPEGImages/006515.jpg
[[ 30  65 

[[231 179 262 234]]
[231]
[262]
data/VOCdevkit/VOC2007/JPEGImages/006916.jpg
[[  0  72 201 327]
 [149  86 495 330]]
[  0 149]
[201 495]
data/VOCdevkit/VOC2007/JPEGImages/006917.jpg
[[219  10 391 374]]
[219]
[391]
data/VOCdevkit/VOC2007/JPEGImages/006918.jpg
[[ 82 111 351 299]
 [328 146 499 229]]
[ 82 328]
[351 499]
data/VOCdevkit/VOC2007/JPEGImages/006919.jpg
[[ 59 150  75 201]
 [ 54  35 142 103]
 [ 51  45 180 141]
 [ 89  69 234 260]
 [142  61 343 272]
 [122 138 313 368]
 [269 175 435 374]
 [314 265 337 321]]
[ 59  54  51  89 142 122 269 314]
[ 75 142 180 234 343 313 435 337]
data/VOCdevkit/VOC2007/JPEGImages/006922.jpg
[[ 20  70 374 226]]
[20]
[374]
data/VOCdevkit/VOC2007/JPEGImages/006924.jpg
[[ 69  39 332 487]]
[69]
[332]
data/VOCdevkit/VOC2007/JPEGImages/006930.jpg
[[  7 135 498 374]]
[7]
[498]
data/VOCdevkit/VOC2007/JPEGImages/006931.jpg
[[ 21 276 309 429]
 [283 242 332 284]
 [190 196 242 217]
 [149 196 208 214]
 [ 98 195 152 214]
 [ 70 193 112 215]
 [ 37 195  89 216]
 [  0 185  4

[175 282  18  50 336 404]
[319 321  35  67 349 425]
data/VOCdevkit/VOC2007/JPEGImages/007241.jpg
[[ 21  70 493 364]]
[21]
[493]
data/VOCdevkit/VOC2007/JPEGImages/007243.jpg
[[129  42 497 371]
 [234 129 437 372]
 [140 177 251 373]]
[129 234 140]
[497 437 251]
data/VOCdevkit/VOC2007/JPEGImages/007244.jpg
[[ 90  33 400 374]
 [ 12 270 478 374]]
[90 12]
[400 478]
data/VOCdevkit/VOC2007/JPEGImages/007245.jpg
[[  0 117 336 316]
 [ 53  25 497 352]]
[ 0 53]
[336 497]
data/VOCdevkit/VOC2007/JPEGImages/007247.jpg
[[  6  92 400 290]]
[6]
[400]
data/VOCdevkit/VOC2007/JPEGImages/007249.jpg
[[148  53 426 344]]
[148]
[426]
data/VOCdevkit/VOC2007/JPEGImages/007250.jpg
[[189 226 219 329]
 [173  18 378 175]]
[189 173]
[219 378]
data/VOCdevkit/VOC2007/JPEGImages/007256.jpg
[[ 82 189 175 288]
 [ 57 130 140 299]
 [330  78 400 283]]
[ 82  57 330]
[175 140 400]
data/VOCdevkit/VOC2007/JPEGImages/007258.jpg
[[  5  13 387 159]]
[5]
[387]
data/VOCdevkit/VOC2007/JPEGImages/007259.jpg
[[  9  25 364 499]]
[9]
[364]


[225]
[343]
data/VOCdevkit/VOC2007/JPEGImages/007547.jpg
[[  0 115 448 374]
 [  0   0 177 372]]
[0 0]
[448 177]
data/VOCdevkit/VOC2007/JPEGImages/007551.jpg
[[123  64 326 484]]
[123]
[326]
data/VOCdevkit/VOC2007/JPEGImages/007555.jpg
[[  0 147 123 171]]
[0]
[123]
data/VOCdevkit/VOC2007/JPEGImages/007558.jpg
[[133  51 302 333]
 [ 25  36 190 333]
 [217  20 343 179]]
[133  25 217]
[302 190 343]
data/VOCdevkit/VOC2007/JPEGImages/007559.jpg
[[180  77 318 460]
 [  0 120  74 212]]
[180   0]
[318  74]
data/VOCdevkit/VOC2007/JPEGImages/007563.jpg
[[141 171 291 498]
 [ 49 177 167 499]]
[141  49]
[291 167]
data/VOCdevkit/VOC2007/JPEGImages/007565.jpg
[[105  33 357 262]
 [138 123 441 354]]
[105 138]
[357 441]
data/VOCdevkit/VOC2007/JPEGImages/007566.jpg
[[  4  29 498 333]
 [311   0 410  69]
 [  4   1  60 119]
 [134  36 152  52]
 [284   3 315  34]]
[  4 311   4 134 284]
[498 410  60 152 315]
data/VOCdevkit/VOC2007/JPEGImages/007568.jpg
[[ 34   0 465 373]]
[34]
[465]
data/VOCdevkit/VOC2007/JPEGImage

[361 271 494]
data/VOCdevkit/VOC2007/JPEGImages/007843.jpg
[[ 29 305 451 449]]
[29]
[451]
data/VOCdevkit/VOC2007/JPEGImages/007845.jpg
[[188  33 430 354]
 [ 42  69 370 262]]
[188  42]
[430 370]
data/VOCdevkit/VOC2007/JPEGImages/007847.jpg
[[ 35 109 330 293]]
[35]
[330]
data/VOCdevkit/VOC2007/JPEGImages/007853.jpg
[[150 176 414 451]]
[150]
[414]
data/VOCdevkit/VOC2007/JPEGImages/007854.jpg
[[  8 322 283 484]]
[8]
[283]
data/VOCdevkit/VOC2007/JPEGImages/007855.jpg
[[ 51 144 493 294]
 [ 25 215  53 231]]
[51 25]
[493  53]
data/VOCdevkit/VOC2007/JPEGImages/007856.jpg
[[106 117 349 229]
 [379 100 409 155]
 [399 103 430 156]
 [448  89 468 115]
 [465  91 485 114]
 [457 102 488 159]
 [446 104 466 157]
 [420 102 453 156]
 [460  65 474  84]]
[106 379 399 448 465 457 446 420 460]
[349 409 430 468 485 488 466 453 474]
data/VOCdevkit/VOC2007/JPEGImages/007857.jpg
[[272 105 371 184]]
[272]
[371]
data/VOCdevkit/VOC2007/JPEGImages/007859.jpg
[[ 31   3 499 336]]
[31]
[499]
data/VOCdevkit/VOC2007/JPEGIma

[297]
data/VOCdevkit/VOC2007/JPEGImages/008213.jpg
[[169 125 208 166]
 [ 58  65  70 103]]
[169  58]
[208  70]
data/VOCdevkit/VOC2007/JPEGImages/008216.jpg
[[ 60 275  95 406]
 [133 321 158 364]
 [105 408 174 499]
 [ 42 421 112 499]
 [  1 406 148 499]
 [221 307 287 499]
 [178 301 217 350]
 [170 320 222 478]
 [307 304 329 386]
 [290 311 309 384]]
[ 60 133 105  42   1 221 178 170 307 290]
[ 95 158 174 112 148 287 217 222 329 309]
data/VOCdevkit/VOC2007/JPEGImages/008218.jpg
[[  5  89 306 374]
 [336 333 376 374]
 [361 272 400 360]]
[  5 336 361]
[306 376 400]
data/VOCdevkit/VOC2007/JPEGImages/008220.jpg
[[ 42   0 499 332]
 [205 152 286 218]]
[ 42 205]
[499 286]
data/VOCdevkit/VOC2007/JPEGImages/008222.jpg
[[ 57  97 284 421]]
[57]
[284]
data/VOCdevkit/VOC2007/JPEGImages/008223.jpg
[[  0  11 371 373]]
[0]
[371]
data/VOCdevkit/VOC2007/JPEGImages/008224.jpg
[[ 65  84 157 224]
 [172 104 257 213]
 [309 109 396 224]
 [246  99 328 234]
 [ 14  97  65 211]
 [391 144 445 202]
 [456 144 484 192]]
[ 65 

[119]
[424]
data/VOCdevkit/VOC2007/JPEGImages/008522.jpg
[[165 143 302 187]]
[165]
[302]
data/VOCdevkit/VOC2007/JPEGImages/008523.jpg
[[ 64 124 265 248]
 [177   0 499 321]]
[ 64 177]
[265 499]
data/VOCdevkit/VOC2007/JPEGImages/008524.jpg
[[324   0 393  33]
 [  9   5 399 286]]
[324   9]
[393 399]
data/VOCdevkit/VOC2007/JPEGImages/008526.jpg
[[101 249 242 423]
 [ 59 245 120 384]
 [289 217 318 257]
 [274 202 293 241]
 [219 175 275 279]]
[101  59 289 274 219]
[242 120 318 293 275]
data/VOCdevkit/VOC2007/JPEGImages/008529.jpg
[[110 348 374 499]
 [ 98 195 266 438]
 [225 188 371 366]
 [233 169 313 292]
 [184 156 261 276]
 [127 149 198 233]
 [ 45 156 128 250]
 [  0 193 109 424]
 [  0 311 133 499]
 [330 107 367 171]]
[110  98 225 233 184 127  45   0   0 330]
[374 266 371 313 261 198 128 109 133 367]
data/VOCdevkit/VOC2007/JPEGImages/008530.jpg
[[173  31 499 332]]
[173]
[499]
data/VOCdevkit/VOC2007/JPEGImages/008533.jpg
[[ 19  27 496 423]]
[19]
[496]
data/VOCdevkit/VOC2007/JPEGImages/008534.jpg


[244]
data/VOCdevkit/VOC2007/JPEGImages/008822.jpg
[[107  11 499 332]]
[107]
[499]
data/VOCdevkit/VOC2007/JPEGImages/008823.jpg
[[  0 113 442 289]]
[0]
[442]
data/VOCdevkit/VOC2007/JPEGImages/008826.jpg
[[  0 187 154 335]]
[0]
[154]
data/VOCdevkit/VOC2007/JPEGImages/008831.jpg
[[224 240 488 402]
 [  3 252 243 403]]
[224   3]
[488 243]
data/VOCdevkit/VOC2007/JPEGImages/008833.jpg
[[195   0 474 374]]
[195]
[474]
data/VOCdevkit/VOC2007/JPEGImages/008835.jpg
[[332   4 458 116]
 [210   3 326 137]
 [119  17 198 116]
 [ 23  53 103 151]
 [ 75 128 169 220]
 [142 168 247 276]
 [256 116 346 211]]
[332 210 119  23  75 142 256]
[458 326 198 103 169 247 346]
data/VOCdevkit/VOC2007/JPEGImages/008836.jpg
[[ 59  79 352 374]
 [ 70  69 160 265]
 [373 114 499 374]
 [  0 277  54 374]
 [  0 111  83 329]
 [ 44   0  71  66]
 [218  10 244  76]
 [445 102 499 213]]
[ 59  70 373   0   0  44 218 445]
[352 160 499  54  83  71 244 499]
data/VOCdevkit/VOC2007/JPEGImages/008837.jpg
[[426  79 499 372]
 [ 63  32 434 372

[296 248 205 165 120  71 304 499]
data/VOCdevkit/VOC2007/JPEGImages/009048.jpg
[[141 307 256 403]
 [ 62  89 195 213]]
[141  62]
[256 195]
data/VOCdevkit/VOC2007/JPEGImages/009049.jpg
[[ 60  74 217 261]
 [141  78 449 326]
 [155  10 273 121]
 [252  15 400 268]]
[ 60 141 155 252]
[217 449 273 400]
data/VOCdevkit/VOC2007/JPEGImages/009051.jpg
[[472 177 499 221]
 [188 197 446 306]
 [ 94 254 219 367]
 [285 259 406 374]]
[472 188  94 285]
[499 446 219 406]
data/VOCdevkit/VOC2007/JPEGImages/009053.jpg
[[ 82  81 457 305]]
[82]
[457]
data/VOCdevkit/VOC2007/JPEGImages/009058.jpg
[[ 16   0 499 286]]
[16]
[499]
data/VOCdevkit/VOC2007/JPEGImages/009059.jpg
[[186  84 403 186]]
[186]
[403]
data/VOCdevkit/VOC2007/JPEGImages/009060.jpg
[[  0  66 449 332]
 [375 104 498 223]]
[  0 375]
[449 498]
data/VOCdevkit/VOC2007/JPEGImages/009063.jpg
[[319 148 452 188]
 [108 123 240 175]]
[319 108]
[452 240]
data/VOCdevkit/VOC2007/JPEGImages/009064.jpg
[[ 50  70 470 342]
 [  0 106  46 204]
 [345  40 499 201]
 [278  

[[ 19  18 324 393]]
[19]
[324]
data/VOCdevkit/VOC2007/JPEGImages/009382.jpg
[[  0 235 332 499]
 [ 28  74 299 484]
 [184   0 322 369]]
[  0  28 184]
[332 299 322]
data/VOCdevkit/VOC2007/JPEGImages/009386.jpg
[[ 91   1 497 373]]
[91]
[497]
data/VOCdevkit/VOC2007/JPEGImages/009388.jpg
[[114 172 186 232]
 [180 152 241 218]
 [ 83 264 266 374]
 [337 176 400 251]
 [427 227 446 278]]
[114 180  83 337 427]
[186 241 266 400 446]
data/VOCdevkit/VOC2007/JPEGImages/009389.jpg
[[  0 116 381 363]]
[0]
[381]
data/VOCdevkit/VOC2007/JPEGImages/009392.jpg
[[  0 193  90 330]
 [206 229 251 246]
 [251 222 285 236]
 [294 222 327 236]
 [186 222 210 232]
 [449 223 481 237]
 [426 282 499 330]]
[  0 206 251 294 186 449 426]
[ 90 251 285 327 210 481 499]
data/VOCdevkit/VOC2007/JPEGImages/009393.jpg
[[179 139 355 341]
 [ 21 159 213 371]
 [ 43   1 205 371]
 [398 161 462 295]]
[179  21  43 398]
[355 213 205 462]
data/VOCdevkit/VOC2007/JPEGImages/009394.jpg
[[  0   1 497 253]
 [341 159 367 260]]
[  0 341]
[497 367]
d

INFO:root:load data: filtered 0 roidb entries: 10022 -> 10022



[0]
[457]
data/VOCdevkit/VOC2007/JPEGImages/009711.jpg
[[ 72  58 463 313]
 [  0 149  78 220]
 [146 185 185 302]
 [ 81 205 123 303]]
[ 72   0 146  81]
[463  78 185 123]
data/VOCdevkit/VOC2007/JPEGImages/009712.jpg
[[257 167 313 374]
 [205 179 261 374]]
[257 205]
[313 261]
data/VOCdevkit/VOC2007/JPEGImages/009713.jpg
[[ 94  56 386 343]
 [265   0 368 106]
 [  3   3 142 175]]
[ 94 265   3]
[386 368 142]
data/VOCdevkit/VOC2007/JPEGImages/009717.jpg
[[382 153 498 235]
 [ 75 205 237 332]
 [230 162 318 282]
 [304 177 365 233]
 [ 42  74 160 177]
 [162  76 262 158]
 [269  77 353 149]
 [340 106 385 148]
 [445 102 498 136]
 [  0 101  53 122]
 [372 181 410 284]]
[382  75 230 304  42 162 269 340 445   0 372]
[498 237 318 365 160 262 353 385 498  53 410]
data/VOCdevkit/VOC2007/JPEGImages/009718.jpg
[[203 193 498 251]
 [ 92 314 138 343]
 [260 336 338 371]]
[203  92 260]
[498 138 338]
data/VOCdevkit/VOC2007/JPEGImages/009719.jpg
[[  0 105 341 412]
 [ 81  69 494 411]]
[ 0 81]
[341 494]
data/VOCdevkit/V

('providing maximum shape', [('data', (1, 3, 600, 1000)), ('gt_boxes', (1, 100, 5))], [('label', (1, 20646)), ('bbox_target', (1, 36, 37, 62)), ('bbox_weight', (1, 36, 37, 62))])
output shape
{'bbox_loss_reshape_output': (1L, 128L, 84L),
 'blockgrad0_output': (1L, 128L),
 'cls_prob_reshape_output': (1L, 128L, 21L),
 'rpn_bbox_loss_output': (1L, 36L, 32L, 62L),
 'rpn_cls_prob_output': (1L, 2L, 288L, 62L)}
model/vgg16
vgg16
('lr', 0.001, 'lr_epoch_diff', [7], 'lr_iters', [70154])


INFO:root:Epoch[0] Batch [20]	Speed: 0.42 samples/sec	Train-RPNAcc=0.908854,	RPNLogLoss=0.303647,	RPNL1Loss=1.331508,	RCNNAcc=0.731027,	RCNNLogLoss=1.396968,	RCNNL1Loss=2.578552,	
INFO:root:Epoch[0] Batch [40]	Speed: 0.71 samples/sec	Train-RPNAcc=0.926258,	RPNLogLoss=0.256415,	RPNL1Loss=1.141757,	RCNNAcc=0.770579,	RCNNLogLoss=1.146583,	RCNNL1Loss=2.595929,	
INFO:root:Epoch[0] Batch [60]	Speed: 0.55 samples/sec	Train-RPNAcc=0.937564,	RPNLogLoss=0.221250,	RPNL1Loss=1.101883,	RCNNAcc=0.794314,	RCNNLogLoss=1.020761,	RCNNL1Loss=2.520255,	
INFO:root:Epoch[0] Batch [80]	Speed: 0.57 samples/sec	Train-RPNAcc=0.938850,	RPNLogLoss=0.202296,	RPNL1Loss=1.047270,	RCNNAcc=0.798322,	RCNNLogLoss=0.976983,	RCNNL1Loss=2.520876,	
INFO:root:Epoch[0] Batch [100]	Speed: 1.13 samples/sec	Train-RPNAcc=0.946202,	RPNLogLoss=0.188144,	RPNL1Loss=0.983935,	RCNNAcc=0.801361,	RCNNLogLoss=0.935375,	RCNNL1Loss=2.535259,	
INFO:root:Epoch[0] Batch [120]	Speed: 0.67 samples/sec	Train-RPNAcc=0.948993,	RPNLogLoss=0.180534,	

In [None]:
# Fast r-cnn trained on VOC2007 dataset

import os
import cv2
import mxnet as mx
import numpy as np
from rcnn.config import config
from rcnn.symbol import get_vgg_test, get_vgg_rpn_test
from rcnn.io.image import resize, transform
from rcnn.core.tester import Predictor, im_detect, im_proposal, vis_all_detection, draw_all_detection
from rcnn.utils.load_model import load_param
from rcnn.processing.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper

import urllib2
import tempfile

# 20 object classes, preceded by a '__background__' entry at index 0
# (21 entries in total); demo_net indexes scores and boxes by position
# in this tuple.
CLASSES = ('__background__',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')

# NOTE(review): presumably tells the rcnn test pipeline to take proposals
# from the RPN rather than from precomputed RoIs -- confirm in rcnn.config.
config.TEST.HAS_RPN = True
# The first configured scale pair supplies the two side lengths that
# generate_batch hands to resize().
SHORT_SIDE = config.SCALES[0][0]
LONG_SIDE = config.SCALES[0][1]
# Passed to transform() in generate_batch.
PIXEL_MEANS = config.PIXEL_MEANS
# Input names / shapes given to the Predictor in get_net; no labels are
# supplied at inference time.
DATA_NAMES = ['data', 'im_info']
LABEL_NAMES = None
# Also used by get_net as the input shapes for symbol.infer_shape().
DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
LABEL_SHAPES = None

# visualization
# demo_net keeps only detections whose class score is >= CONF_THRESH.
CONF_THRESH = 0.7
# Threshold handed to the NMS wrapper (presumably an overlap/IoU cutoff --
# confirm against rcnn.processing.nms).
NMS_THRESH = 0.3
nms = py_nms_wrapper(NMS_THRESH)


def get_net(symbol, prefix, epoch, ctx):
    """
    Load saved parameters, validate them against the symbol and wrap
    everything in a Predictor.
    :param symbol: network symbol to run
    :param prefix: checkpoint prefix forwarded to load_param
    :param epoch: checkpoint epoch forwarded to load_param
    :param ctx: context forwarded to load_param and Predictor
    :return: Predictor bound to DATA_SHAPES with the loaded parameters
    Raises AssertionError when a parameter is missing or its shape differs
    from the shape inferred from DATA_SHAPES.
    """
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # shapes the symbol expects when fed the fixed demo input shapes
    input_shapes = dict(DATA_SHAPES)
    argument_names = symbol.list_arguments()
    auxiliary_names = symbol.list_auxiliary_states()
    inferred_args, _, inferred_aux = symbol.infer_shape(**input_shapes)
    expected_args = dict(zip(argument_names, inferred_args))
    expected_aux = dict(zip(auxiliary_names, inferred_aux))

    def _verify(name, provided, expected):
        # one parameter: must be present and match the inferred shape
        assert name in provided, name + ' not initialized'
        assert provided[name].shape == expected[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(expected[name]) + ' provided ' + str(provided[name].shape)

    for name in symbol.list_arguments():
        # inputs and labels are fed at run time, not loaded from the checkpoint
        if not (name in input_shapes or 'label' in name):
            _verify(name, arg_params, expected_args)
    for name in symbol.list_auxiliary_states():
        _verify(name, aux_params, expected_aux)

    return Predictor(symbol, DATA_NAMES, LABEL_NAMES, context=ctx,
                     provide_data=DATA_SHAPES, provide_label=LABEL_SHAPES,
                     arg_params=arg_params, aux_params=aux_params)


def generate_batch(im):
    """
    Build a single-image MXNet input batch from a raw image.
    :param im: cv2.imread returns [height, width, channel] in BGR
    :return: tuple (data_batch, data_names, im_scale) where
    data_batch is the mx.io.DataBatch holding 'data' and 'im_info',
    data_names is the module-level DATA_NAMES list,
    im_scale is the scale factor reported by resize()
    """
    resized, im_scale = resize(im, SHORT_SIDE, LONG_SIDE)
    tensor = transform(resized, PIXEL_MEANS)

    # im_info row: axes 2 and 3 of the transformed array plus the scale
    # (presumably height and width of an NCHW tensor -- confirm against
    # rcnn.io.image.transform)
    im_info = np.array([[tensor.shape[2], tensor.shape[3], im_scale]], dtype=np.float32)

    provide_data = [('data', tensor.shape), ('im_info', im_info.shape)]
    arrays = [mx.nd.array(tensor), mx.nd.array(im_info)]
    batch = mx.io.DataBatch(data=arrays, label=None, provide_data=provide_data, provide_label=None)
    return batch, DATA_NAMES, im_scale


def demo_net(predictor, im, vis=False):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param im: image array to run detection on (as returned by cv2.imread)
    :param vis: if True, show the detections with vis_all_detection;
                otherwise draw them and save the result to 'output.jpg'
    :return: None
    """

    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    # Index 0 is '__background__': it is never reported, so it keeps an
    # empty entry and is not run through thresholding / NMS at all.
    boxes_this_image = [[] for _ in CLASSES]
    for cls_ind in range(1, len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        confident = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[confident, :]
        boxes_this_image[cls_ind] = dets[nms(dets), :]

    # print results
    # NOTE(review): the header lists x1, x2, y1, y2, but the four box columns
    # are probably ordered x1, y1, x2, y2 -- confirm against im_detect before
    # changing the text.
    print('class ---- [[x1, x2, y1, y2, confidence]]')
    for cls_ind, cls_dets in enumerate(boxes_this_image):
        if len(cls_dets) > 0:
            # single pre-formatted string: a multi-argument print(...) is
            # rendered as a tuple by the Python 2 print statement
            print('--------- %s ---------' % CLASSES[cls_ind])
            print(cls_dets)

    if vis:
        vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
    else:
        result_file = "output.jpg"
        print('results saved to %s' % result_file)
        annotated = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
        cv2.imwrite(result_file, annotated)

def get_image_from_url(url, img_file):
    """
    Download url into an already-open, writable file object.
    :param url: address to fetch with urllib2.urlopen
    :param img_file: open file object exposing write(), flush() and .name
                     (e.g. tempfile.NamedTemporaryFile)
    :return: img_file.name, the path the downloaded bytes were written to
    """
    response = urllib2.urlopen(url)
    try:
        img_file.write(response.read())
    finally:
        # release the HTTP connection even if reading or writing fails
        response.close()
    # make sure the bytes are on disk before the path is handed to a reader
    img_file.flush()
    return img_file.name

## Inference - Let's run some predictions


In [None]:
# Show detections inline (demo_net's vis branch) instead of writing output.jpg.
vis = True
# GPU device index passed to mx.gpu() below.
gpu = 0
# Checkpoint epoch and file prefix that get_net forwards to load_param.
epoch = 1
prefix = 'e2e'

ctx = mx.gpu(gpu)
# Build the test-time symbol and bind it to the loaded parameters once;
# the later cells reuse this predictor for every image.
symbol = get_vgg_test(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
predictor = get_net(symbol, prefix, epoch, ctx)



In [None]:
#url = 'http://images.all-free-download.com/images/graphiclarge/aeroplane_boeing_737_air_new_zealand_218019.jpg'
url = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/segexamples/images/21.jpg'
#url = 'https://www.siemens.com/press/pool/de/pressebilder/2011/mobility/soimo201107/072dpi/soimo201107-04_072dpi.jpg'

# The temporary file is only needed until the image has been decoded; the
# with-block closes (and thereby deletes) it instead of leaving it open for
# the lifetime of the kernel.
with tempfile.NamedTemporaryFile() as img_file:
    image = get_image_from_url(url, img_file)
    assert os.path.exists(image), image + ' not found'
    im = cv2.imread(image)

# cv2.imread signals an undecodable file by returning None rather than raising
if im is None:
    raise IOError('could not decode image downloaded from ' + url)
demo_net(predictor, im, vis)

## Live object detection through webcam

In [None]:
from IPython.display import HTML
from StringIO import StringIO
import numpy as np
import cv2

# Webcam snapshots are drawn and saved to output.jpg (demo_net's non-vis
# branch) so the page script can load the annotated image.
vis = False
def classify(img):
    """
    Run detection on a webcam snapshot sent from the browser.
    :param img: PNG data URL string ('data:image/png;base64,...') as
                produced by canvas.toDataURL('image/png')
    :return: the string "done" once demo_net has finished
    Raises ValueError when the payload cannot be decoded as an image.
    """
    import base64  # local import: only this callback needs it

    # strip the data-URL header and decode the base64 payload
    png_bytes = base64.b64decode(img[len('data:image/png;base64,'):])

    # cv2.imread needs a path, so round-trip through a temp file that is
    # closed (and deleted) as soon as the pixels are in memory
    with tempfile.NamedTemporaryFile() as img_file:
        img_file.write(png_bytes)
        img_file.flush()
        img_array = cv2.imread(img_file.name)

    # cv2.imread returns None instead of raising when decoding fails
    if img_array is None:
        raise ValueError('snapshot could not be decoded as an image')

    demo_net(predictor, img_array, vis)
    return "done"
    
# Webcam widget: streams the camera into a <video>, and on "Capture Image"
# sends a PNG snapshot to the kernel's classify() and then displays the
# output.jpg that demo_net writes.
HTML(data=''' 
<style>
#container {
    margin: 0px auto;
    width: 250px;
    height: 250px;
    border: 10px #333 solid;
}
#videoElement {
    width: 250px;
    height: 250px;
    background-color: #666;
}
</style>
<input type=button value="Capture Image" onClick="take_snapshot()">
<span id="detectStatus"></span>

<video autoplay="true" id="videoElement"></video>
<canvas id="c" style="display:none;" width="300" height="300"></canvas>
<canvas id="display" width="300" height="300"></canvas>


<script>

var video = document.querySelector("#videoElement");
navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia || navigator.oGetUserMedia;

// Prefer the promise-based API; fall back to the legacy prefixed callbacks.
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    navigator.mediaDevices.getUserMedia({video: true}).then(handleVideo).catch(videoError);
} else if (navigator.getUserMedia) {
    navigator.getUserMedia({video: true}, handleVideo, videoError);
}

function handleVideo(stream) {
    // createObjectURL(MediaStream) is no longer supported by current browsers;
    // keep it only as a fallback for old ones.
    if ("srcObject" in video) {
        video.srcObject = stream;
    } else {
        video.src = window.URL.createObjectURL(stream);
    }
}

function videoError(e) {
    // surface camera failures instead of silently ignoring them
    console.error("could not access webcam", e);
}


function handle_output(out) {
    console.log("done classifying", out);
    // display image
    var ctx = $("#display")[0].getContext("2d"),
    img = new Image();

    img.onload = function(){
        ctx.drawImage(img, 0, 0, 300, 300);
        // update only this widget's status element, not every span on the page
        $("#detectStatus").text("Loaded.");
    };
    // unique query string so the browser does not reuse a cached output.jpg
    img.src = "output.jpg?" + new Date().getTime();

    //console.log("done classifying", out.content.data["text/plain"]);
}

function take_snapshot() {
    var canvas = document.getElementById('c');
    var kernel = IPython.notebook.kernel;
    canvas.getContext("2d").drawImage(video, 0, 0, 300, 300);

    var data = canvas.toDataURL('image/png');
    console.log("captured data");
    kernel.execute("classify('" + data + "')", {
        'iopub': {
            'output': handle_output
        }
    }, {
        silent: false
    });
}
</script>

''')



## 