## Importing Libraries

In [1]:
import os
import scipy
import numpy as np
import pandas as pd
import PIL
import struct
from numpy import expand_dims

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [2]:
import tensorflow as tf
from skimage.transform import resize
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda, Conv2D, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D

In [4]:
import keras
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import concatenate, add
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img, load_img

## Class to load the pretrained model of YOLO-v3

In [17]:
class WeightReader:
    
    def __init__(self, weight_file):
        
        with open(weight_file, 'rb') as w_f:
            major,  = struct.unpack('i', w_f.read(4))
            minor,  = struct.unpack('i', w_f.read(4))
            revision,  = struct.unpack('i', w_f.read(4))
            
            if (major*10 + minor) >= 2 and major < 1000 and minor < 1000:
                w_f.read(8)
            else:
                w_f.read(4)

            transpose = (major > 1000) or (minor > 1000)

            binary = w_f.read()
        
        self.offset = 0
        self.all_weights = np.frombuffer(binary, dtype='float32')
    
    def read_bytes(self, size):
        
        self.offset = self.offset + size
        return self.all_weights[self.offset-size:self.offset]
    
    def load_weights(self, model):
        
        for i in range(106):
            try:
                conv_layer = model.get_layer('conv_' + str(i))
                print('loading weights of conv_layer #' + str(i))
                
                if i not in [81, 93, 105]:
                    norm_layer = model.get_layer('bnorm_' + str(i))

                    size = np.prod(norm_layer.get_weights()[0].shape)

                    beta  = self.read_bytes(size) # bias
                    gamma = self.read_bytes(size) # scale
                    mean  = self.read_bytes(size) # mean
                    var   = self.read_bytes(size) # variance

                    weights = norm_layer.set_weights([beta, gamma, mean, var])

                if len(conv_layer.get_weights()) > 1:
                    bias = self.read_bytes(np.prod(norm_layer.get_weights()[1].shape))
                    kernel = self.read_bytes(np.prod(norm_layer.get_weights()[0].shape))

                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
                    kernel = kernel.transpose([2,3,1,0])
                    conv_layer.set_weights([kernel, bias])
                    
                else:
                    kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
                    kernel = kernel.transpose([2,3,1,0])
                    conv_layer.set_weights([kernel])
                    
            except ValueError:
                print("no convolution #" + str(i))     

    def reset(self):
        self.offset = 0

## Creating the YOLOv3 Model

In [18]:
def _conv_block(inp, convs, skip=True):
    
    x = inp
    count = 0
    
    for conv in convs:
        
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        
        if conv['stride'] > 1 : x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
        x = Conv2D(conv['filter'], 
                   conv['kernel'], 
                   strides=conv['stride'], 
                   padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
                   name='conv_' + str(conv['layer_idx']), 
                   use_bias=False if conv['bnorm'] else True)(x)
        
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
            
    return add([skip_connection, x]) if skip else x

In [19]:
# creating the YOLO model
def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
        
    skip_36 = x
        
    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
        
    skip_61 = x
        
    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])

    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
        
    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)

    # Layer 80 => 82
    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

    # Layer 83 => 86
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

    # Layer 92 => 94
    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

    # Layer 95 => 98
    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])    
    return model

In [20]:
net_h, net_w = 416, 416
obj_thresh, nms_thresh = 0.5, 0.45
anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
              "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
              "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
              "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
              "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
              "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
              "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
              "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
              "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
              "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

# make the yolov3 model to predict 80 classes on COCO

yolov3 = make_yolov3_model()

# load the weights trained on COCO into the model
weight_reader = WeightReader('yolov3.weights')
weight_reader.load_weights(yolov3)

loading weights of conv_layer #0
[-4.316885   -0.7578076  -2.1098018   1.7402638   1.4071269  -3.0952053
 -0.38860837  0.75603795  1.982805    1.2893223   0.652888    2.6263633
  2.3013082  -2.04827    -3.7340226  -2.0467598   3.845535   -1.0419698
 -0.30135924 -0.35420752 -3.5354283  -2.628548    0.74821305  0.3917957
  2.3627155  -1.7907225   2.5973146  -0.34963462 -2.6972923  -2.688753
  0.99702346 -0.20098142] beta
[ 2.6223972   1.3536469   1.6286678   0.99905145 -1.0922571   2.6365428
  0.9870672   1.6019626   0.8613433  -1.3214242   1.1380545   1.0997647
  1.7826979   1.5409452   2.5057783   1.8394158   0.9597077   1.506216
  1.1141889   1.1447448   1.7145146  -2.1283774  -1.0026759  -1.4917758
  0.65230376  1.4994049   1.6686112   1.1225682   2.2276077   2.1827738
  1.1739247   1.1685042 ] gamma
[-0.5719402  -0.7738392  -0.28336832  0.39322987  0.00780761 -0.6109912
 -0.16133149 -0.22186707 -0.29902607  0.11704189 -0.00553402  0.01755599
 -0.02504491  0.00425338  0.42962    -0.4

loading weights of conv_layer #26
[ 0.17270254 -0.47539127 -0.7499666  -0.7663512  -0.5018035  -0.8676291
 -0.6538639  -0.50813574 -0.4821725  -0.630773   -0.65970445 -1.2105782
 -1.0563865  -0.5038816  -0.5663058  -1.2413746  -0.86469805 -0.66845524
 -0.53871715 -1.2280432  -0.9913507  -0.68813634 -0.67306936 -1.7679778
 -0.71180165 -0.8285192  -0.9049759  -0.3274078  -0.5270154  -0.5855719
 -0.14582779 -0.56353855 -0.5017216  -0.7298588  -0.5987139  -0.39306596
 -0.5709947  -0.81774    -0.93940574 -0.4418601  -1.0098444  -0.84064525
 -0.7850553  -0.41516435 -0.7292685  -0.4327545  -0.9750183  -0.57697964
 -0.6886168  -0.9031936  -0.43649566 -0.5966401  -0.5409715  -0.49223197
 -0.38998783 -0.16689238 -0.41700175 -0.7025839  -0.97807825 -0.8480647
 -0.6566256  -0.7116823  -0.7322197  -0.59194434 -0.730896   -0.5493003
 -0.13730742 -0.6138656  -0.6716309  -0.56212497 -0.54227287 -0.47468382
 -0.47842026 -0.57973915 -0.48736677 -0.6109873  -0.8367715  -0.56662494
 -0.54925597 -0.9473146

no convolution #52
loading weights of conv_layer #53
[-1.0290112  -0.5996369  -0.9219021  -0.78030187 -0.32392368 -1.1773905
 -1.1234553  -1.113147   -1.2505921  -0.7425891  -1.031482   -0.8543439
 -0.5292857  -0.95627266 -0.19200325 -0.5387754  -1.048959   -0.9500413
 -0.8875639  -0.82086825 -0.9230789  -0.7293383  -1.1044185  -0.6531724
 -0.57329804 -0.4084843  -0.60352254 -1.0289915  -0.90216506 -0.5405355
 -1.1044711  -0.9117253  -0.6962004  -0.22563924 -0.7192353  -1.3104419
 -0.799755   -1.2632338  -0.72444457 -0.72916687 -0.12310977 -0.7825662
 -0.31500536 -0.9932234  -0.76963985 -0.9628792  -0.52961504 -0.7200314
 -0.9947266  -0.71918523 -0.91308874 -1.0618747  -0.5096293  -0.71657634
 -0.5899963  -0.78675425 -0.9538206  -1.0549647  -0.9638517  -0.40941232
 -0.49402177 -0.6715026  -0.9640586  -0.3988207  -0.52792    -0.70314425
 -0.6451004  -0.8815837  -0.53550595 -1.169154   -0.81494915 -0.632123
 -0.6238133  -0.65851223 -0.5069449  -1.2859913  -0.82918113 -0.45078748
 -0.5475

no convolution #65
loading weights of conv_layer #66
[-0.3220324  -0.17528474 -0.58560145 -0.5551679  -0.43692023 -0.3949635
 -0.92756593 -0.69677734 -0.41961837 -1.1111832  -0.5116708  -1.1326414
 -0.54568213 -0.6976763  -0.6954926  -0.5446521  -0.8103427  -0.25898468
 -0.33069968 -0.6660943  -0.7500819  -1.1145777  -0.43812144 -0.5892893
 -0.54673475 -0.6501206  -0.13172285 -0.49489373 -0.3777386  -0.51071525
 -0.49611354 -0.23959939 -0.50994873 -0.7015003  -1.2451719  -0.65989584
 -0.5654947  -0.7270002  -0.7183722  -0.61347735 -0.34133533 -0.77427566
 -0.96101    -0.74380016 -0.4937973  -1.0703127  -0.6230964  -0.8379421
 -0.54877746 -0.40215665 -1.2958908  -1.0019566  -1.4164579  -0.8482443
 -0.5320144  -0.4145661  -0.76923245 -0.86511254 -0.3496966  -0.78567445
 -0.806324   -0.9094665  -0.64025456 -0.65561223 -1.1549389  -0.44165412
 -0.66277987 -0.41955513 -0.74052215 -0.69436187 -0.42934918  0.01768804
 -0.6750087  -0.7326126  -0.59629214 -1.3169022  -0.6409074  -0.61106265
 -0

no convolution #74
loading weights of conv_layer #75
[-0.11295164 -0.16353464 -0.29434597 -0.19717237 -0.19581603 -0.26774156
 -0.12699853 -0.13611482 -0.17186742 -0.2509703  -0.18791585 -0.15463606
 -0.11069019 -0.20049486 -0.09537828 -0.21795253 -0.30200556 -0.18474473
 -0.11256649 -0.13393551 -0.10887885 -0.1426898  -0.24431181 -0.20633107
 -0.29462546 -0.06252996 -0.12357797 -0.06494185 -0.03388232 -0.17206992
 -0.169904   -0.29600662 -0.12766461 -0.05568198 -0.18594593 -0.25969565
 -0.19018722 -0.2143355  -0.18481508 -0.25429434 -0.24377121 -0.14513445
 -0.14986491 -0.18464091 -0.20263578 -0.12244249 -0.20219079 -0.23517144
 -0.12501381 -0.12142643 -0.13483796 -0.23166268 -0.19434041 -0.22695783
 -0.1476515  -0.1605774  -0.13329783 -0.24762748 -0.2787741  -0.20496607
 -0.25061238 -0.24287389 -0.18704712 -0.13404705 -0.05879677 -0.26649272
 -0.12564473 -0.37714988 -0.22443627 -0.13813187 -0.13239673 -0.13317883
 -0.12352035 -0.18506695 -0.24024811 -0.2606347  -0.22240867 -0.2306170

loading weights of conv_layer #81
no convolution #81
no convolution #82
no convolution #83
loading weights of conv_layer #84
[ 1.63240754e-03  1.19003281e-02 -6.74464321e-03 -1.00346887e-03
 -3.90560506e-03 -1.43450312e-02 -5.03021069e-02 -2.66816206e-02
 -3.25909001e-03  2.07162136e-03  5.13940875e-04 -1.06882881e-02
  2.83875433e-03  2.02310225e-03 -4.18665595e-02  1.31604972e-03
  1.08225206e-02  4.97455359e-04 -2.19738530e-03  1.40995253e-03
 -1.52234023e-03  6.27677131e-04 -5.35993092e-03  4.37105261e-03
  4.92694322e-03 -4.05579526e-03 -4.31361515e-03 -7.35137332e-03
  4.42928961e-03 -1.43936547e-02  5.63319167e-03 -9.80859972e-04
  2.46372409e-02 -6.99103344e-04  4.34675813e-03 -4.03848651e-04
 -1.17344514e-03  1.05179669e-02  2.06738920e-03 -2.83333417e-02
  5.61688421e-03 -1.88150927e-02 -5.77220926e-03  1.15594380e-02
  1.00798095e-02  1.26967728e-01  9.92821413e-04 -1.45193608e-03
 -9.17864591e-03 -6.96097632e-05 -6.51090126e-03  9.17823892e-03
 -4.86423494e-03  2.75245868e-

  1.01969084e-02  8.08870792e-03 -2.10887287e-02  7.54017988e-03] var
loading weights of conv_layer #101
[-4.01183739e-02 -2.70077563e-03 -1.45146295e-01 -1.61039010e-02
  1.79314017e-01  3.18611637e-02 -5.42690307e-02 -1.42006436e-02
  5.97506005e-04 -2.82506016e-03 -1.36880465e-02  1.48629230e-02
  1.84893552e-02 -2.23435145e-02  1.13562513e-02  3.97629961e-02
  2.64101587e-02 -4.89281639e-02  8.33554044e-02  9.49623622e-03
 -2.11640378e-03 -1.27103627e-01 -2.70151310e-02 -1.06701709e-01
 -2.54168082e-03 -6.59116879e-02 -4.53911424e-02 -5.44149168e-02
  2.26374194e-02  7.80586340e-03  4.26569860e-03  1.43158734e-01
 -5.61412498e-02 -9.79958028e-02 -4.57861163e-02 -1.64381185e-04
  6.73834234e-02 -4.38774489e-02 -1.94268720e-03  4.19482030e-02
  2.45666422e-04 -4.80925627e-02 -5.60520589e-02 -5.14190644e-02
 -2.48741508e-02 -8.00071110e-04 -5.27847931e-02  2.45673209e-02
  4.94940802e-02 -1.70842733e-03 -8.53642076e-03  8.22583307e-03
 -2.24056821e-02  4.09715846e-02  1.03103351e-02  

In [21]:
# save the model to file
yolov3.save('yolov3.h5')

In [22]:
# load yolov3 model
yolov3 = load_model('yolov3.h5')



In [23]:
yolov3.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv_0 (Conv2D)                 (None, None, None, 3 864         input_4[0][0]                    
__________________________________________________________________________________________________
bnorm_0 (BatchNormalization)    (None, None, None, 3 128         conv_0[0][0]                     
__________________________________________________________________________________________________
leaky_0 (LeakyReLU)             (None, None, None, 3 0           bnorm_0[0][0]                    
____________________________________________________________________________________________

In [24]:
from numpy import expand_dims
def load_image_pixels(filename, shape):
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, target_size=shape)
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    # add a dimension so that we have one sample
    image = expand_dims(image, 0)
    return image, width, height

In [18]:
# define the expected input shape for the model
input_w, input_h = 416, 416
# define our new photo
photo_filename = ''
# load and prepare image
image, image_w, image_h = load_image_pixels(photo_filename, (input_w, input_h))

FileNotFoundError: [Errno 2] No such file or directory: 'car.jpeg'

In [14]:
class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        
        self.objness = objness
        self.classes = classes

        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
        
        return self.label
    
    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
            
        return self.score

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
             return 0
        else:
            return min(x2,x4) - x3 
def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    
    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    
    union = w1*h1 + w2*h2 - intersect
    
    return float(intersect) / union

def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
        
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])

        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]

            if boxes[index_i].classes[c] == 0: continue

            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]

                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

In [None]:
#decode_netout() that will take each one of the NumPy arrays, one at a time, 
#and decode the candidate bounding boxes and class predictions
def decode_netout(netout, anchors, obj_thresh,  net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5

    boxes = []

    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w
        
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]
            #objectness = netout[..., :4]
            
            if(objectness.all() <= obj_thresh): continue
            
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]

            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height  
            
            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]
            
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes)

            boxes.append(box)

    return boxes

In [None]:
# bounding boxes will be stretched back into the shape of the original image
#will allow plotting the original image and draw the bounding boxes, hopefully detecting real objects.
# correct the sizes of the bounding boxes for the shape of the image
#correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        new_h = net_w
        new_w = (image_w*net_h)/image_h
        
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)


In [None]:
from matplotlib.patches import Rectangle
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    # load the image
    data = plt.imread(filename)
    # plot the image
    plt.imshow(data)
    # get the context for drawing boxes
    ax = plt.gca()
    # plot each box
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # calculate width and height of the box
        width, height = x2 - x1, y2 - y1
        # create the shape
        rect = Rectangle((x1, y1), width, height, fill=False, color='red')
        # draw the box
        ax.add_patch(rect)
        # draw text and score in top left corner
        label = "%s (%.3f)" % (v_labels[i], v_scores[i])
        plt.text(x1, y1, label, color='red')
    # show the plot
    plt.show()


In [None]:
# get all of the results above a threshold
# takes the list of boxes, known labels, 
#and our classification threshold as arguments and returns parallel lists of boxes, labels, and scores.
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores

In [None]:
# define the expected input shape for the model
input_w, input_h = 416, 416
# define our new photo
photo_filename = 'car.jpeg'
# load and prepare image
image, image_w, image_h = load_image_pixels(photo_filename, (net_w, net_w))


# make prediction
yolos = yolov3.predict(image)
# summarize the shape of the list of arrays
print([a.shape for a in yolos])

# define the anchors
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
# define the probability threshold for detected objects
class_threshold = 0.6
boxes = list()

for i in range(len(yolos)):
        # decode the output of the network
    boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh,  net_h, net_w)

# correct the sizes of the bounding boxes
correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

# suppress non-maximal boxes
do_nms(boxes, nms_thresh)

# get the details of the detected objects
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
# summarize what we found
for i in range(len(v_boxes)):
    print(v_labels[i], v_scores[i])
# draw what we found
draw_boxes(photo_filename, v_boxes, v_labels, v_scores)


In [None]:
print(boxes[1].xmax)