# Tiny Yolo - Keras - Tensorflow backend

YOLO paper: Redmon et al., 2016 (https://arxiv.org/abs/1506.02640)

### Imports

In [1]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, Input, MaxPooling2D, BatchNormalization, Reshape, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

import tensorflow as tf

from preprocessing import parse_annotation, BatchGenerator
from yolo_utils import preprocess_image, decode_netout

import numpy as np
import zipfile
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "" 

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Constants

In [2]:
with open('tiny-yolo-voc-labels.txt', 'r') as f:
    LABELS = [label.rstrip('\n') for label in f.readlines()]

ANCHORS = [1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52]

IMAGE_H =  416
IMAGE_W =  416
CHANNELS = 3

GRID_H,  GRID_W  = 13 , 13
BOX = 5
CLASS = len(LABELS)

TRUE_BOX_BUFFER  = 50
BATCH_SIZE       = 16

EPSILON = 1e-8

CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')

OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
WARM_UP_BATCHES  = 0

### Model

In [27]:
input_image = Input(shape=(IMAGE_H, IMAGE_W, CHANNELS))
true_boxes  = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))

'''
https://keras.io/layers/convolutional/

Conv2D(filters, kernel_size, strides=(1, 1), padding='valid',      \
     data_format=None, dilation_rate=(1, 1), activation=None,      \
     use_bias=True, kernel_initializer='glorot_uniform',           \
     bias_initializer='zeros', kernel_regularizer=None,            \
     bias_regularizer=None, activity_regularizer=None,             \
     kernel_constraint=None, bias_constraint=None)
     
https://keras.io/layers/normalization/#batchnormalization

BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,          \
    center=True, scale=True, beta_initializer='zeros',             \
    gamma_initializer='ones', moving_mean_initializer='zeros',     \
    moving_variance_initializer='ones', beta_regularizer=None,     \
    gamma_regularizer=None, beta_constraint=None, gamma_constraint=None)

https://keras.io/layers/advanced-activations/#leakyrelu

LeakyReLU(alpha=0.3) 
It allows a small gradient when the unit is not active: f(x) = alpha * x for x < 0, f(x) = x for x >= 0

https://keras.io/layers/pooling/#maxpooling2d

MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None)

https://keras.io/layers/core/    -- Reshape

Reshape(target_shape)
'''
# Layer 1
X = Conv2D(filters=16, kernel_size=(3,3), padding='same', use_bias=False, name='conv_1')(input_image)
X = BatchNormalization(name='norm_1')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D()(X)

# Layer 2
X = Conv2D(filters=32, kernel_size=(3,3), padding='same', use_bias=False, name='conv_2')(X)
X = BatchNormalization(name='norm_2')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D()(X)

# Layer 3
X = Conv2D(filters=64, kernel_size=(3,3), padding='same', use_bias=False, name='conv_3')(X)
X = BatchNormalization(name='norm_3')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D()(X)

# Layer 4
X = Conv2D(filters=128, kernel_size=(3,3), padding='same', use_bias=False, name='conv_4')(X)
X = BatchNormalization(name='norm_4')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D()(X)

# Layer 5
X = Conv2D(filters=256, kernel_size=(3,3), padding='same', use_bias=False, name='conv_5')(X)
X = BatchNormalization(name='norm_5')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D()(X)

# Layer 6
X = Conv2D(filters=512, kernel_size=(3,3), padding='same', use_bias=False, name='conv_6')(X)
X = BatchNormalization(name='norm_6')(X)
X = LeakyReLU(alpha=0.1)(X)
X = MaxPooling2D(strides=(1,1), padding='same')(X)

# Layer 7
X = Conv2D(filters=1024, kernel_size=(3,3), padding='same', use_bias=False, name='conv_7')(X)
X = BatchNormalization(name='norm_7')(X)
X = LeakyReLU(alpha=0.1)(X)

# Layer 8
X = Conv2D(filters=1024, kernel_size=(3,3), padding='same', use_bias=False, name='conv_8')(X)
X = BatchNormalization(name='norm_8')(X)
X = LeakyReLU(alpha=0.1)(X)

# Layer 9
# BOX=5, CLASS=20, GRID_H=13, GRID_W=13
X = Conv2D(BOX * (4 + 1 + CLASS), kernel_size=(1, 1), kernel_initializer='he_normal')(X)
Y = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(X)       # X = [None, 13, 13, 125], Y = [None, 13, 13, 5, 25]

# small hack to allow true_boxes to be registered when Keras build the model 
# for more information: https://github.com/fchollet/keras/issues/2790
Y = Lambda(lambda args: args[0])([Y, true_boxes])

# Create model
model = Model([input_image, true_boxes], Y)

In [28]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 416, 416, 16) 432         input_3[0][0]                    
__________________________________________________________________________________________________
norm_1 (BatchNormalization)     (None, 416, 416, 16) 64          conv_1[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 416, 416, 16) 0           norm_1[0][0]                     
__________________________________________________________________________________________________
max_poolin

# Update network with pre-trained weights (if any)

In [29]:
if(os.path.isdir("weights")):
    if(os.path.exists("weights/yolov2-tiny-voc.weights")):
        print("file already exists")
    else:
        os.chdir("weights/")
        url = 'https://pjreddie.com/media/files/yolov2-tiny-voc.weights'
        wget.download(url)
        os.chdir("../")
else:
    os.makedirs("weights")
    os.chdir("weights/")
    url = 'https://pjreddie.com/media/files/yolov2-tiny-voc.weights'
    wget.download(url)
    os.chdir("../")

file already exists


In [30]:
class weight_reader:
    def __init__(self, weight_file):
        self.offset = 4
        self.all_weights = np.fromfile(weight_file, dtype='float32')
        
    def read_bytes(self, size):
        read = self.all_weights[self.offset : self.offset + size]
        self.offset = self.offset + size
        return read

In [31]:
if(os.path.exists('weights/yolov2-tiny-voc.weights')):
    wr = weight_reader('weights/yolov2-tiny-voc.weights')
    conv_ = 8
    
    for i in range(1, conv_ + 1):
        conv_layer = model.get_layer('conv_' + str(i))
    
        if i < conv_:
            norm_layer = model.get_layer('norm_' + str(i))
            size = np.prod(norm_layer.get_weights()[0].shape)
        
            beta  = wr.read_bytes(size)
            gamma = wr.read_bytes(size)
            mean  = wr.read_bytes(size)
            var   = wr.read_bytes(size)
        
            weights = norm_layer.set_weights([gamma, beta, mean, var])
        
        if len(conv_layer.get_weights()) > 1:
            bias   = wr.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
            kernel = wr.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
            kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
            kernel = kernel.transpose([2,3,1,0])
            conv_layer.set_weights([kernel, bias])
        else:
            kernel = wr.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
            kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
            kernel = kernel.transpose([2,3,1,0])
            conv_layer.set_weights([kernel])
        
    print('Model loaded with pre-trained weights.')
else:
    print('Weights file doesn\'t exists.')

Model loaded with pre-trained weights.


# Download Training and Testing data

In [None]:
# Save current directory path
curr_dir = os.getcwd()

# Training Data
if(os.path.isdir("data") and os.path.exists("data/train2014.zip")):
    if(os.path.isdir("data/Ext/images/train2014") and len(os.listdir('data/Ext/images/train2014')) != 0):
        print('Training files available on disk.')
    else:
        with zipfile.ZipFile("data/train2014.zip", 'r') as zip_ref:
            zip_ref.extractall("data/Ext/images/")
else:
    if(os.path.isdir("data")):
        os.chdir("data/")
    else:
        os.makedirs("data/Ext/images")
        os.chdir("data/")
    url = 'http://images.cocodataset.org/zips/train2014.zip'
    filename = wget.download(url)
    with zipfile.ZipFile("train2014.zip", 'r') as zip_ref:
        zip_ref.extractall("./Ext/images")

os.chdir(curr_dir)

# Training Data Annotation
if(os.path.isdir("data") and os.path.exists("data/annotations_trainval2014.zip")):
    if(os.path.isdir("data/Ext/annotations") and len(os.listdir('data/Ext/annotations')) != 0):
        print('Training annotation files available on disk.')
    else:
        with zipfile.ZipFile("data/annotations_trainval2014.zip", 'r') as zip_ref:
            zip_ref.extractall("data/Ext/")
else:
    if(os.path.isdir("data")):
        os.chdir("data/")
    else:
        os.makedirs("data")
        os.chdir("data/")
    url = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip'
    filename = wget.download(url)
    with zipfile.ZipFile("annotations_trainval2014.zip", 'r') as zip_ref:
        zip_ref.extractall("./Ext/")

os.chdir(curr_dir)

# Validation/Testing Data Annotation
if(os.path.isdir("data") and os.path.exists("data/val2014.zip")):
    if(os.path.isdir("data/Ext/images/val2014") and len(os.listdir('data/Ext/images/val2014')) != 0):
        print('Validation / Testing files available on disk.')
    else:
        with zipfile.ZipFile("data/val2014.zip", 'r') as zip_ref:
            zip_ref.extractall("data/Ext/images/")
else:
    if(os.path.isdir("data")):
        os.chdir("data/")
    else:
        os.makedirs("data")
        os.chdir("data/")
    url = 'http://images.cocodataset.org/zips/val2014.zip'
    filename = wget.download(url)
    with zipfile.ZipFile("val2014.zip", 'r') as zip_ref:
        zip_ref.extractall("./Ext/images/")

os.chdir(curr_dir)

# Convert data from COCO format to VOC

In [None]:
os.makedirs("data/Ext/images/train2014ann")
!python coco2pascal.py create_annotations data/Ext train data/Ext/images/train2014ann

In [10]:
os.makedirs("data/Ext/images/val2014ann")
!python coco2pascal.py create_annotations data/Ext val data/Ext/images/val2014ann

COCO_val2014_000000360951
COCO_val2014_000000245642
COCO_val2014_000000006226
COCO_val2014_000000011291
COCO_val2014_000000423410
COCO_val2014_000000107094
COCO_val2014_000000055375
COCO_val2014_000000172265
COCO_val2014_000000145956
COCO_val2014_000000018193
COCO_val2014_000000331447
COCO_val2014_000000094501
COCO_val2014_000000314729
COCO_val2014_000000335924
COCO_val2014_000000236000
COCO_val2014_000000540556
COCO_val2014_000000368367
COCO_val2014_000000120806
COCO_val2014_000000457491
COCO_val2014_000000404027
COCO_val2014_000000179954
COCO_val2014_000000507714
COCO_val2014_000000095692
COCO_val2014_000000216333
COCO_val2014_000000484404
COCO_val2014_000000162914
COCO_val2014_000000097767
COCO_val2014_000000191691
COCO_val2014_000000079565
COCO_val2014_000000484760
COCO_val2014_000000456046
COCO_val2014_000000248771
COCO_val2014_000000417216
COCO_val2014_000000475641
COCO_val2014_000000255479
COCO_val2014_000000471842
COCO_val2014_000000103375
COCO_val2014_000000188439
COCO_val2014

COCO_val2014_000000213255
COCO_val2014_000000064974
COCO_val2014_000000198612
COCO_val2014_000000325391
COCO_val2014_000000311041
COCO_val2014_000000544611
COCO_val2014_000000324008
COCO_val2014_000000056344
COCO_val2014_000000189203
COCO_val2014_000000455395
COCO_val2014_000000084463
COCO_val2014_000000013031
COCO_val2014_000000266727
COCO_val2014_000000117036
COCO_val2014_000000511349
COCO_val2014_000000553094
COCO_val2014_000000094536
COCO_val2014_000000417416
COCO_val2014_000000269975
COCO_val2014_000000226498
COCO_val2014_000000060203
COCO_val2014_000000488832
COCO_val2014_000000503277
COCO_val2014_000000564659
COCO_val2014_000000203734
COCO_val2014_000000042661
COCO_val2014_000000566631
COCO_val2014_000000477671
COCO_val2014_000000368294
COCO_val2014_000000136740
COCO_val2014_000000248242
COCO_val2014_000000475887
COCO_val2014_000000222788
COCO_val2014_000000051984
COCO_val2014_000000168260
COCO_val2014_000000104345
COCO_val2014_000000558083
COCO_val2014_000000282229
COCO_val2014

COCO_val2014_000000473069
COCO_val2014_000000262810
COCO_val2014_000000530220
COCO_val2014_000000298722
COCO_val2014_000000115412
COCO_val2014_000000232489
COCO_val2014_000000474410
COCO_val2014_000000452917
COCO_val2014_000000446324
COCO_val2014_000000444913
COCO_val2014_000000454693
COCO_val2014_000000420914
COCO_val2014_000000464682
COCO_val2014_000000416098
COCO_val2014_000000136722
COCO_val2014_000000166287
COCO_val2014_000000518116
COCO_val2014_000000272599
COCO_val2014_000000444672
COCO_val2014_000000069194
COCO_val2014_000000539141
COCO_val2014_000000131390
COCO_val2014_000000173959
COCO_val2014_000000149196
COCO_val2014_000000107430
COCO_val2014_000000132814
COCO_val2014_000000455229
COCO_val2014_000000150623
COCO_val2014_000000038693
COCO_val2014_000000197492
COCO_val2014_000000389116
COCO_val2014_000000122747
COCO_val2014_000000062500
COCO_val2014_000000443343
COCO_val2014_000000011615
COCO_val2014_000000356263
COCO_val2014_000000060132
COCO_val2014_000000530162
COCO_val2014

COCO_val2014_000000381610
COCO_val2014_000000084235
COCO_val2014_000000337692
COCO_val2014_000000137003
COCO_val2014_000000177489
COCO_val2014_000000249219
COCO_val2014_000000247599
COCO_val2014_000000323515
COCO_val2014_000000099341
COCO_val2014_000000193547
COCO_val2014_000000083602
COCO_val2014_000000228197
COCO_val2014_000000574845
COCO_val2014_000000347203
COCO_val2014_000000356500
COCO_val2014_000000491765
COCO_val2014_000000382030
COCO_val2014_000000064425
COCO_val2014_000000252639
COCO_val2014_000000141616
COCO_val2014_000000338428
COCO_val2014_000000049731
COCO_val2014_000000013904
COCO_val2014_000000443630
COCO_val2014_000000260036
COCO_val2014_000000026183
COCO_val2014_000000133061
COCO_val2014_000000479334
COCO_val2014_000000417043
COCO_val2014_000000452137
COCO_val2014_000000403345
COCO_val2014_000000426611
COCO_val2014_000000372067
COCO_val2014_000000315672
COCO_val2014_000000220670
COCO_val2014_000000196518
COCO_val2014_000000334530
COCO_val2014_000000004066
COCO_val2014

COCO_val2014_000000047419
COCO_val2014_000000508370
COCO_val2014_000000371890
COCO_val2014_000000277614
COCO_val2014_000000047177
COCO_val2014_000000103747
COCO_val2014_000000456768
COCO_val2014_000000444073
COCO_val2014_000000210099
COCO_val2014_000000122535
COCO_val2014_000000226848
COCO_val2014_000000506552
COCO_val2014_000000515904
COCO_val2014_000000413235
COCO_val2014_000000074132
COCO_val2014_000000100853
COCO_val2014_000000301429
COCO_val2014_000000513615
COCO_val2014_000000570594
COCO_val2014_000000042070
COCO_val2014_000000099348
COCO_val2014_000000065806
COCO_val2014_000000282674
COCO_val2014_000000192730
COCO_val2014_000000385098
COCO_val2014_000000170038
COCO_val2014_000000554286
COCO_val2014_000000302643
COCO_val2014_000000529096
COCO_val2014_000000356293
COCO_val2014_000000327413
COCO_val2014_000000453341
COCO_val2014_000000158254
COCO_val2014_000000557447
COCO_val2014_000000081224
COCO_val2014_000000410614
COCO_val2014_000000565903
COCO_val2014_000000575441
COCO_val2014

COCO_val2014_000000579893
COCO_val2014_000000556932
COCO_val2014_000000233266
COCO_val2014_000000483446
COCO_val2014_000000530905
COCO_val2014_000000032811
COCO_val2014_000000438364
COCO_val2014_000000388846
COCO_val2014_000000066239
COCO_val2014_000000161465
COCO_val2014_000000512112
COCO_val2014_000000432016
COCO_val2014_000000162867
COCO_val2014_000000548561
COCO_val2014_000000000285
COCO_val2014_000000419401
COCO_val2014_000000322762
COCO_val2014_000000552188
COCO_val2014_000000174866
COCO_val2014_000000334073
COCO_val2014_000000574282
COCO_val2014_000000551822
COCO_val2014_000000226412
COCO_val2014_000000342499
COCO_val2014_000000293221
COCO_val2014_000000517987
COCO_val2014_000000455325
COCO_val2014_000000534394
COCO_val2014_000000011260
COCO_val2014_000000172396
COCO_val2014_000000338044
COCO_val2014_000000096615
COCO_val2014_000000058043
COCO_val2014_000000329687
COCO_val2014_000000101936
COCO_val2014_000000395388
COCO_val2014_000000214471
COCO_val2014_000000559842
COCO_val2014

COCO_val2014_000000401971
COCO_val2014_000000502558
COCO_val2014_000000050778
COCO_val2014_000000579561
COCO_val2014_000000215151
COCO_val2014_000000082718
COCO_val2014_000000138814
COCO_val2014_000000554348
COCO_val2014_000000463876
COCO_val2014_000000192181
COCO_val2014_000000181572
COCO_val2014_000000121586
COCO_val2014_000000026430
COCO_val2014_000000026430.jpg
COCO_val2014_000000450543
COCO_val2014_000000459662
COCO_val2014_000000197499
COCO_val2014_000000079920
COCO_val2014_000000213687
COCO_val2014_000000454610
COCO_val2014_000000496450
COCO_val2014_000000466416
COCO_val2014_000000335374
COCO_val2014_000000078371
COCO_val2014_000000472143
COCO_val2014_000000398548
COCO_val2014_000000275488
COCO_val2014_000000203455
COCO_val2014_000000491408
COCO_val2014_000000218456
COCO_val2014_000000463699
COCO_val2014_000000419344
COCO_val2014_000000519691
COCO_val2014_000000075560
COCO_val2014_000000050177
COCO_val2014_000000350054
COCO_val2014_000000506569
COCO_val2014_000000083960
COCO_val

COCO_val2014_000000521549
COCO_val2014_000000575410
COCO_val2014_000000135167
COCO_val2014_000000231610
COCO_val2014_000000199404
COCO_val2014_000000412996
COCO_val2014_000000498259
COCO_val2014_000000205238
COCO_val2014_000000429366
COCO_val2014_000000150812
COCO_val2014_000000184400
COCO_val2014_000000450464
COCO_val2014_000000395644
COCO_val2014_000000052949
COCO_val2014_000000288983
COCO_val2014_000000005213
COCO_val2014_000000523274
COCO_val2014_000000241155
COCO_val2014_000000481550
COCO_val2014_000000528225
COCO_val2014_000000459967
COCO_val2014_000000555267
COCO_val2014_000000463771
COCO_val2014_000000521359
COCO_val2014_000000555898
COCO_val2014_000000106909
COCO_val2014_000000551737
COCO_val2014_000000133636
COCO_val2014_000000288501
COCO_val2014_000000491872
COCO_val2014_000000268259
COCO_val2014_000000291981
COCO_val2014_000000356494
COCO_val2014_000000058225
COCO_val2014_000000388422
COCO_val2014_000000146601
COCO_val2014_000000148737
COCO_val2014_000000164366
COCO_val2014

COCO_val2014_000000317639
COCO_val2014_000000218208
COCO_val2014_000000304083
COCO_val2014_000000391269
COCO_val2014_000000079031
COCO_val2014_000000502936
COCO_val2014_000000356414
COCO_val2014_000000181586
COCO_val2014_000000164883
COCO_val2014_000000024480
COCO_val2014_000000110769
COCO_val2014_000000248276
COCO_val2014_000000248276.jpg
COCO_val2014_000000177015
COCO_val2014_000000145295
COCO_val2014_000000315338
COCO_val2014_000000174888
COCO_val2014_000000578655
COCO_val2014_000000360767
COCO_val2014_000000272049
COCO_val2014_000000472228
COCO_val2014_000000460621
COCO_val2014_000000270001
COCO_val2014_000000412399
COCO_val2014_000000513319
COCO_val2014_000000342186
COCO_val2014_000000161752
COCO_val2014_000000454957
COCO_val2014_000000250364
COCO_val2014_000000301575
COCO_val2014_000000280530
COCO_val2014_000000125208
COCO_val2014_000000136328
COCO_val2014_000000408818
COCO_val2014_000000313182
COCO_val2014_000000117368
COCO_val2014_000000185156
COCO_val2014_000000035628
COCO_val

COCO_val2014_000000468993
COCO_val2014_000000473929
COCO_val2014_000000498383
COCO_val2014_000000253557
COCO_val2014_000000109403
COCO_val2014_000000538320
COCO_val2014_000000235319
COCO_val2014_000000468885
COCO_val2014_000000255627
COCO_val2014_000000031749
COCO_val2014_000000189424
COCO_val2014_000000108130
COCO_val2014_000000446894
COCO_val2014_000000580117
COCO_val2014_000000557324
COCO_val2014_000000198204
COCO_val2014_000000143533
COCO_val2014_000000479067
COCO_val2014_000000522137
COCO_val2014_000000048786
COCO_val2014_000000485390
COCO_val2014_000000168801
COCO_val2014_000000537672
COCO_val2014_000000214367
COCO_val2014_000000474430
COCO_val2014_000000513417
COCO_val2014_000000305480
COCO_val2014_000000232863
COCO_val2014_000000133195
COCO_val2014_000000219820
COCO_val2014_000000119075
COCO_val2014_000000550432
COCO_val2014_000000088162
COCO_val2014_000000358606
COCO_val2014_000000369045
COCO_val2014_000000290196
COCO_val2014_000000023937
COCO_val2014_000000199442
COCO_val2014

COCO_val2014_000000302038
COCO_val2014_000000431523
COCO_val2014_000000456223
COCO_val2014_000000449903
COCO_val2014_000000522940
COCO_val2014_000000307026
COCO_val2014_000000236874
COCO_val2014_000000225731
COCO_val2014_000000250758
COCO_val2014_000000143961
COCO_val2014_000000449433
COCO_val2014_000000062167
COCO_val2014_000000483965
COCO_val2014_000000430238
COCO_val2014_000000250301
COCO_val2014_000000420110
COCO_val2014_000000395083
COCO_val2014_000000029197
COCO_val2014_000000031240
COCO_val2014_000000149568
COCO_val2014_000000505967
COCO_val2014_000000508312
COCO_val2014_000000442549
COCO_val2014_000000044328
COCO_val2014_000000178606
COCO_val2014_000000065567
COCO_val2014_000000091373
COCO_val2014_000000160820
COCO_val2014_000000327165
COCO_val2014_000000169438
COCO_val2014_000000398339
COCO_val2014_000000527695
COCO_val2014_000000421370
COCO_val2014_000000565245
COCO_val2014_000000291245
COCO_val2014_000000415646
COCO_val2014_000000072052
COCO_val2014_000000489088
COCO_val2014

COCO_val2014_000000486803
COCO_val2014_000000556278
COCO_val2014_000000378652
COCO_val2014_000000274613
COCO_val2014_000000480223
COCO_val2014_000000430788
COCO_val2014_000000326128
COCO_val2014_000000227478
COCO_val2014_000000281541
COCO_val2014_000000417055
COCO_val2014_000000310858
COCO_val2014_000000480490
COCO_val2014_000000283520
COCO_val2014_000000153973
COCO_val2014_000000412966
COCO_val2014_000000581829
COCO_val2014_000000469634
COCO_val2014_000000524977
COCO_val2014_000000573647
COCO_val2014_000000070258
COCO_val2014_000000157365
COCO_val2014_000000270785
COCO_val2014_000000035974
COCO_val2014_000000371749
COCO_val2014_000000533958
COCO_val2014_000000095191
COCO_val2014_000000419444
COCO_val2014_000000065982
COCO_val2014_000000343820
COCO_val2014_000000535602
COCO_val2014_000000143554
COCO_val2014_000000109146
COCO_val2014_000000176519
COCO_val2014_000000477500
COCO_val2014_000000150875
COCO_val2014_000000436172
COCO_val2014_000000482172
COCO_val2014_000000215482
COCO_val2014

COCO_val2014_000000397375
COCO_val2014_000000261097
COCO_val2014_000000574015
COCO_val2014_000000516733
COCO_val2014_000000433311
COCO_val2014_000000266099
COCO_val2014_000000457735
COCO_val2014_000000290515
COCO_val2014_000000012744
COCO_val2014_000000440106
COCO_val2014_000000203317
COCO_val2014_000000347724
COCO_val2014_000000058117
COCO_val2014_000000211825
COCO_val2014_000000185925
COCO_val2014_000000074656
COCO_val2014_000000187072
COCO_val2014_000000464390
COCO_val2014_000000540473
COCO_val2014_000000199236
COCO_val2014_000000338375
COCO_val2014_000000363986
COCO_val2014_000000102331
COCO_val2014_000000308507
COCO_val2014_000000057124
COCO_val2014_000000297595
COCO_val2014_000000105734
COCO_val2014_000000200231
COCO_val2014_000000546226
COCO_val2014_000000175024
COCO_val2014_000000565085
COCO_val2014_000000397734
COCO_val2014_000000436636
COCO_val2014_000000207366
COCO_val2014_000000219657
COCO_val2014_000000396903
COCO_val2014_000000321522
COCO_val2014_000000440475
COCO_val2014

COCO_val2014_000000157617
COCO_val2014_000000337969
COCO_val2014_000000305540
COCO_val2014_000000437221
COCO_val2014_000000400123
COCO_val2014_000000280733
COCO_val2014_000000413955
COCO_val2014_000000199050
COCO_val2014_000000432570
COCO_val2014_000000215812
COCO_val2014_000000290416
COCO_val2014_000000376443
COCO_val2014_000000467966
COCO_val2014_000000425004
COCO_val2014_000000062937
COCO_val2014_000000071699
COCO_val2014_000000539509
COCO_val2014_000000544432
COCO_val2014_000000227460
COCO_val2014_000000441614
COCO_val2014_000000346788
COCO_val2014_000000246014
COCO_val2014_000000536073
COCO_val2014_000000179397
COCO_val2014_000000497067
COCO_val2014_000000412015
COCO_val2014_000000062459
COCO_val2014_000000142581
COCO_val2014_000000516143
COCO_val2014_000000212939
COCO_val2014_000000452300
COCO_val2014_000000043753
COCO_val2014_000000415949
COCO_val2014_000000469169
COCO_val2014_000000120234
COCO_val2014_000000186721
COCO_val2014_000000036598
COCO_val2014_000000108193
COCO_val2014

COCO_val2014_000000459645
COCO_val2014_000000200143
COCO_val2014_000000033441
COCO_val2014_000000188946
COCO_val2014_000000176527
COCO_val2014_000000488743
COCO_val2014_000000109939
COCO_val2014_000000281754
COCO_val2014_000000392985
COCO_val2014_000000508846
COCO_val2014_000000203732
COCO_val2014_000000333480
COCO_val2014_000000356380
COCO_val2014_000000277073
COCO_val2014_000000160666
COCO_val2014_000000546782
COCO_val2014_000000504811
COCO_val2014_000000428658
COCO_val2014_000000212072
COCO_val2014_000000042144
COCO_val2014_000000063796
COCO_val2014_000000170211
COCO_val2014_000000092634
COCO_val2014_000000558457
COCO_val2014_000000420120
COCO_val2014_000000451228
COCO_val2014_000000124262
COCO_val2014_000000296676
COCO_val2014_000000421471
COCO_val2014_000000323895
COCO_val2014_000000229858
COCO_val2014_000000397958
COCO_val2014_000000300855
COCO_val2014_000000240259
COCO_val2014_000000106617
COCO_val2014_000000510329
COCO_val2014_000000007155
COCO_val2014_000000152823
COCO_val2014

COCO_val2014_000000190829
COCO_val2014_000000497593
COCO_val2014_000000144003
COCO_val2014_000000187887
COCO_val2014_000000419867
COCO_val2014_000000376246
COCO_val2014_000000553057
COCO_val2014_000000270316
COCO_val2014_000000153834
COCO_val2014_000000574069
COCO_val2014_000000300214
COCO_val2014_000000217071
COCO_val2014_000000038938
COCO_val2014_000000502006
COCO_val2014_000000073634
COCO_val2014_000000408950
COCO_val2014_000000252716
COCO_val2014_000000301266
COCO_val2014_000000419119
COCO_val2014_000000491683
COCO_val2014_000000211653
COCO_val2014_000000422807
COCO_val2014_000000015810
COCO_val2014_000000352111
COCO_val2014_000000454623
COCO_val2014_000000473214
COCO_val2014_000000417547
COCO_val2014_000000523371
COCO_val2014_000000395849
COCO_val2014_000000130043
COCO_val2014_000000457230
COCO_val2014_000000072018
COCO_val2014_000000213579
COCO_val2014_000000505461
COCO_val2014_000000488539
COCO_val2014_000000302823
COCO_val2014_000000001682
COCO_val2014_000000362563
COCO_val2014

COCO_val2014_000000287171
COCO_val2014_000000344125
COCO_val2014_000000252292
COCO_val2014_000000441541
COCO_val2014_000000523212
COCO_val2014_000000378403
COCO_val2014_000000080096
COCO_val2014_000000401004
COCO_val2014_000000484754
COCO_val2014_000000209989
COCO_val2014_000000199634
COCO_val2014_000000502311
COCO_val2014_000000328890
COCO_val2014_000000179948
COCO_val2014_000000448275
COCO_val2014_000000139953
COCO_val2014_000000401092
COCO_val2014_000000566758
COCO_val2014_000000266951
COCO_val2014_000000087890
COCO_val2014_000000228506
COCO_val2014_000000475108
COCO_val2014_000000051741
COCO_val2014_000000470931
COCO_val2014_000000102329
COCO_val2014_000000521605
COCO_val2014_000000057265
COCO_val2014_000000498225
COCO_val2014_000000240387
COCO_val2014_000000446574
COCO_val2014_000000109916
COCO_val2014_000000031000
COCO_val2014_000000435012
COCO_val2014_000000056651
COCO_val2014_000000277642
COCO_val2014_000000361400
COCO_val2014_000000561088
COCO_val2014_000000315705
COCO_val2014

In [11]:
training_data   = "data/Ext/images/train2014/"
training_data_ann = "data/Ext/images/train2014ann/"
validation_data = "data/Ext/images/val2014/"
validation_data_ann = "data/Ext/images/val2014ann/"

# Parse annotations to generator training and validation generator

In [12]:
gen_config = {
    'IMAGE_H'          : IMAGE_H,
    'IMAGE_W'          : IMAGE_W,
    'GRID_H'           : GRID_H,
    'GRID_W'           : GRID_W,
    'BOX'              : BOX,
    'LABELS'           : LABELS,
    'CLASS'            : len(LABELS),
    'ANCHORS'          : ANCHORS,
    'BATCH_SIZE'       : BATCH_SIZE,
    'TRUE_BOX_BUFFER'  : TRUE_BOX_BUFFER,
}

In [13]:
def normalize(image):
    return image / 255

In [14]:
train_imgs, seen_train_labels = parse_annotation(training_data_ann, \
                                                 training_data,     \
                                                 labels=LABELS)

train_batch = BatchGenerator(train_imgs, gen_config, norm=normalize)


valid_imgs, seen_valid_labels = parse_annotation(validation_data_ann, \
                                                 validation_data,     \
                                                 labels=LABELS)

valid_batch = BatchGenerator(valid_imgs, gen_config, norm=normalize, jitter=False)

# Loss Function

## 1 - Classification Loss

if an object is detected, the classification loss at each cell is the squared error of the class conditional probabilities for each loss:

<img src="images/ClassificationLoss.png" style="width:500px;height:250;">
<caption><center> <u> **Figure 1** </u>: **Classification Loss**<br> </center></caption>

In [15]:
def classification_loss(y_true, y_pred):

    mask_shape = tf.shape(y_true)[:4]
    
    class_mask = tf.zeros(mask_shape)
    
    # true classification box
    true_box_class = tf.argmax(y_true[...,5:], -1)
    
    # classification mask
    class_mask = y_true[...,4] * tf.gather(CLASS_WEIGHTS, true_box_class)

    # number of classification boxes
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
    
    # predicted box
    pred_box_class = y_pred[...,5:]

    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + EPSILON)
    
    loss_class = tf.Print(loss_class, [loss_class], message='Loss Class \t', summarize=1000)
    
    return loss_class

## 2 - Localization Loss

The localization loss measures the errors in the predicted boundary box locations and sizes. We only count the box responsible for detecting the object.

<img src="images/LocalizationLoss.png" style="width:500px;height:250;">
<caption><center> <u> **Figure 2** </u>: **Localization Loss**<br> </center></caption>

We do not want to weight absolute errors in large boxes and small boxes equally. i.e. a 2-pixel error in a large box is the same for a small box. To partially address this, YOLO predicts the square root of the bounding box width and height instead of the width and height. In addition, to put more emphasis on the boundary box accuracy, we multiply the loss by λcoord (default: 5).

In [16]:
def localization_loss(y_true, y_pred):
    mask_shape = tf.shape(y_true)[:4]
    
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3,4))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
    
    coord_mask = tf.zeros(mask_shape)
    
    seen = tf.Variable(0.)
    
    """
    Adjust prediction
    """
    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    
    ### adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
    
    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell
    
    ### adjust w and h
    true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically   
    
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1)
    
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.assign_add(seen, 1.)
    
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES), 
                          lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                   true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2]) * no_boxes_mask, 
                                   tf.ones_like(coord_mask)],
                          lambda: [true_box_xy, 
                                   true_box_wh,
                                   coord_mask])

    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    
    Localization_Loss = loss_xy + loss_wh
    
    # Localization_Loss = tf.Print(Localization_Loss, [loss_xy], message='Loss XY \t', summarize=1000)
    # Localization_Loss = tf.Print(Localization_Loss, [loss_wh], message='Loss WH \t', summarize=1000)
    
    return Localization_Loss

## 3 - Confidence Loss

If an object is detected in the box, the confidence loss (measuring the objectness of the box) is:

<img src="images/ConfidenceLoss1.png" style="width:500px;height:250;">
<caption><center> <u> **Figure 3** </u>: **Confidence Loss**<br> </center></caption>

If an object is not detected in the box, the confidence loss is:

<img src="images/ConfidenceLoss2.png" style="width:500px;height:250;">
<caption><center> <u> **Figure 4** </u>: **Confidence Loss**<br> </center></caption>

Most boxes do not contain any objects. This causes a class imbalance problem, i.e. we train the model to detect background more frequently than detecting objects. To remedy this, we weight this loss down by a factor λnoobj (default: 0.5).

In [17]:
def confidence_loss(y_true, y_pred):
    
    mask_shape = tf.shape(y_true)[:4]
    
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3,4))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
    
    conf_mask  = tf.zeros(mask_shape)
    
    """
    Adjust prediction
    """
    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    
    ### adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
    
    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])
    
    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell
    
    ### adjust w and h
    true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically
    
    ### adjust confidence
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half
    
    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half       
    
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)
    
    true_box_conf = iou_scores * y_true[..., 4]
    
    ### confidence mask: penelize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]
    
    true_wh_half = true_wh / 2.
    true_mins    = true_xy - true_wh_half
    true_maxes   = true_xy + true_wh_half
    
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)
    
    pred_wh_half = pred_wh / 2.
    pred_mins    = pred_xy - pred_wh_half
    pred_maxes   = pred_xy + pred_wh_half    
    
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4])
    
    # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE

    nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))

    loss_conf  = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask)  / (nb_conf_box  + EPSILON) / 2.
    
    # loss_conf = tf.Print(loss_conf, [loss_conf], message='Loss Conf \t', summarize=1000)
    
    return loss_conf

## Total Loss

The final loss adds localization, confidence and classification losses together.

<img src="images/TotalLoss.png" style="width:500px;height:250;">
<caption><center> <u> **Figure 5** </u>: **Total Loss**<br> </center></caption>

In [18]:
def total_loss(y_true, y_pred):
    loss = classification_loss(y_true, y_pred) + localization_loss(y_true, y_pred) + confidence_loss(y_true, y_pred)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    return loss

# Optimizer (Adam)

In [32]:
'''
https://keras.io/optimizers/

Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
'''

optimizer = Adam(lr=0.5e-5, epsilon=EPSILON)

# Configure the model for training

In [33]:
'''
https://keras.io/models/sequential/

compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, \
                                                        weighted_metrics=None, target_tensors=None)
'''
model.compile(loss=total_loss, optimizer=optimizer)

# Callback functions

In [34]:
'''
https://keras.io/callbacks/

EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', \
                                                  baseline=None, restore_best_weights=False)

ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False,  \
                                              save_weights_only=False, mode='auto', period=1)
'''
EarlyStop = EarlyStopping(min_delta=0.001, patience=5, mode='min', verbose=1)

if(os.path.isdir("CheckPoint")):
    print('Check point directory exists.')
else:
    os.makedirs("CheckPoint")

MakeCheckPoint = ModelCheckpoint("CheckPoint/weights.{epoch:02d}-{val_loss:.2f}.hdf5", \
                                 verbose=1, save_best_only=True, mode='min')

Check point directory exists.


# Start training

In [35]:
'''
https://keras.io/models/sequential/

fit_generator(generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, \
                   validation_data=None, validation_steps=None, class_weight=None,  \
                   max_queue_size=10, workers=1, use_multiprocessing=False,         \
                   shuffle=True, initial_epoch=0)
'''

model.fit_generator(
                    generator        = train_batch,
                    steps_per_epoch  = len(train_batch),
                    epochs           = 10,
                    validation_data  = valid_batch,
                    validation_steps = len(valid_batch),
                    callbacks        = [EarlyStop, MakeCheckPoint],
                   )

Epoch 1/10

Epoch 00001: val_loss improved from inf to 9.01431, saving model to CheckPoint/weights.01-9.01.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 9.01431 to 4.07161, saving model to CheckPoint/weights.02-4.07.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 4.07161 to 2.52534, saving model to CheckPoint/weights.03-2.53.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 2.52534 to 2.11336, saving model to CheckPoint/weights.04-2.11.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 2.11336 to 1.88730, saving model to CheckPoint/weights.05-1.89.hdf5
Epoch 6/10

Epoch 00006: val_loss improved from 1.88730 to 1.78549, saving model to CheckPoint/weights.06-1.79.hdf5
Epoch 7/10

Epoch 00007: val_loss improved from 1.78549 to 1.71714, saving model to CheckPoint/weights.07-1.72.hdf5
Epoch 8/10

Epoch 00008: val_loss improved from 1.71714 to 1.67505, saving model to CheckPoint/weights.08-1.68.hdf5
Epoch 9/10

Epoch 00009: val_loss improved from 1.67505 to 1.60425, saving m

<keras.callbacks.History at 0x23e54d62ba8>

### Non-max suppression ###

Even after filtering by thresholding over the classes scores, you still end up a lot of overlapping boxes. A second filter for selecting the right boxes is called non-maximum suppression (NMS). 

<img src="images/non-max-suppression.png" style="width:500px;height:400;">
<caption><center> <u> **Figure 5** </u>: In this example, the model has predicted 3 cars, but it's actually 3 predictions of the same car. Running non-max suppression (NMS) will select only the most accurate (highest probabiliy) one of the 3 boxes. <br> </center></caption>


Non-max suppression uses the very important function called **"Intersection over Union"**, or IoU.
<img src="images/iou.png" style="width:500px;height:400;">
<caption><center> <u> **Figure 6** </u>: Definition of "Intersection over Union". <br> </center></caption> 

## Test Tiny YOLO

In [65]:
def predict(xmodel, image_file):

    # Preprocess your image
    image, image_data = preprocess_image("images/" + image_file, model_image_size = (IMAGE_H, IMAGE_W))


    _array = np.zeros((1,1,1,1,TRUE_BOX_BUFFER,4))

    netout = xmodel.predict([image_data, _array])

    return netout

In [66]:
def decode_network_output(netout):
    return decode_netout(netout[0], 
                        anchors=ANCHORS, 
                        nb_class=CLASS)

Run the following cell on the "test.jpg" image to verify that your function is correct.

In [67]:
netout = predict(model, "COCO_val2014_000000385918.jpg")

In [68]:
boxes = decode_network_output(netout)

In [69]:
for box in boxes:
    print(LABELS[box.get_label()])

person
person


**References**: The ideas presented in this notebook came primarily from the two YOLO papers. The implementation here also took significant inspiration and used many components from Allan Zelener's github repository. The pretrained weights used in this exercise came from the official YOLO website. 
- Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi - [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640) (2015)
- Joseph Redmon, Ali Farhadi - [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242) (2016)
- Allan Zelener - [YAD2K: Yet Another Darknet 2 Keras](https://github.com/allanzelener/YAD2K)
- The official YOLO website (https://pjreddie.com/darknet/yolo/) 