In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os, cv2
%matplotlib inline
os.getcwd()


'/home/ubuntu/VOCdevkit'

## Define 20 classes for PASCAL VOC 2012 dataset

In [1]:
# Object categories of PASCAL VOC 2012. The same list (in this order) is handed
# to parse_annotation and to the batch generator configuration.
LABELS = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

## Define Anchor Boxes - YOLO Algorithm

In [3]:
# Anchor-box priors. Written here as one (width, height) pair per anchor and
# then flattened, so ANCHORS is the 1-D array [w1, h1, w2, h2, ...] of length 8.
ANCHORS = np.array([
    (1.07709888,  1.78171903),   # anchor box 1
    (2.71054693,  5.12469308),   # anchor box 2
    (10.47181473, 10.09646365),  # anchor box 3
    (5.48531347,  8.11011331),   # anchor box 4
]).flatten()

## Read in images and annotations

In [5]:
# Relative locations of the VOC2012 annotation files and JPEG images.
train_annot_folder = "../VOCdevkit/VOC2012/Annotations/"
train_image_folder = "../VOCdevkit/VOC2012/JPEGImages/"

# Seed NumPy's global RNG before the backend helper is imported and used.
np.random.seed(1)
from backend import parse_annotation

# parse_annotation returns the per-image records plus a second value that is
# kept as `seen_train_labels`; only objects whose class is in LABELS are requested.
train_image, seen_train_labels = parse_annotation(
    train_annot_folder, train_image_folder, labels=LABELS)

print("N train = {}".format(len(train_image)))

Using TensorFlow backend.


N train = 17125


## Generating Batches

In [6]:
from backend import SimpleBatchGenerator

# Batch, image and grid settings shared by the generator and the model head.
BATCH_SIZE = 200
IMAGE_H = IMAGE_W = 416
GRID_H = GRID_W = 13
# Also used as a dimension of the `true_boxes` Input defined further down.
TRUE_BOX_BUFFER = 50
# ANCHORS is flat (w, h, w, h, ...), so there are two entries per anchor box.
BOX = len(ANCHORS) // 2
CLASS = len(LABELS)

# Settings bundle consumed by SimpleBatchGenerator.
generator_config = dict(
    IMAGE_H=IMAGE_H,
    IMAGE_W=IMAGE_W,
    GRID_H=GRID_H,
    GRID_W=GRID_W,
    LABELS=LABELS,
    ANCHORS=ANCHORS,
    BATCH_SIZE=BATCH_SIZE,
    TRUE_BOX_BUFFER=TRUE_BOX_BUFFER,
)


def normalize(image):
    """Return ``image`` divided by 255.

    Supplied to ``SimpleBatchGenerator`` as its ``norm`` callback. Works on
    anything that supports true division by a float (scalars, NumPy arrays).
    """
    scale = 255.
    return image / scale
# Shuffling batch generator over the parsed training records; every image is
# passed through `normalize` via the `norm` hook.
train_batch_generator = SimpleBatchGenerator(
    train_image,
    generator_config,
    shuffle=True,
    norm=normalize,
)

## Base Model: ResNet50

In [7]:
from keras.applications import ResNet50
from keras.layers import Input
# Symbolic inputs: the image tensor, and the extra `true_boxes` tensor that
# custom_loss forwards to custom_loss_core.
input_image = Input(name="input_image", shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(name="input_hack", shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

# ImageNet-pretrained ResNet50 without its top classifier, marked non-trainable.
# NOTE(review): none of the visible cells connect base_model to clf_model — confirm
# whether the backbone is meant to feed the classifier.
base_model = ResNet50(
    input_shape=(IMAGE_H, IMAGE_W, 3),
    weights='imagenet',
    include_top=False,
)
base_model.trainable = False

base_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 416, 416, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 208, 208, 64)  9472        input_1[0][0]                    
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 208, 208, 64)  256         conv1[0][0]                      
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 208, 208, 64)  0           bn_conv1[0][0]                   
___________________________________________________________________________________________

## Classifier Model

In [8]:
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from backend import ConvBatchLReLu

# Disabled draft: a functional-API head attached directly to base_model.output.
# It is wrapped in a bare triple-quoted string, so none of it executes.
'''
x = Flatten()(base_model.output)
x = Dense(32, input_dim=4096, init="uniform",activation="relu")(x)
print(x.get_shape())
x = Dense(32, activation="relu", kernel_initializer="uniform")(x)
print(x.get_shape())
x = Dense(GRID_H*GRID_W*(BOX * (4 + 1 + CLASS)), activation='linear')(x)
print(x.get_shape())

#x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
#print(x.get_shape())
output_re = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS),name="final_output")(x)
model = Model(base_model.input, output_re)
model.summary()
'''

# Fully connected detection head: 4096 -> 32 -> 32 -> GRID_H*GRID_W*BOX*(4+1+CLASS),
# reshaped so each grid cell / anchor box gets 4 box values, 1 confidence and
# CLASS class scores.
# NOTE(review): the head takes flat 4096-vectors, while the recorded fit_generator
# error shows image batches being fed to it — confirm how it should be wired to
# the ResNet50 backbone.
clf_model = Sequential()

# `kernel_initializer` is the Keras 2 keyword; the legacy `init=` spelling was
# only accepted through a deprecation shim and was inconsistent with dense_2.
clf_model.add(Dense(32, input_dim=4096, kernel_initializer="uniform",
                    activation="relu", name="dense_1"))
clf_model.add(Dense(32, kernel_initializer="uniform",
                    activation="relu", name="dense_2"))
clf_model.add(Dense(GRID_H * GRID_W * (BOX * (4 + 1 + CLASS)),
                    activation='linear', name="dense_3"))

clf_model.add(Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS), name="final_output"))
clf_model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                131104    
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 16900)             557700    
_________________________________________________________________
final_output (Reshape)       (None, 13, 13, 4, 25)     0         
Total params: 689,860
Trainable params: 689,860
Non-trainable params: 0
_________________________________________________________________




## Load pre-trained YOLOv2 weights

In [16]:
# Try to load the weights stored in ./yolov2.weights into clf_model, then
# re-initialise one layer of the returned model.
# NOTE(review): the recorded run of this cell ends in
#   "ValueError: axes don't match array".
# NOTE(review): `initialize_weight` is not imported in any visible cell —
#   presumably it lives in `backend` next to set_pretrained_weight; confirm.
# NOTE(review): clf_model.summary() lists only four layers (three Dense and a
#   Reshape), so nb_conv = 22 and layers[-9] look like left-overs from a
#   convolutional model — TODO confirm before re-running.
from backend import set_pretrained_weight
path_to_weight = "./yolov2.weights"
nb_conv        = 22
model          = set_pretrained_weight(clf_model,nb_conv, path_to_weight)
layer          = model.layers[-9] # the last convolutional layer
initialize_weight(layer,sd=1/(GRID_H*GRID_W))

ValueError: axes don't match array

## Loss Function


In [10]:
from backend import custom_loss_core 
# Grid size read by custom_loss (same values as in the batch-generator cell).
GRID_W, GRID_H = 13, 13

# NOTE(review): BATCH_SIZE is 200 in the generator cell and 32 in the training
# cell; custom_loss reads the global at call time — confirm which value is intended.
BATCH_SIZE = 34

# Weights forwarded to custom_loss_core by custom_loss.
LAMBDA_COORD = 1.0
LAMBDA_CLASS = 1.0
LAMBDA_OBJECT = 5.0
LAMBDA_NO_OBJECT = 1.0
    
def custom_loss(y_true, y_pred):
    """Two-argument loss wrapper around ``custom_loss_core``.

    Only ``y_true`` and ``y_pred`` are parameters; every other argument of
    ``custom_loss_core`` (``true_boxes``, grid size, batch size, anchors and
    the four lambda weights) is read from notebook-level globals at the time
    this function is called.

    Returns whatever ``custom_loss_core`` returns.
    """
    core_args = (
        y_true, y_pred, true_boxes,
        GRID_W, GRID_H, BATCH_SIZE, ANCHORS,
        LAMBDA_COORD, LAMBDA_CLASS, LAMBDA_NO_OBJECT, LAMBDA_OBJECT,
    )
    return custom_loss_core(*core_args)

## Training

In [12]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import SGD, Adam, RMSprop

# Directory for training logs; create it if it does not exist yet.
dir_log = "logs/"
try:
    os.makedirs(dir_log)
except OSError as err:
    # An already-existing directory is expected and fine. Any other failure
    # (e.g. missing permissions) is reported instead of being silently
    # swallowed, but still does not abort the notebook.
    if not os.path.isdir(dir_log):
        print("Could not create log directory {!r}: {}".format(dir_log, err))


# Batch size for training, written back into the generator settings.
# NOTE(review): train_batch_generator was built earlier from this same dict;
# whether it picks up the new value depends on SimpleBatchGenerator — confirm.
BATCH_SIZE = 32
generator_config['BATCH_SIZE'] = BATCH_SIZE

# Both callbacks watch the training loss.
# Stop once the loss has not improved by at least 0.001 for 3 epochs.
early_stop = EarlyStopping(
    monitor='loss', mode='min',
    min_delta=0.001, patience=3,
    verbose=1,
)

# Check every epoch and keep only the weights with the lowest loss so far.
checkpoint = ModelCheckpoint(
    'weights_yolo_on_voc2012.h5',
    monitor='loss', mode='min',
    save_best_only=True,
    period=1,
    verbose=1,
)

# Adam is the optimizer in use. Alternatives left disabled:
#   optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#   optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)
optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

clf_model.compile(optimizer=optimizer, loss=custom_loss)

In [13]:
# Train for up to 50 epochs, one pass over the generator per epoch.
# NOTE(review): the recorded run fails with "Expected to see 1 array(s), but
# instead got the following list of 2 arrays" — the generator appears to yield
# two input arrays while clf_model has a single input; confirm the intended
# model inputs before re-running.
clf_model.fit_generator(
    generator=train_batch_generator,
    steps_per_epoch=len(train_batch_generator),
    epochs=50,
    callbacks=[early_stop, checkpoint],
    max_queue_size=3,
    verbose=1,
    # validation_data=valid_batch,
    # validation_steps=len(valid_batch),
)

Epoch 1/50


ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays: [array([[[[ 0.85098039,  0.90196078,  0.93333333],
         [ 0.85098039,  0.90196078,  0.93333333],
         [ 0.85882353,  0.89803922,  0.93333333],
         ..., 
         [ 0.85490196,  0.89803922...