# YOLO v3 Bottleneck Training Walkthrough

In this example we will walk through the YOLO configuration, using the example code from:
- https://github.com/qqwweee/keras-yolo3

Code is included for configuring the model and saving its weights. We also rely on utility functions for parts of the process, such as the detailed construction of the model and of the loss function. Each of these operations is detailed and should be investigated separately to help bolster your understanding.

In [None]:
# Run these shell commands (the leading "!" runs them from the notebook)
# to convert the Darknet weights to a Keras implementation

# Download the pretrained Darknet YOLOv3 weights
!wget https://pjreddie.com/media/files/yolov3.weights

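# Convert to Keras and save
# NOTE: this writes model_data/yolo.h5, while the training cells below load
#       'model_data/yolo_weights.h5' -- rename/copy the converted file or pass a
#       matching weights_path so the pretrained weights are actually found.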
!python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5

In [None]:
import os
import numpy as np
import keras.backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data

In [None]:
# Creating the YOLO model with darknet
def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/yolo_weights.h5'):
    '''Create the YOLOv3 training model plus two helper models for bottleneck training.

    Arguments:
        input_shape: (height, width) pair; used to size the ground-truth inputs.
        anchors: sequence of anchor boxes; only its length is used directly here,
            the values themselves are forwarded to yolo_loss.
        num_classes: number of object classes.
        load_pretrained: if True, load weights from weights_path (by layer name,
            skipping mismatches) and apply the freezing selected by freeze_body.
        freeze_body: 1 freezes the first 185 layers, 2 freezes all but the last
            3 layers of the body; any other value freezes nothing. Only applied
            when load_pretrained is True.
        weights_path: path of the Keras weights file to load.

    Returns:
        (model, bottleneck_model, last_layer_model):
        model            -- image + ground truth inputs -> yolo_loss output
        bottleneck_model -- image + ground truth inputs -> outputs of body
                            layers 246, 247 and 248 (the cached features)
        last_layer_model -- cached features + ground truth inputs -> yolo_loss
                            output, reusing body layers 249, 250 and 251
    '''
    
    K.clear_session() # get a new session
    # image height/width are left undefined (None) for now
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors) # total number of anchor boxes (split evenly over the 3 scales below)

    # create three input branches. These will actually be used for 
    #   feeding in the ground truth, not as actual inputs to YOLO
    #   one branch per downsampling factor; last axis is num_classes+5
    #   (presumably 4 box values + 1 objectness + class scores -- confirm in preprocess_true_boxes)
    sizes = [32, 16, 8] 
    y_true = [Input(shape=(h//s, w//s, num_anchors//3, num_classes+5)) for s in sizes]

    # use utility function to create a YOLO keras model
    #    Look into the yolo_body function to see that it
    #    (1) uses DarkNet as initial model
    #    (2) adds three output tensors that are the YOLO 3-D class/bounding box outputs
    #        Each output is upsampled and depends on the previous output and some DarkNet layers
    #        such that we will have three outputs; for a 416x416 input the grids are
    #        13x13, 26x26, 52x52 (416 divided by 32, 16 and 8)
    model_body = yolo_body(image_input, num_anchors//3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        # don't train from scratch! Let's use transfer learning
        # Load up the model weights using Keras
        # (matched by layer name; layers whose shapes do not match are skipped)
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            # freeze_body=1 -> first 185 layers, freeze_body=2 -> everything except the last 3
            num = (185, len(model_body.layers)-3)[freeze_body-1]
            for i in range(num): model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # for training, we will need to create three different models (for convenience) 
    #  First let's create the bottleneck model which can easily be used 
    #   to input some images into DarkNet and save the features of DarkNet 
    #   before they go into the final YOLO output layers
    
    
    # get output of DarkNet last layers for convenience
    # This allows us to save DarkNet features from images without needing to run
    # the images through a massive DarkNet model each time
    # NOTE(review): the indices 246-248 are hard-coded and assume the layer order
    #   produced by yolo_body for this architecture -- confirm if the body changes
    out1=model_body.layers[246].output  
    out2=model_body.layers[247].output 
    out3=model_body.layers[248].output 
    
    # Again, just a convenience model for saving features from image input
    # (y_true is listed as an input so the same data generator can feed this model)
    bottleneck_model = Model([model_body.input, *y_true], 
                             [out1, out2, out3])

    # Now let's get another convenience model that can take the saved
    #  features and generate the YOLO output tensors
    
    # this should take as input the outputs from the bottleneck
    # (shape[1:] drops the batch axis)
    in0 = Input(shape=bottleneck_model.output[0].shape[1:].as_list()) 
    in1 = Input(shape=bottleneck_model.output[1].shape[1:].as_list())
    in2 = Input(shape=bottleneck_model.output[2].shape[1:].as_list())
    
    # and the output should be the YOLO output
    # the same layer objects of model_body are called again, so their weights are
    # shared with "model" (indices 249-251 are hard-coded like 246-248 above)
    last_out0=model_body.layers[249](in0)
    last_out1=model_body.layers[250](in1)
    last_out2=model_body.layers[251](in2)
    
    # create a placeholder model (we need this to setup the loss function)
    model_last=Model(inputs=[in0, in1, in2], outputs=[last_out0, last_out1, last_out2])
    
    # the output of this layer is the actual loss function from YOLO
    # the loss function is ridiculously complicated to code up so refer to the slides here
    # look at "yolo_loss" in the model directory. There is a good deal to parse!
    # the main goal is that it trains the bounding box regression, objectness, and classification
    # but takes a lot of pre-processing of the ground truth to setup the correct tensor
    # not to mention only some tensor outputs are updated depending on the bounding box center
    # The layer name 'yolo_loss' is the key used in the loss dict when compiling.
    model_loss_last =Lambda(yolo_loss, 
                            output_shape=(1,), 
                            name='yolo_loss',
                            arguments={'anchors': anchors, 
                                       'num_classes': num_classes, 
                                       'ignore_thresh': 0.5})(
                    [*model_last.output, *y_true])
    
    # this is what we can fit given image features
    # This really helps speed things up because we only need to calculate
    # the features from the input one time, but still can get the loss function
    last_layer_model = Model([in0,in1,in2, *y_true], 
                             model_loss_last)

    # But eventually, we will want to update the entire model, not just the final bottleneck layers
    # for that, we need the actual model that takes images as input and gives us the yolo output tensor
    # (a second Lambda with the same 'yolo_loss' name; it lives in a different Model)
    model_loss = Lambda(yolo_loss, 
                        output_shape=(1,), 
                        name='yolo_loss',
                        arguments={'anchors': anchors, 
                                   'num_classes': num_classes, 
                                   'ignore_thresh': 0.5})(
                [*model_body.output, *y_true]) 
    
    # now we have the final YOLO model with loss function 
    # without any of the convenience feature saving built in. Nice!
    model = Model([model_body.input, *y_true], model_loss)

    return model, bottleneck_model, last_layer_model

In [None]:
def get_classes(classes_path):
    '''Load the class names from a text file, one name per line.

    Arguments:
        classes_path: path of the text file to read.

    Returns:
        List of class names in file order, with surrounding whitespace
        removed. Blank (or whitespace-only) lines are skipped so that, for
        example, a trailing empty line does not turn into an empty class
        name and inflate the class count derived from this list.
    '''
    with open(classes_path) as f:
        stripped_names = (line.strip() for line in f)
        return [name for name in stripped_names if name]

def get_anchors(anchors_path):
    '''Read the anchors from the first line of a file.

    The first line is expected to hold comma-separated numbers; they are
    parsed as floats and returned as a numpy array with two columns
    (one row per consecutive pair of values). Later lines are ignored.
    '''
    with open(anchors_path) as anchor_file:
        first_line = anchor_file.readline()
    values = np.array([float(token) for token in first_line.split(',')])
    return values.reshape(-1, 2)

In [None]:
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False):
    '''Endless batch generator for fit_generator / predict_generator.

    Walks through annotation_lines in order (wrapping around at the end) and
    yields ([image_batch, *y_true], zeros(batch_size)). When random is true the
    list is shuffled IN PLACE each time the walk is back at position 0, and
    random is also forwarded to get_random_data. With verbose set, the
    current position is printed after every batch.
    '''
    total = len(annotation_lines) # number of annotation entries to cycle through
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            # reshuffle whenever a new pass over the data starts
            if random and cursor == 0:
                np.random.shuffle(annotation_lines)
            # get_random_data returns the image together with its boxes
            # (the random flag is passed straight through to it)
            image, box = get_random_data(annotation_lines[cursor], input_shape, random=random)
            images.append(image)
            boxes.append(box)
            cursor = (cursor + 1) % total
        # stack the per-sample results into batch arrays
        image_data = np.array(images)
        if verbose:
            print("Progress: ",cursor,"/",total)
        box_data = np.array(boxes)
        # turn the raw boxes into the ground-truth tensors the model expects
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        # the target is a dummy: the loss is computed inside the model
        yield [image_data, *y_true], np.zeros(batch_size)

def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False):
    '''Guarded entry point for data_generator.

    Returns None when there are no annotation lines or batch_size is not
    positive; otherwise returns the generator built by data_generator with
    all arguments forwarded unchanged.
    '''
    has_samples = len(annotation_lines) != 0
    if not has_samples or batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape,
                          anchors, num_classes, random, verbose)

def bottleneck_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, bottlenecks):
    '''Endless batch generator that serves precomputed bottleneck features.

    bottlenecks is a sequence of three arrays indexed by sample along axis 0,
    aligned with annotation_lines. For every batch the features of the next
    batch_size samples (wrapping around) are copied into fresh buffers, the
    boxes are read via get_random_data (random=False, proc_img=False) and
    ([b0, b1, b2, *y_true], zeros(batch_size)) is yielded.
    '''
    total = len(annotation_lines)
    cursor = 0
    while True:
        # fresh zero-filled buffers, one per bottleneck array, sized from axes 1..3
        feat0, feat1, feat2 = (
            np.zeros((batch_size, feats.shape[1], feats.shape[2], feats.shape[3]))
            for feats in (bottlenecks[0], bottlenecks[1], bottlenecks[2])
        )
        boxes = []
        for slot in range(batch_size):
            # only the boxes are needed here; the image result is discarded
            _, box = get_random_data(annotation_lines[cursor], input_shape, random=False, proc_img=False)
            boxes.append(box)
            # copy the stored features of this sample into the batch buffers
            feat0[slot] = bottlenecks[0][cursor]
            feat1[slot] = bottlenecks[1][cursor]
            feat2[slot] = bottlenecks[2][cursor]
            cursor = (cursor + 1) % total
        y_true = preprocess_true_boxes(np.array(boxes), input_shape, anchors, num_classes)
        # now yield the already processed features and ground truth; the target is a dummy
        yield [feat0, feat1, feat2, *y_true], np.zeros(batch_size)

In [None]:
# ---- default paths and parameters ----
annotation_path = 'train.txt'
log_dir = 'logs/000/'
classes_path = 'model_data/coco_classes.txt'
anchors_path = 'model_data/yolo_anchors.txt'

# class list and anchors come from the model_data files
class_names = get_classes(classes_path)
num_classes = len(class_names)
anchors = get_anchors(anchors_path)

input_shape = (416,416) # multiple of 32, hw

# build the full model and the two bottleneck helpers
# (make sure you know what you freeze)
model, bottleneck_model, last_layer_model = create_model(
    input_shape,
    anchors,
    num_classes,
    freeze_body=2,
    weights_path='model_data/yolo_weights.h5',
)

# ---- callbacks used by the later training stages ----
logging = TensorBoard(log_dir=log_dir)
checkpoint = ModelCheckpoint(
    log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=True,
    period=3,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    verbose=1,
)
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=1,
)

# ---- train / validation split ----
val_split = 0.1
with open(annotation_path) as f:
    lines = list(f)
# fixed seed so the shuffle (and therefore the split) is the same on every run,
# then reseed from entropy so later random operations are not tied to it
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val



In [None]:
# Build the three models again (create_model starts by clearing the Keras session).
# make sure you know what you freeze
model, bottleneck_model, last_layer_model = create_model(
    input_shape, anchors, num_classes,
    freeze_body=2,
    weights_path='model_data/yolo_weights.h5')

In [None]:
# perform bottleneck training
# The DarkNet features of every annotated image are computed once and cached
# in bottlenecks.npz; delete that file to force a recalculation.
if not os.path.isfile("bottlenecks.npz"):
    print("calculating bottlenecks")
    batch_size=8
    num_samples = len(lines)
    # ceil division: just enough full batches to cover every sample
    # (avoids one needless extra DarkNet batch when num_samples divides evenly)
    num_steps = (num_samples + batch_size - 1) // batch_size
    # random=False keeps the samples in the order of "lines" so that feature i
    # belongs to lines[i]
    feature_generator = data_generator_wrapper(lines,
                                               batch_size,
                                               input_shape,
                                               anchors,
                                               num_classes,
                                               random=False,
                                               verbose=True)
    # run this through to save out the features from the input images we have labeled
    bottlenecks=bottleneck_model.predict_generator(feature_generator,
                                                   steps=num_steps,
                                                   max_queue_size=1)

    # the generator wraps around to the first samples to fill the last batch;
    # drop those duplicates so exactly one feature row per line is stored
    bottlenecks = [features[:num_samples] for features in bottlenecks]

    # this will save out those features so we can load them up and use them
    #  without running through DarkNet again
    np.savez("bottlenecks.npz", bot0=bottlenecks[0], bot1=bottlenecks[1], bot2=bottlenecks[2])

In [None]:


# load bottleneck features from file
# Every lookup on the loaded .npz reads that array from disk again, so fetch
# each array exactly once and close the file afterwards.
with np.load("bottlenecks.npz") as dict_bot:
    bottlenecks_all = [dict_bot["bot0"], dict_bot["bot1"], dict_bot["bot2"]]
# split along the sample axis in the same way as "lines" is split below
bottlenecks_train = [features[:num_train] for features in bottlenecks_all]
bottlenecks_val = [features[num_train:] for features in bottlenecks_all]

# train last layers with fixed bottleneck features
batch_size=8
print("Training last layers with bottleneck features")
print('with {} samples, val on {} samples and batch size {}.'.format(num_train, num_val, batch_size))
# the 'yolo_loss' layer already outputs the loss, so the Keras loss just passes it through
last_layer_model.compile(optimizer='adam', 
                         loss={'yolo_loss': lambda y_true, y_pred: y_pred})

# now run through the saved features and fit the final YOLO output layers
#  to the given labels
last_layer_model.fit_generator(
        bottleneck_generator(lines[:num_train], 
                             batch_size, input_shape, 
                             anchors, num_classes, 
                             bottlenecks_train),
        steps_per_epoch=max(1, num_train//batch_size),
        validation_data=bottleneck_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, bottlenecks_val),
        validation_steps=max(1, num_val//batch_size),
        epochs=30,
        initial_epoch=0, max_queue_size=1)

# The fit above ran without any callbacks, so nothing is guaranteed to have
# created log_dir yet; make sure it exists before writing into it.
os.makedirs(log_dir, exist_ok=True)
# save out the trained weights (the bottleneck just points to the last layers of "model")
model.save_weights(log_dir + 'trained_weights_stage_0.h5')





In [None]:
# Stage 1: train the (still unfrozen) last layers on augmented images.
# More training with augmentation helps against overfitting, but it takes a
# good deal longer because the features are recomputed instead of cached.
model.compile(
    optimizer=Adam(lr=1e-3),
    # the custom 'yolo_loss' Lambda layer already outputs the loss value
    loss={'yolo_loss': lambda y_true, y_pred: y_pred},
)

batch_size = 16
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))

# first num_train lines train, the remainder validates
train_batches = data_generator_wrapper(lines[:num_train], batch_size,
                                       input_shape, anchors, num_classes)
val_batches = data_generator_wrapper(lines[num_train:], batch_size,
                                     input_shape, anchors, num_classes)
model.fit_generator(
    train_batches,
    steps_per_epoch=max(1, num_train//batch_size),
    validation_data=val_batches,
    validation_steps=max(1, num_val//batch_size),
    epochs=50,
    initial_epoch=0,
    callbacks=[logging, checkpoint],
)

model.save_weights(log_dir + 'trained_weights_stage_1.h5')

In [None]:
# Stage 2: unfreeze everything and keep training to fine-tune.
# Train longer if the result is not good.

for layer in model.layers:
    layer.trainable = True

# recompile so the trainable change takes effect
model.compile(
    optimizer=Adam(lr=1e-4),
    loss={'yolo_loss': lambda y_true, y_pred: y_pred},
)
print('Unfreeze all of the layers.')

batch_size = 4 # note that more GPU memory is required after unfreezing the body
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))

train_batches = data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes)
val_batches = data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes)
# epoch numbering resumes at 50, where the previous stage (epochs=50) stopped
model.fit_generator(
    train_batches,
    steps_per_epoch=max(1, num_train//batch_size),
    validation_data=val_batches,
    validation_steps=max(1, num_val//batch_size),
    epochs=100,
    initial_epoch=50,
    callbacks=[logging, checkpoint, reduce_lr, early_stopping],
)

# save out the final weights!
model.save_weights(log_dir + 'trained_weights_final.h5')

# not working? Try running this cell again