In [1]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib notebook
import yolo_utils
import yolo_constants as yc

tf.enable_eager_execution()

loading annotations into memory...
Done (t=16.64s)
creating index...
index created!
loading annotations into memory...
Done (t=0.63s)
creating index...
index created!


In [2]:
import shutil
#Structure of output tensor (label) for every cell is: 
#[588 dimensional vector] with repeating (class1,...,class20,box1-params, box2-params)
def createOutputTensor(imgList):
    output = np.zeros((len(imgList), yc.CELL_NUMBER_HORI * yc.CELL_NUMBER_VERT * yc.NUM_PARAMS_LABEL))
    imgIndex = -1
    for img in imgList:
        #print(img['file_name'])
        #shutil.copy("D:\\FH\\Master\\Fachseminar\\images\\train2017\\" + img['file_name'], ".\\images\\train2017\\" + img['file_name'])
        
        imgIndex += 1
        for row in range(yc.CELL_NUMBER_VERT):
            for col in range(yc.CELL_NUMBER_HORI):
                clsValues = list()
                boxParams = list()
                
                #Every cell can only detect one object. 
                yolo_utils.getClassValuesForCell(img, clsValues, (row, col))

                #Adding box-coordinates
                yolo_utils.getBoxParamsForCell(img, boxParams, (row, col))
                
                #class values are added once to the output tensor
                for i in range(len(yc.CLASSES)):
                    output[imgIndex, row*yc.PARAMS_PER_ROW_LABEL + col*yc.NUM_PARAMS_LABEL + i] = clsValues[i]
                    
                #for every bounding box the parameters have to be added to the output tensor
                for i in range(yc.COUNT_BOUNDING_BOXES):
                    for j in range(4):
                        #the label only has 4 box parameters. The confidence score is calculated in the loss function
                        output[imgIndex, row*yc.PARAMS_PER_ROW_LABEL + col*yc.NUM_PARAMS_LABEL + yc.COUNT_CLASSES + i*4 + j] = boxParams[i][j]
                
    outputDataset = tf.data.Dataset.from_tensor_slices(output)
    return outputDataset

In [3]:
IMAGE_PATHS = yolo_utils.getImageAdresses(yc.IMAGE_LIST, yc.dataDir + yc.dataType)
VAL_IMAGE_PATHS = yolo_utils.getImageAdresses(yc.VAL_IMAGE_LIST, yc.dataDir + yc.validationPath)

imageDataset = yolo_utils.createImageDataset(IMAGE_PATHS)
outputDataset = createOutputTensor(yc.IMAGE_LIST)
image_output_ds = tf.data.Dataset.zip((imageDataset, outputDataset))

image_output_ds = image_output_ds.batch(yc.BATCH_SIZE)
image_output_ds = image_output_ds.prefetch(buffer_size=yc.AUTOTUNE)

In [4]:
def reshape_Prediction_Output(y):
    #Reshape label and prediction into 2 dimensions - (cell_count x parameter_count)
    y = tf.reshape(y, [yc.CELL_COUNT, yc.COUNT_CLASSES + yc.COUNT_BOUNDING_BOXES*5])
    class_values, box_values = tf.split(y, [yc.COUNT_CLASSES,yc.COUNT_BOUNDING_BOXES*5], -1)
    
    #2 confidence values per cell
    box_values = tf.reshape(box_values, [yc.CELL_COUNT, yc.COUNT_BOUNDING_BOXES, 5])
    box_values, confidence_values = tf.split(box_values, [4,1], -1)
    
    return class_values, box_values, confidence_values

def reshape_Label(y):
    #Reshape label and prediction into 2 dimensions - (cell_count x parameter_count)
    y = tf.reshape(y, [yc.CELL_COUNT, yc.NUM_PARAMS_LABEL])
    class_values, box_values = tf.split(y, [yc.COUNT_CLASSES,yc.COUNT_BOUNDING_BOXES*4], -1)
    box_values = tf.reshape(box_values, [yc.CELL_COUNT, yc.COUNT_BOUNDING_BOXES, 4])
    
    return class_values, box_values

def convert_Box_Parameters_IOU(box_values):
    #create tensor for transformation into image-coordinates
    #x-Axis values are created differently than y-Axis values
    cells_x_Axis = tf.to_float(tf.range(yc.CELL_NUMBER_HORI)) * yc.CELL_WIDTH
    cells_x_Axis = tf.broadcast_to(cells_x_Axis, [yc.COUNT_BOUNDING_BOXES, yc.CELL_COUNT])
    cells_x_Axis = tf.transpose(cells_x_Axis)
    cells_x_Axis = tf.reshape(cells_x_Axis, [yc.CELL_COUNT, yc.COUNT_BOUNDING_BOXES, 1])

    cells_y_Axis = tf.to_float(tf.range(yc.CELL_NUMBER_VERT)) * yc.CELL_HEIGHT
    cells_y_Axis = tf.broadcast_to(cells_y_Axis, [yc.CELL_NUMBER_VERT * yc.COUNT_BOUNDING_BOXES, yc.CELL_NUMBER_VERT])
    cells_y_Axis = tf.transpose(cells_y_Axis)
    cells_y_Axis = tf.reshape(cells_y_Axis, [yc.CELL_COUNT, yc.COUNT_BOUNDING_BOXES, 1])
    
    pos_x, pos_y, half_w, half_h = tf.split(box_values, [1, 1, 1, 1], -1)
    
    #convert positions into image-coordinates
    pos_x *= yc.CELL_WIDTH
    pos_x += cells_x_Axis
    pos_y *= yc.CELL_HEIGHT
    pos_y += cells_y_Axis
    
    #concat x and y positions to create tensor with all points in image-coordinates
    pos = tf.concat([pos_x, pos_y], -1)
    
    
    #convert width and height into image-coordinates
    half_w *= yc.IMAGE_SIZE_WIDTH
    half_h *= yc.IMAGE_SIZE_HEIGHT
    
    #concat width and height and divide by 2
    half_wh = tf.concat([half_w, half_h], -1)
    half_wh = tf.truediv(half_wh, 2.0)
    
    #get lower left point by subtracting half_wh from xy-positions -> (x - width * 0.5, y - height * 0.5)
    box_location_min =  pos - half_wh
    
    #get upper right point by adding half_wh to xy-positions -> (x + width * 0.5, y + height * 0.5)
    box_location_max = pos + half_wh
    
    return box_location_min, box_location_max

In [5]:
def get_Intersection_Area(box_min_label, box_max_label, box_min_pred, box_max_pred):
    #the maximum of the minimum boxes is inside both boxes
    intersection_max = tf.maximum(box_min_pred, box_min_label)
    
    #the minimum of the maximum boxes is inside both boxes
    intersection_min = tf.minimum(box_max_pred, box_max_label)
    
    #if there is an intersection, min - max should be greater than 0
    #by subtracting we no longer have xy-coordinates in the intersection-tensor stored, but width and height of the intersection
    intersection = tf.maximum(intersection_min - intersection_max, 0.0)
    
    #calculate intersection area
    intersection_a, intersection_b = tf.split(intersection, [1, 1], -1)
    intersection_area = intersection_a * intersection_b
    
    return intersection_area

def get_Union_Area(box_min_label, box_max_label, box_min_pred, box_max_pred, intersection_Area):
    label_Area = box_max_label - box_min_label
    
    label_Area_a, label_Area_b = tf.split(label_Area, [1, 1], -1)
    label_Area = label_Area_a * label_Area_b
    
    pred_Area = box_max_pred - box_min_pred
    pred_Area_a, pred_Area_b = tf.split(pred_Area, [1, 1], -1)
    pred_Area = pred_Area_a * pred_Area_b
    pred_Area = tf.abs(pred_Area)
    
    union_Area = label_Area + pred_Area - intersection_Area
    
    image_Area = yc.IMAGE_SIZE_WIDTH * yc.IMAGE_SIZE_HEIGHT 
    union_Area = tf.clip_by_value(union_Area, 0.0, image_Area)
    
    return union_Area

def calculate_IOU(box_values_label, box_values_pred):
    #First: convert box-parameters for IOU-calculations - from (x, y, width, height) to (x1, y1, x2, y2)
    box_min_label, box_max_label = convert_Box_Parameters_IOU(box_values_label)
    box_min_pred, box_max_pred = convert_Box_Parameters_IOU(box_values_pred)
    
    #Second: calculate intersection area
    intersection_Area = get_Intersection_Area(box_min_label, box_max_label, box_min_pred, box_max_pred)
    
    #Third: calculate union area
    union_Area = get_Union_Area(box_min_label, box_max_label, box_min_pred, box_max_pred, intersection_Area)
    
    #Fourth: calculate Intersection over Union (IOU)
    iou = tf.math.divide(intersection_Area, union_Area)
    
    return iou

In [6]:
#calculate masks for iou and object identification. 
def calculate_Masks(iou, boxes):
    #calculate iou-mask. Out of every box per cell only the box with the greatest iou value is responsible. 
    #Set the value of that box in iou-mask to 1. The other boxes are set to 0
    #Multiply later with iou-mask so the loss will be set to 0 for every box not responsible
    iou_box1, iou_box2 = tf.split(iou, [1, 1], -2)

    zero = tf.zeros_like(iou_box1)
    one = tf.ones_like(zero)
    
    iou_box1_new = tf.where(iou_box1 > iou_box2, one, zero)
    iou_box2_new = one - iou_box1_new
    iou_mask = tf.concat([iou_box1_new, iou_box2_new], -2)
    
    #calculate object-mask. If an object exists in that cell set the value to 1, otherwise 0
    zero = tf.zeros_like(boxes)
    one = tf.ones_like(boxes)
    obj_mask = tf.where(tf.equal(boxes,0.), zero, one)
    obj_mask = tf.reshape(obj_mask, [yc.CELL_COUNT, yc.COUNT_BOUNDING_BOXES, 1])

    #calculate no-object-mask
    noobj_mask = one - obj_mask

    return iou_mask, obj_mask, noobj_mask

In [7]:
def calculate_Position_Loss(label_x, label_y, pred_x, pred_y, mask):    
    #calculate (xi − x^i)2 loss for every box and every cell
    x_loss = tf.math.square(label_x - pred_x)
    x_loss = tf.math.multiply(x_loss, mask)

    #calculate (yi − y^i)2 loss for every box and every cell 
    y_loss = tf.math.square(label_y - pred_y)
    y_loss = tf.math.multiply(y_loss, mask)
    
    #sum-up the loss for x and y coordinates (xi − x^i)2 + (yi − y^i)2
    loss = tf.math.add(x_loss, y_loss)
    
    return tf.reduce_sum(loss)

def calculate_Scalar_Loss(label_width, label_height, pred_width, pred_height, mask):
    #make sure there are no 0 values in prediction tensors, because of sqrt-operation
    zero = tf.zeros_like(pred_width)
    pred_width = tf.where(pred_width < 0.0, zero, pred_width)
    pred_height = tf.where(pred_height < 0.0, zero, pred_height)
    sqrt_offset = tf.ones_like(pred_width)
    sqrt_offset = sqrt_offset * 0.0001
    pred_width = tf.where(tf.equal(pred_width, 0.0), sqrt_offset, pred_width)
    pred_height = tf.where(tf.equal(pred_height, 0.0), sqrt_offset, pred_height)
    
    #calculate (sqrt(wi) − sqrt(w'i))2 loss for every box and every cell
    w_loss = tf.math.square(tf.math.sqrt(pred_width) - tf.math.sqrt(label_width))
    w_loss = tf.math.multiply(w_loss, mask)
    
    #calculate (sqrt(hi) − sqrt(h'i))2 loss for every box and every cell 

    h_loss = tf.math.square(tf.math.sqrt(pred_height) - tf.math.sqrt(label_height))
    h_loss = tf.math.multiply(h_loss, mask)
    
    #sum-up the loss for width and height (sqrt(wi) − sqrt(w'i))2 + (sqrt(hi) − sqrt(h'i))2
    loss = tf.math.add(w_loss, h_loss)

    #sum all losses in a scalar value
    return tf.reduce_sum(loss)

def calculate_Confidence_Loss(iou, confidence_values_pred, obj_iou_mask, noobj_iou_mask):
    loss = tf.math.square(iou - confidence_values_pred)
    
    #First: Calculate confidence loss for cells with objects
    #check which cell and box are responsible for loss calculation. Boxes and cells that are not responsible are set to 0 loss value
    obj_loss = tf.math.multiply(loss, obj_iou_mask)
    
    #Second: Calculate confidence loss for cells without objects
    #check which cell and box are responsible for loss calculation. Boxes and cells that are not responsible are set to 0 loss value
    noobj_loss = tf.math.multiply(loss, noobj_iou_mask)
    #multiiply noobj loss with NO_OBJECT_LOSS_MULTIPLIER
    noobj_loss *= yc.NO_OBJECT_LOSS_MULTIPLIER
    
    #sum-up the loss for cells with and without objects
    loss = tf.math.add(obj_loss, noobj_loss)
    
    #sum all losses in a scalar value
    return tf.reduce_sum(loss)

def calculate_Class_Loss(class_values_label, class_values_pred):
    #create a mask to ignore all cells without object
    #Inside the cells with object every value is set to 1
    mask = tf.reduce_max(class_values_label, -1)
    mask = tf.reshape(mask, [yc.CELL_COUNT, 1])
    mask = tf.tile(mask,[1, yc.COUNT_CLASSES])
    
    loss = tf.math.square(class_values_label - class_values_pred)
    loss = tf.multiply(mask, loss)
    
    #sum all losses in a scalar value
    return tf.reduce_sum(loss)

def yoloLoss(label, prediction):
    label = tf.cast(label, tf.float32)
    label = tf.squeeze(label)
    prediction = tf.squeeze(prediction)
    
    #reshape and slice label and prediction tensors
    class_values_label, box_values_label = reshape_Label(label)
    class_values_pred, box_values_pred, confidence_values_pred = reshape_Prediction_Output(prediction)
    
    label_x, label_y, label_width, label_height  = tf.split(box_values_label, [1, 1, 1, 1], -1)
    pred_x, pred_y, pred_width, pred_height  = tf.split(box_values_pred, [1, 1, 1, 1], -1)
    
    #calculate IOU for each box
    iou = calculate_IOU(box_values_label, box_values_pred)
    
    #calculate 1-0-Masks
    iou_mask, obj_mask, noobj_mask = calculate_Masks(iou, label_width)
    obj_iou_mask = tf.math.multiply(iou_mask, obj_mask)
    
    
    #calculate position loss
    position_Loss = calculate_Position_Loss(label_x, label_y, pred_x, pred_y, obj_iou_mask)
    #multiiply position loss with localisation loss multiplier
    position_Loss *= yc.LOCALISATION_LOSS_MULTIPLIER
    
    #calculate scalar loss (height and width)
    scalar_Loss = calculate_Scalar_Loss(label_width, label_height, pred_width, pred_height, obj_iou_mask)
    #multiiply scalar loss with localisation loss multiplier
    scalar_Loss *= yc.LOCALISATION_LOSS_MULTIPLIER
    
    #calculate confidence loss
    noobj_iou_mask = tf.math.multiply(iou_mask, noobj_mask)
    confidence_Loss = calculate_Confidence_Loss(iou, confidence_values_pred, obj_iou_mask, noobj_iou_mask)
    
    #calculate class loss
    class_Loss = calculate_Class_Loss(class_values_label, class_values_pred)
    
    loss = tf.math.add_n([position_Loss, scalar_Loss, confidence_Loss, class_Loss])
    return loss

In [8]:
#recreate model from file
model = tf.keras.models.load_model('.//yolo_v1.h5')

#create a new model for training
#model = yolo_utils.createYOLO_v1_Model(tiny = True)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [10]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.0003)
loss_history = []

def train(epochs):
    for epoch in range(epochs):
        loss_sum = 0.0
        imagecount = 0
        for (batch, (images, labels)) in enumerate(image_output_ds):
            imagecount += 1
            with tf.GradientTape() as tape:
                predictions = model(images, training=True)
                loss_value = yoloLoss(labels, predictions)
                loss_sum += loss_value

            loss_history.append(loss_value.numpy())
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables),global_step=tf.train.get_or_create_global_step())
        print("loss_sum: ", loss_sum)

In [11]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.00003)
train(3)

Instructions for updating:
Use tf.cast instead.
loss_sum:  tf.Tensor(54.151302, shape=(), dtype=float32)
loss_sum:  tf.Tensor(55.338486, shape=(), dtype=float32)
loss_sum:  tf.Tensor(56.06718, shape=(), dtype=float32)


In [12]:
model.save('.//yolo_v1.h5')

In [9]:
#Check the quality of the net. In this case the validation dataset is the same as the training dataset.
#This is done because of bad results with the tiny YOLO version.  
VAL_IMAGE_PATHS = yolo_utils.getImageAdresses(yc.VAL_IMAGE_LIST, yc.dataDir + yc.validationPath)
val_Image_Dataset = yolo_utils.createImageDataset(VAL_IMAGE_PATHS)
IOU_VALUE = 0.7

def processPredictions(prediction, image, image_Number):
    prediction = tf.squeeze(prediction)
    image = tf.squeeze(image)
    class_values_pred, box_values_pred, confidence_values_pred = reshape_Prediction_Output(prediction)
    box_min_pred, box_max_pred = convert_Box_Parameters_IOU(box_values_pred)
    confidence = confidence_values_pred.numpy()
    tl = box_min_pred.numpy() 
    br = box_max_pred.numpy()
    image = image.numpy() * 255.0
    filename = ".//YOLO//" + str(image_Number) + ".jpg"
    write = cv2.imwrite(filename,image)
    
    for cell in range(yc.CELL_COUNT):
        for box in range(yc.COUNT_BOUNDING_BOXES):
            if (confidence[cell][box][0] > IOU_VALUE):
                pt1 = (tl[cell][box][0], tl[cell][box][1])
                pt2 = (br[cell][box][0], br[cell][box][1])
                image = cv2.rectangle(image, pt1, pt2, (200,0,0), 4)
                if (class_values_pred[cell][0] > 0.5):
                    image = cv2.putText(image, "Cat", pt1, cv2.FONT_HERSHEY_COMPLEX, 1, (255,0,0), 2)
                if ( class_values_pred[cell][1] > 0.5):
                    image = cv2.putText(image, "Dog", pt1, cv2.FONT_HERSHEY_COMPLEX, 1, (255,0,0), 2)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cv2.imwrite(filename,image)

In [10]:
#Check before training. Create images to check if image annotations are correctly transferred into the image
def processLabels(labels, image, image_Number):
    labels = tf.squeeze(labels)
    image = tf.squeeze(image)
    class_values, box_values = reshape_Label(labels)
    box_values = tf.to_float(box_values)
    box_min, box_max = convert_Box_Parameters_IOU(box_values)
    tl = box_min.numpy() 
    br = box_max.numpy()
    image = image.numpy() * 255.0
    filename = ".//YOLO//" + str(image_Number) + ".jpg"
    write = cv2.imwrite(filename,image)
    
    for cell in range(yc.CELL_COUNT):
        for box in range(yc.COUNT_BOUNDING_BOXES):
            pt1 = (tl[cell][box][0], tl[cell][box][1])
            pt2 = (br[cell][box][0], br[cell][box][1])
            image = cv2.rectangle(image, pt1, pt2, (200,0,0), 4)
            if (class_values[cell][0] > 0.5):
                image = cv2.putText(image, "Cat", pt1, cv2.FONT_HERSHEY_COMPLEX, 1, (255,0,0), 2)
            if ( class_values[cell][1] > 0.5):
                image = cv2.putText(image, "Dog", pt1, cv2.FONT_HERSHEY_COMPLEX, 1, (255,0,0), 2)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cv2.imwrite(filename,image)

In [11]:
#Make predictions and test the neural net
#Make sure there is a "YOLO"-directory in the directory of this notebook

image_Number = 0
for (batch, (image, labels)) in enumerate(image_output_ds):
    prediction = model(image)
    processPredictions(prediction, image, image_Number)
    #processLabels(labels, image, image_Number)
    image_Number += 1
    

Instructions for updating:
Use tf.cast instead.
