In [3]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

In [6]:
# Broadcasting check: multiplying a (361, 5, 1) array by a (361, 5, 80) array
# stretches the trailing singleton axis, so the product has shape (361, 5, 80).
a = np.random.randn(19 * 19, 5, 1)
b = np.random.randn(19 * 19, 5, 80)
c = np.multiply(a, b)
c.shape

(361, 5, 80)

In [7]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keep only the boxes whose best class score reaches `threshold`.

    Arguments:
        box_confidence -- tensor of shape (19, 19, 5, 1)
        boxes -- tensor of shape (19, 19, 5, 4)
        box_class_probs -- tensor of shape (19, 19, 5, 80)
        threshold -- real value; a box is dropped when its highest class score is below it

    Returns:
        scores -- tensor of shape (None,), the best class score of each kept box
        boxes -- tensor of shape (None, 4), the coordinates of each kept box, in the
                 same format the caller passed in
        classes -- tensor of shape (None,), the index of the best class of each kept box
    """
    # Per-class score of every box: confidence times class probability
    # (the size-1 last axis of box_confidence broadcasts over the classes).
    class_scores = box_confidence * box_class_probs

    # For each box, which class scored highest and what that score was.
    best_class = K.argmax(class_scores, axis=-1)
    best_score = K.max(class_scores, axis=-1)

    # Boolean mask with the same shape as best_score; True marks boxes to keep.
    keep = best_score >= threshold

    def select(tensor):
        # Drop every entry whose box did not pass the threshold.
        return tf.boolean_mask(tensor, keep)

    return select(best_score), select(boxes), select(best_class)

In [10]:
def iou(box1, box2):
    """
    Implement the intersection over union (IoU) between box1 and box2

    Arguments:
        box1 -- first box, sequence with corner coordinates (x1, y1, x2, y2)
        box2 -- second box, sequence with corner coordinates (x1, y1, x2, y2)

    Returns:
        The ratio intersection_area / union_area as a float. It is 0.0 when the
        boxes do not overlap, and also when the union has zero area (both boxes
        degenerate). Assumes x1 <= x2 and y1 <= y2 for each box.
    """
    # Corners (x1, y1, x2, y2) of the intersection rectangle.
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])

    # Clamp each side at zero: for disjoint boxes the extents are negative, and
    # multiplying two negatives would otherwise report a positive overlap.
    inter_width = max(xi2 - xi1, 0)
    inter_height = max(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    # Union(A, B) = A + B - Inter(A, B)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Two zero-area boxes: avoid dividing by zero, report no overlap.
    if union_area == 0:
        return 0.0

    return inter_area / union_area

In [11]:
# Sanity check for iou(): two 2x2 boxes that share a 1x1 square.
box1, box2 = (2, 1, 4, 3), (1, 2, 3, 4)
print("iou = " + str(iou(box1, box2)))

iou = 0.14285714285714285


In [13]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Applies non-max suppression (NMS) to a set of boxes.

    Arguments:
        scores -- tensor of per-box scores, as returned by yolo_filter_boxes
        boxes -- tensor of per-box coordinates, as returned by yolo_filter_boxes
                 (NOTE(review): tf.image.non_max_suppression expects corner
                 coordinates; confirm the caller passes them in that form)
        classes -- tensor of per-box class indices, as returned by yolo_filter_boxes
        max_boxes -- integer, maximum number of boxes to keep
        iou_threshold -- real value, "intersection over union" threshold used for NMS

    Returns:
        scores -- scores of the boxes kept by NMS
        boxes -- coordinates of the boxes kept by NMS
        classes -- class indices of the boxes kept by NMS
    """
    # Wrap max_boxes in an int32 variable and initialise it in the current
    # session so it can be handed to the NMS op as a tensor.
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes to keep. This uses the tensor built above; the
    # previous code created it but then passed the raw max_boxes instead.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

    # Keep only the selected entries of each tensor.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

In [14]:
def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Converts the output of YOLO encoding (a lot of boxes) to your predicted boxes along with their scores, box coordinates and classes.

    Arguments:
        yolo_outputs -- output of the encoding model (for image_shape of (608,608,3)), contains 4 tensors:
            box_confidence: tensor of shape (None, 19, 19, 5, 1)
            box_xy: tensor of shape (None, 19, 19, 5, 2)
            box_wh: tensor of shape (None, 19, 19, 5, 2)
            box_class_probs: tensor of shape (None, 19, 19, 5, 80)
        image_shape -- shape of the image the boxes are scaled back to (passed to scale_boxes);
                       defaults to (720., 1280.)
        max_boxes -- integer, maximum number of predicted boxes you'd like
        score_threshold -- real value, if [highest class probability score < threshold], then get rid of the
                           corresponding box
        iou_threshold -- real value, "intersection over union" threshold used for NMS filtering

    Returns:
        scores -- tensor of shape (None, ), predicted score for each box
        boxes -- tensor of shape (None, 4), predicted box coordinates
        classes -- tensor of shape (None,), predicted class for each box
    """

    # Retrieve outputs of the YOLO model
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert boxes from (centre, size) to corners, ready for the filtering functions
    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    # Score-filtering with a threshold of score_threshold.
    # (The previous code passed the undefined name `box_class_prob` here, which raised NameError.)
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape
    boxes = scale_boxes(boxes, image_shape)

    # Non-max suppression with a threshold of iou_threshold
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)

    return scores, boxes, classes

### Summary for YOLO:
1. Input image (608,608,3)
2. The input image goes through a CNN, resulting in a (19,19,5,85)-dimensional output.
3. After flattening the last two dimensions, the output is a volume of shape (19,19,425):
    - Each cell in a 19*19 grid over the input image gives 425 numbers.
    - 425 = 5*85 because each cell contains predictions for 5 boxes, corresponding to 5 anchor boxes, as seen in lecture.
    - 85 = 5 + 80, where 5 is because (pc,bx,by,bh,bw) has 5 numbers, and 80 is the number of classes we'd like to
      detect.
4. You then select only few boxes based on:
    - Score-thresholding: throw away boxes that have detected a class with a score less than the threshold
    - Non-max suppression: Compute the Intersection over Union and avoid selecting overlapping boxes
5. This gives you YOLO's final output

In [15]:
# Keep a handle on the current Keras backend (TensorFlow) session.
sess = K.get_session()

In [16]:
# Class names and anchor boxes are read from the model_data files via the yolo_utils helpers.
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
# NOTE(review): presumably the (height, width) of the images being processed;
# same value as yolo_eval's default image_shape -- confirm.
image_shape = (720., 1280.)

### Loading a pretrained model

In [18]:
# Load the saved Keras model from model_data/yolo.h5.
yolo_model = load_model("model_data/yolo.h5")



In [19]:
# Print the layer-by-layer summary of the loaded model.
yolo_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 608, 608, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 608, 608, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 608, 608, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 608, 608, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin

### add yolo_outputs to your graph

In [21]:
# Convert the raw model output into the tensors that yolo_eval unpacks.
# NOTE(review): yolo_eval expects (box_confidence, box_xy, box_wh, box_class_probs);
# confirm that yad2k's yolo_head returns them in this order.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

### Filtering boxes