# Convolutional Neural Network - YOLO Advanced
Real Time Camera Prediction
Video Predcition

In [1]:
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from keras.layers.merge import concatenate

import colorsys
import random

import PIL as pil
from PIL import ImageDraw,ImageFont,Image
import imageio
import glob
import cv2
import os
import time

Using TensorFlow backend.


# Helper Functions

In [2]:
def read_classes(classes_path):
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names

def read_anchors(anchors_path):
    with open(anchors_path) as f:
        anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        anchors = np.array(anchors).reshape(-1, 2)
    return anchors

def yolo_head(feats, anchors, num_classes):   
    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last

    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])

    conv_index = K.cast(conv_index, dtype='float32')
    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust preditions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.

    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence,box_xy, box_wh,  box_class_probs

def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    # Step 1: Compute box scores
    box_scores = box_confidence * box_class_probs
    
    # Step 2: Find the box_classes thanks to the max box_scores, keep track of the corresponding score
    box_classes = K.argmax(box_scores, axis = -1)
    box_class_scores = K.max(box_scores, axis = -1)
    
    # Step 3: Create a filtering mask based on "box_class_scores" by using "threshold". The mask should have the
    # same dimension as box_class_scores, and be True for the boxes you want to keep (with probability >= threshold)
    filtering_mask = (box_class_scores >= threshold)
    
    # Step 4: Apply the mask to scores, boxes and classes
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)
    
    return scores, boxes, classes

def iou(box1, box2):
    # Calculate the (y1, x1, y2, x2) coordinates of the intersection of box1 and box2. Calculate its Area.
    xi1 = np.maximum(box1[0], box2[0])
    yi1 = np.maximum(box1[1], box2[1])
    xi2 = np.minimum(box1[2], box2[2])
    yi2 = np.minimum(box1[3], box2[3])
    inter_area = np.abs((xi1 - xi2) * (yi1 - yi2))

    # Calculate the Union area by using Formula: Union(A,B) = A + B - Inter(A,B)
    box1_area = np.abs((box1[2] - box1[0]) * (box1[3] - box1[1]))
    box2_area = np.abs((box2[2] - box2[0]) * (box2[3] - box2[1]))
    union_area = box1_area + box2_area - inter_area
   
    # compute the IoU
    iou = inter_area / union_area

    return iou

def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5): 
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor
    
    nms_indices = tf.image.non_max_suppression( boxes, scores, max_boxes_tensor, iou_threshold, name=None)
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)
    
    return scores, boxes, classes

def yolo_boxes_to_corners(box_xy, box_wh):    
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)

    return K.concatenate([
        box_mins[..., 1:2],  # y_min
        box_mins[..., 0:1],  # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]  # x_max
    ])

def scale_boxes(boxes, image_shape):    
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims
    return boxes

def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes=10,score_threshold=.6, iou_threshold=.5):    
    # Retrieve outputs of the YOLO model
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert boxes to be ready for filtering functions 
    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    # Use one of the functions you've implemented to perform Score-filtering with a threshold of score_threshold
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, score_threshold)
    
    # Scale boxes back to original image shape.
    boxes = scale_boxes(boxes, image_shape)

    # Use one of the functions you've implemented to perform Non-max suppression with a threshold of iou_threshold 
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)
        
    return scores, boxes, classes

def generate_colors(class_names):
    hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.
    return colors

####################################################################################
def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, 
               colors,displayPredictionSummary=False):  
    
    fontSize=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
    font = pil.ImageFont.truetype(font='arial.ttf',size=fontSize)
    thickness = (image.size[0] + image.size[1]) // 300

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '       {} {:.2f}'.format(predicted_class, score)

        draw = pil.ImageDraw.Draw(image)
        label_size = draw.textsize(label,font)

        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        if(displayPredictionSummary):
            print(label, (left, top), (right, bottom))

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[c])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

        
def preprocess_image(img_path, model_image_size):
    image = pil.Image.open(img_path)
    resized_image = image.resize(tuple(reversed(model_image_size)), pil.Image.BICUBIC)
    image_data = np.array(resized_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    return image, image_data


def CreateDirRecursive(file_path):
    directory = os.path.dirname(file_path)
    if(directory!=""):
        os.makedirs(directory, exist_ok=True)

        
        


        
def PredictImage(Sess, img,skipPrediction=False,out_scores=None,out_boxes=None, out_classes=None):
    #CV2 to PIL Format
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image = pil.Image.fromarray(img)
    
    
    model_image_size = (608, 608)
    resized_image = image.resize(tuple(reversed(model_image_size)), pil.Image.BICUBIC)
    image_data = np.array(resized_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    
    
    # Run the session with the correct tensors and placeholders in the feed_dict.
    if(skipPrediction==False):
        out_scores,out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                     feed_dict = {yolo_model.input:image_data,
                                                                  K.learning_phase():0})
    
   
    
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    
    # Draw bounding boxes on the image file
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    
    #PIL to cv2 
    image = np.asarray(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    return image,out_scores,out_boxes, out_classes



        
def PredictFile(Sess, inputFileName,outputFileName="outputImage.jpg",
            displayPredictionSummary=False):
    
    # Preprocess your image
    image, image_data = preprocess_image(inputFileName, model_image_size = (608, 608))

    # Run the session with the correct tensors and placeholders in the feed_dict.
    out_scores,out_boxes, out_classes = sess.run([scores, boxes, classes], 
                                                  feed_dict = {yolo_model.input:image_data, 
                                                               K.learning_phase():0})
    
   
    # Print predictions info
    if(displayPredictionSummary):
        print('Found {} boxes for {}'.format(len(out_boxes), inputFileName))
    
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    
    # Draw bounding boxes on the image file
    draw_boxes(image, out_scores, out_boxes, out_classes, 
               class_names, colors,displayPredictionSummary)
    
    # Save the predicted bounding box on the image
    
    CreateDirRecursive(outputFileName)
    image.save(outputFileName, quality=50)
    
    
    return outputFileName
def DisplayImage(imgFileName,style="popup"):
    winName='Image'
    img = cv2.imread(imgFileName)
    img = cv2.resize(img, dsize=(800,500))
    if(style.lower()=="popup"):
        while True:
            cv2.imshow(winName, img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            if cv2.getWindowProperty(winName,1) == -1 :
                break
        cv2.destroyAllWindows()
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.axis('off')
        plt.imshow(img)

## Main Program

In [3]:
sess = K.get_session()
class_names = read_classes("../Input/YoloDS/model_data/coco_classes.txt")
anchors = read_anchors("../Input/YoloDS/model_data/yolo_anchors.txt")
image_shape = (720., 1280.)    
yolo_model = load_model("../Input/YoloDS/model_data/yolo.h5")
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

Instructions for updating:
Colocations handled automatically by placer.




# Quick Test

In [4]:
inputFileName="../Input/YoloDS/images/test1.jpg"
outputFileName=inputFileName.replace("Input","Output")
outputFileName=Predict(sess,inputFileName,outputFileName,True)
DisplayImage(outputFileName,"popup")

NameError: name 'Predict' is not defined

# Images

In [77]:
FolderPath="../Input/YoloDS/images/RoadDrive"
imgIndex=0
winName='Test Drive'
imgList=glob.glob(FolderPath+"/*.jpg")
skipPrediction=False
out_scores=None
out_boxes=None
out_classes=None

while True:
    if (imgIndex>len(imgList)-1):
        imgIndex=0
    inputFileName=imgList[imgIndex]
    outputFileName=inputFileName.replace("Input","Output")
    PredictFile(sess,inputFileName,outputFileName)
    imgIndex=imgIndex+1
    img = cv2.imread(outputFileName)
    img=cv2.resize(img, dsize=(800,500))
    cv2.imshow(winName, img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if cv2.getWindowProperty(winName,1) == -1 :
        break
    
cv2.destroyAllWindows()

# Video

In [4]:
vFileName='../input/YoloDS/Videos/BMW Vision Self Driving Car.mp4'
winName='BMW Vision Self Driving Car.mp4'
vidcap = cv2.VideoCapture(vFileName)
success,img = vidcap.read()
success = True
count=0
skipPrediction=False
out_scores=None
out_boxes=None
out_classes=None

while success:
    if (count%10!=0):  
        skipPrediction=True
    else:
        skipPrediction=False
    img,out_scores,out_boxes,out_classes= PredictImage(sess,img,skipPrediction,
                      out_scores,out_boxes,out_classes)
    img=cv2.resize(img, dsize=(800,500))

    cv2.imshow(winName, img)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if cv2.getWindowProperty(winName,1) == -1 :
        break
    success,img = vidcap.read()
    count=count+1
    
video_capture.release()
cv2.destroyAllWindows()

NameError: name 'video_capture' is not defined

# Real Time Camera Prediction

In [5]:
video_capture = cv2.VideoCapture(cv2.CAP_DSHOW)
count = 0
winName='Live Camera'
skipPrediction=False
out_scores=None
out_boxes=None
out_classes=None

while True:
    #time.sleep(5)
    if (count%30!=0):  
        skipPrediction=True
    else:
        skipPrediction=False
           
    ret, frame = video_capture.read()
    
    img = cv2.flip( frame, 1 ) 
    
    img,out_scores,out_boxes,out_classes= PredictImage(sess,img,skipPrediction,
                      out_scores,out_boxes,out_classes)
    img=cv2.resize(img, dsize=(800,500))

    cv2.imshow(winName, img)
    count=count+1
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if cv2.getWindowProperty(winName,1) == -1 :
        break
video_capture.release()
cv2.destroyAllWindows()