In [20]:
!git clone https://github.com/allanzelener/yad2k.git

fatal: destination path 'yad2k' already exists and is not an empty directory.


In [1]:
import os
import tensorflow as tf
import numpy
import pandas
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model

import argparse#
import PIL#
import scipy.io#
import scipy.misc
from yad2k.yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [None]:
# Input is a batch of images of shape (m, 608, 608, 3)
# Output has shape (m, 19, 19, 5, 85), where 85 = (pc, bx, by, bh, bw, c1, ..., c80)
# Each object is assigned to the cell of the 19x19 grid that contains its midpoint, and to an anchor box
# Then use non-max suppression to reduce the output to a smaller number of detected objects
  # Get rid of boxes with a low probability
  # Choose only one box when several boxes overlap with each other and detect the same object.


In [2]:
      # Filtering by thresholding over the class scores
def yolo_filter_boxes(box_object_presence, boxes, box_class_probs, threshold = .6):
    """Drop every box whose best class score is below `threshold`.

    Arguments (shapes as given in the original notes for this notebook):
    box_object_presence -- tensor of shape (19, 19, 5, 1)
    boxes -- tensor of shape (19, 19, 5, 4)
    box_class_probs -- tensor of shape (19, 19, 5, 80), class probabilities
                       for each of the 80 classes for each of the 5 boxes per cell
    threshold -- boxes with a best class score >= threshold are kept

    Returns:
    scores -- best class score of each kept box
    boxes -- the kept rows of `boxes`
    classes -- index of the best-scoring class of each kept box
    """
    # Per-class score of every box: object presence times class probability.
    class_scores = box_object_presence * box_class_probs

    # Best class of each box and the score it achieved.
    best_class = K.argmax(class_scores, axis = -1)
    best_score = K.max(class_scores, axis = -1)

    # True where the box should be kept.
    keep = best_score >= threshold

    def _select(tensor):
        # Apply the same keep-mask to any of the per-box tensors.
        return tf.boolean_mask(tensor, keep, name = 'boolean_mask')

    return _select(best_score), _select(boxes), _select(best_class)

In [3]:
        # Implement Intersection over Union (measuring accuracy)
def iou(box1, box2):
    """Compute the intersection over union (IoU) of two axis-aligned boxes.

    Arguments:
    box1 -- sequence (x1, y1, x2, y2): upper-left and lower-right corners
    box2 -- sequence (x1, y1, x2, y2): upper-left and lower-right corners

    Returns:
    The ratio intersection_area / union_area, or 0.0 when the union has no
    area (e.g. both boxes are degenerate), which avoids a division by zero.
    """
    # (x1, y1) upper left, (x2, y2) lower right corners of each box
    (box1_x1, box1_y1, box1_x2, box1_y2) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2) = box2

    # Corners of the intersection rectangle. Built-in max/min are used so the
    # function does not depend on an `np` alias that this notebook never imports.
    xi1 = max(box1_x1, box2_x1)
    yi1 = max(box1_y1, box2_y1)
    xi2 = min(box1_x2, box2_x2)
    yi2 = min(box1_y2, box2_y2)

    # Clamp at 0 so non-overlapping boxes give an empty intersection.
    inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0)

    # Union(A, B) = A + B - Inter(A, B)
    box1_area = (box1_y2 - box1_y1) * (box1_x2 - box1_x1)
    box2_area = (box2_y2 - box2_y1) * (box2_x2 - box2_x1)
    union_area = box1_area + box2_area - inter_area

    if union_area <= 0:
        return 0.0

    return inter_area / union_area

In [4]:
      # Implementing Non-Max
         # Select the box that has the highest score.
         # Compute the overlap of this box with all other boxes, and remove boxes that overlap significantly (iou >= iou_threshold).
         #Go back to step 1 and iterate until there are no more boxes with a lower score than the currently selected box.
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """Apply non-max suppression (NMS) to a set of boxes.

    Arguments:
    scores -- tensor with one score per box
    boxes -- tensor with one row of coordinates per box
    classes -- tensor with one class index per box
    max_boxes -- integer, maximum number of boxes to keep
    iou_threshold -- real value, overlap threshold handed to
                     tf.image.non_max_suppression

    Returns:
    scores, boxes, classes -- the input tensors gathered at the indices
                              selected by NMS
    """
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor

    # Use the caller's threshold rather than a hard-coded 0.5, so the
    # iou_threshold argument actually takes effect.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold = iou_threshold)

    # Keep only the entries selected by NMS.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

In [5]:
def scale_boxes(boxes, image_shape):
    """Multiply box coordinates by the image dimensions.

    `image_shape` is indexed as (height, width); each row of `boxes` is
    multiplied element-wise by (height, width, height, width).
    """
    img_h, img_w = image_shape[0], image_shape[1]
    # Shape [1, 4] so the factor broadcasts over every row of `boxes`.
    scale = K.reshape(K.stack([img_h, img_w, img_h, img_w]), [1, 4])
    return boxes * scale

In [6]:
def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes=10, score_threshold=.6, iou_threshold=.5):
    """
    Converts the output of YOLO encoding (a lot of boxes) to your predicted boxes along with their scores, box coordinates and classes.
    
    Arguments:
    yolo_outputs -- output of the encoding model (for image_shape of (608, 608, 3)), contains 4 tensors:
                    box_confidence: tensor of shape (None, 19, 19, 5, 1)
                    box_xy: tensor of shape (None, 19, 19, 5, 2)
                    box_wh: tensor of shape (None, 19, 19, 5, 2)
                    box_class_probs: tensor of shape (None, 19, 19, 5, 80)
    image_shape -- (height, width) of the original image used to scale the boxes back; defaults to (720., 1280.) (has to be float32 dtype)
    max_boxes -- integer, maximum number of predicted boxes you'd like
    score_threshold -- real value, if [ highest class probability score < threshold], then get rid of the corresponding box
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    
    Returns:
    scores -- tensor of shape (None, ), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box
    """

    # Retrieve outputs of the YOLO model
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert boxes to be ready for filtering functions
    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    # Score filtering
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = score_threshold)

    # Scale to original image size
    boxes = scale_boxes(boxes, image_shape)

    # Non-max suppression. Forward the caller's max_boxes / iou_threshold
    # instead of hard-coded 10 / 0.5, so these arguments actually take effect.
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes = max_boxes, iou_threshold = iou_threshold)

    return scores, boxes, classes

Summary for YOLO:
                                                      
    Input image (608, 608, 3)
    The input image goes through a CNN, resulting in a (19,19,5,85) dimensional output.
    After flattening the last two dimensions, the output is a volume of shape (19, 19, 425):
    Each cell in a 19x19 grid over the input image gives 425 numbers.
    425 = 5 x 85 because each cell contains predictions for 5 boxes, corresponding to 5 anchor boxes, as seen in lecture.
    85 = 5 + 80, where 5 is because (pc, bx, by, bh, bw) has 5 numbers, and 80 is the number of classes we'd like to detect
    You then select only few boxes based on:
    Score-thresholding: throw away boxes that have detected a class with a score less than the threshold
    Non-max suppression: Compute the Intersection over Union and avoid selecting overlapping boxes
    This gives you YOLO's final output

In [7]:
# A session is needed to execute the computation graph and evaluate the tensors.
# It is stored in the module-level name `sess` for use by later cells.

sess = tf.compat.v1.keras.backend.get_session()

def read_classes(classes_path):
    """Read `classes_path` and return its lines as a list of stripped strings."""
    with open(classes_path) as handle:
        return [line.strip() for line in handle.readlines()]

def read_anchors(anchors_path):
    """Parse the first line of `anchors_path` into an array of value pairs.

    The first line is split on commas, each piece is converted to float, and
    the values are returned as a numpy array reshaped to (-1, 2).
    """
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = [float(token) for token in first_line.split(',')]
    return numpy.array(values).reshape(-1, 2)

# Load the class names and anchor boxes from hard-coded absolute paths.
# NOTE(review): the /content/drive prefix presumably requires a mounted Google Drive — confirm before re-running.
class_names = read_classes("/content/drive/My Drive/model_data_coco_classes.txt")
anchors = read_anchors("/content/drive/My Drive/Week 03_Car detection for Autonomous Driving_model_data_yolo_anchors.txt")
# (height, width) as floats; the same values are passed to yolo_eval in a later cell.
image_shape = (720., 1280.)    



In [8]:
#!git clone https://github.com/allanzelener/YAD2K.git
#https://github.com/JudasDie/deeplearning.ai/issues/2
#https://github.com/allanzelener/YAD2K
!pip install tensorflow==1.14.0 
!pip install q keras==2.3.1
os.chdir("/content/YAD2K")
!wget https://pjreddie.com/media/files/yolov2-voc.weights
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-voc.cfg
!python3 yad2k.py yolov2-voc.cfg yolov2-voc.weights model_data/yolov2-voc.h5


--2020-09-05 12:03:31--  https://pjreddie.com/media/files/yolov2-voc.weights
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 202704264 (193M) [application/octet-stream]
Saving to: ‘yolov2-voc.weights.7’


2020-09-05 12:04:38 (2.90 MB/s) - ‘yolov2-voc.weights.7’ saved [202704264/202704264]

--2020-09-05 12:04:38--  https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-voc.cfg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2721 (2.7K) [text/plain]
Saving to: ‘yolov2-voc.cfg.7’


2020-09-05 12:04:38 (49.4 MB/s) - ‘yolov2-voc.cfg.7’ saved [2721/2721]

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int

In [16]:
# Load the Keras model file written by the yad2k.py step, relative to the current working directory.
# compile=False loads the model without compiling it; see the link below for the related warning.
trained_model = load_model('model_data/yolov2-voc.h5', compile=False)
#https://stackoverflow.com/questions/53295570/userwarning-no-training-configuration-found-in-save-file-the-model-was-not-c

In [17]:
# Print the layer-by-layer summary of the loaded model (output shapes and parameter counts).
trained_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 416, 416, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 416, 416, 32) 0           batch_normalization_1[0][0]      
____________________________________________________________________________________________

In [18]:
# Convert the output of the model to usable bounding box tensors.
# NOTE(review): class_names is read from a COCO classes file while trained_model is loaded
# from yolov2-voc.h5 — confirm that len(class_names) matches the model's output.
yolo_outputs = yolo_head(trained_model.output, anchors, len(class_names))

In [21]:
# Filtering boxes: score thresholding followed by non-max suppression (see yolo_eval).
# NOTE(review): the recorded output of this cell is "ValueError: ignored"; the cause is not
# visible in the saved output and needs to be investigated.
scores, boxes, classes = yolo_eval(yolo_outputs, (720., 1280.))

ValueError: ignored