In [None]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
from PIL import ImageFont, ImageDraw, Image
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor

from tensorflow.keras.models import load_model
from yad2k.models.keras_yolo import yolo_head
from yad2k.utils.utils import draw_boxes, get_colors_for_classes, scale_boxes, read_classes, read_anchors, preprocess_image

%matplotlib inline

In [None]:
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=0.6):
    """
    Keep only the boxes whose best class score reaches `threshold`.

    The per-class score of a box is its class probabilities multiplied by its
    object confidence; the class with the highest score is the box's class.

    Arguments:
        boxes -- tensor of box coordinates, e.g. shape (19, 19, 5, 4)
        box_confidence -- tensor of object confidences, e.g. shape (19, 19, 5, 1)
        box_class_probs -- tensor of class probabilities, e.g. shape (19, 19, 5, 80)
        threshold -- float, boxes whose best class score is below this are dropped

    Returns:
        scores -- tensor of shape (None,), best class score of each kept box
        boxes -- tensor of shape (None, 4), coordinates of each kept box
                 (same coordinate layout as the input `boxes`)
        classes -- tensor of shape (None,), class index of each kept box
    """
    per_class_scores = box_class_probs * box_confidence

    # Best score and the class it belongs to, taken over the last (class) axis.
    best_scores = tf.math.reduce_max(per_class_scores, axis=-1)
    best_classes = tf.math.argmax(per_class_scores, axis=-1)

    # True wherever the best score is at least `threshold`.
    keep = tf.math.greater_equal(best_scores, threshold)

    return (
        tf.boolean_mask(best_scores, keep),
        tf.boolean_mask(boxes, keep),
        tf.boolean_mask(best_classes, keep),
    )


In [None]:
def iou(box1, box2):
    """
    Computes the Intersection over Union (IoU) between two bounding boxes.

    Arguments:
        box1 -- sequence with corner coordinates (x1, y1, x2, y2)
        box2 -- sequence with corner coordinates (x1, y1, x2, y2)

    Returns:
        iou -- float, intersection area divided by union area; 0.0 when the
               union area is zero (both boxes are degenerate), instead of
               raising ZeroDivisionError
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1
    box2_x1, box2_y1, box2_x2, box2_y2 = box2

    # Corners of the intersection rectangle.
    xi1 = max(box1_x1, box2_x1)
    yi1 = max(box1_y1, box2_y1)
    xi2 = min(box1_x2, box2_x2)
    yi2 = min(box1_y2, box2_y2)

    # Width runs along x and height along y; clamp at 0 so that
    # non-overlapping boxes produce an empty intersection.
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height

    box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1)
    box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1)
    union_area = box1_area + box2_area - inter_area

    # Two zero-area boxes have no union to divide by.
    if union_area == 0:
        return 0.0

    return inter_area / union_area


In [None]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Applies per-class Non-max suppression (NMS) to a set of boxes.

    NMS is run separately for every distinct class label (each class keeps at
    most `max_boxes` boxes); the survivors of all classes are then merged,
    sorted by descending score and truncated to `max_boxes` overall.

    Arguments:
        scores -- tensor of shape (None,), confidence scores for each box
        boxes -- tensor of shape (None, 4), bounding box coordinates
        classes -- tensor of shape (None,), class labels for each box
        max_boxes -- int, maximum number of boxes to keep
        iou_threshold -- float, IoU threshold for suppressing overlapping boxes

    Returns:
        scores -- tensor of shape (None,), filtered confidence scores (float32),
                  in descending order
        boxes -- tensor of shape (None, 4), filtered bounding box coordinates (float32)
        classes -- tensor of shape (None,), filtered class labels

        All three are empty when no boxes are selected.
    """
    boxes = tf.cast(boxes, dtype=tf.float32)
    scores = tf.cast(scores, dtype=tf.float32)

    nms_indices = []
    classes_labels = tf.unique(classes)[0]

    for label in classes_labels:
        filtering_mask = classes == label
        boxes_label = tf.boolean_mask(boxes, filtering_mask)
        scores_label = tf.boolean_mask(scores, filtering_mask)

        if tf.shape(scores_label)[0] > 0:
            nms_indices_label = tf.image.non_max_suppression(
                boxes=boxes_label,
                scores=scores_label,
                max_output_size=max_boxes,
                iou_threshold=iou_threshold
            )

            # NMS indices are relative to this class's subset; map them back
            # to positions in the full `scores` / `boxes` / `classes` tensors.
            selected_indices = tf.squeeze(tf.where(filtering_mask), axis=1)
            nms_indices.append(tf.gather(selected_indices, nms_indices_label))

    # tf.concat cannot concatenate an empty list, which is what we have when
    # no box reached this function. Return empty tensors instead of raising.
    if not nms_indices:
        return scores[:0], boxes[:0], classes[:0]

    nms_indices = tf.concat(nms_indices, axis=0)

    scores = tf.gather(scores, nms_indices)
    boxes = tf.gather(boxes, nms_indices)
    classes = tf.gather(classes, nms_indices)

    # Each class contributed up to `max_boxes`; keep the global top `max_boxes`.
    top_indices = tf.argsort(scores, direction='DESCENDING')[:max_boxes]
    scores = tf.gather(scores, top_indices)
    boxes = tf.gather(boxes, top_indices)
    classes = tf.gather(classes, top_indices)

    return scores, boxes, classes


In [None]:
def yolo_boxes_to_corners(box_xy, box_wh):
    """
    Turn center/size box predictions into corner coordinates.

    Arguments:
        box_xy -- tensor of shape (..., 2), box centers as (x, y)
        box_wh -- tensor of shape (..., 2), box sizes as (w, h)

    Returns:
        corners -- tensor of shape (..., 4), ordered (y_min, x_min, y_max, x_max)
    """
    lower = box_xy - (box_wh / 2.)
    upper = box_xy + (box_wh / 2.)

    # Slices (rather than integer indices) keep the trailing axis so the
    # four pieces can be concatenated along it.
    x_min, y_min = lower[..., 0:1], lower[..., 1:2]
    x_max, y_max = upper[..., 0:1], upper[..., 1:2]

    return tf.keras.backend.concatenate([y_min, x_min, y_max, x_max])


In [None]:
def yolo_eval(yolo_outputs, image_shape=(720, 1280), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turns the decoded YOLO outputs into the final scores, boxes and classes.

    Pipeline: center/size boxes -> corner boxes -> score filtering ->
    rescaling with `scale_boxes` -> non-max suppression.

    Arguments:
        yolo_outputs -- tuple of 4 tensors:
            box_xy: (None, 19, 19, 5, 2)
            box_wh: (None, 19, 19, 5, 2)
            box_confidence: (None, 19, 19, 5, 1)
            box_class_probs: (None, 19, 19, 5, 80)
        image_shape -- tuple of (height, width) of the original image
        max_boxes -- int, maximum number of boxes kept after NMS
        score_threshold -- float, boxes scoring below this are discarded
        iou_threshold -- float, IoU threshold used by NMS

    Returns:
        scores -- tensor of shape (None,), scores of the selected boxes
        boxes -- tensor of shape (None, 4), coordinates of the selected boxes
        classes -- tensor of shape (None,), class index of each selected box
    """
    centers, sizes, confidence, class_probs = yolo_outputs

    corner_boxes = yolo_boxes_to_corners(centers, sizes)
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        corner_boxes, confidence, class_probs, threshold=score_threshold
    )

    # Rescale the surviving boxes to the original image dimensions.
    scaled_boxes = scale_boxes(kept_boxes, image_shape)

    return yolo_non_max_suppression(
        kept_scores,
        scaled_boxes,
        kept_classes,
        max_boxes=max_boxes,
        iou_threshold=iou_threshold,
    )


In [None]:
# Class labels and anchor boxes are read from text files under model_data/.
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
# Image size for the model input -- presumably what the network expects; TODO confirm.
model_image_size = (608, 608)

In [None]:
# Load the saved model from the model_data/ directory; compile=False skips
# compilation, which is not needed to run the model for prediction.
yolo_model = load_model("model_data/", compile=False)

In [None]:
# Print the layer-by-layer summary of the loaded model.
yolo_model.summary()

In [None]:
def predict(image_file, max_boxes=10, score_threshold=0.3, iou_threshold=0.5):
    """
    Runs the YOLO model on an image, draws the predicted boxes, saves the
    annotated image under "out/" and displays it.

    Relies on the module-level `yolo_model`, `anchors`, `class_names` and
    `model_image_size`.

    Arguments:
        image_file -- filename of the image in the "images" folder
        max_boxes -- int, maximum number of boxes to keep after NMS
        score_threshold -- float, boxes with score below this are discarded
        iou_threshold -- float, IoU threshold for NMS

    Returns:
        out_scores -- tensor of shape (None,), confidence scores for predicted boxes
        out_boxes -- tensor of shape (None, 4), coordinates of predicted boxes
        out_classes -- tensor of shape (None,), class indices for predicted boxes
    """
    image_path = os.path.join("images", image_file)
    # Use the shared model_image_size constant rather than repeating the literal.
    image, image_data = preprocess_image(image_path, model_image_size=model_image_size)

    yolo_model_outputs = yolo_model(image_data)
    yolo_outputs = yolo_head(yolo_model_outputs, anchors, len(class_names))

    out_scores, out_boxes, out_classes = yolo_eval(
        yolo_outputs,
        # PIL's image.size is (width, height); yolo_eval expects (height, width).
        image_shape=[image.size[1], image.size[0]],
        max_boxes=max_boxes,
        score_threshold=score_threshold,
        iou_threshold=iou_threshold
    )

    print('Found {} boxes for {}'.format(len(out_boxes), image_file))

    # NOTE(review): `colors` is not used below; the call is kept in case
    # draw_boxes depends on a side effect of it -- confirm, then remove.
    colors = get_colors_for_classes(len(class_names))
    draw_boxes(image, out_boxes, out_classes, class_names, out_scores)

    output_path = os.path.join("out", image_file)
    # image.save does not create missing directories, so make sure the
    # destination exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    image.save(output_path, quality=100)

    # Display the saved file; the context manager closes the file handle.
    with Image.open(output_path) as output_image:
        imshow(output_image)

    return out_scores, out_boxes, out_classes
