# Autonomous Driving - Car detection

## Import libraries

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.models import Model
from matplotlib import pyplot as plt  # Import directly from matplotlib for consistency
from PIL import Image

# Custom utility functions
from yolo_utils import (
    read_classes, 
    read_anchors, 
    generate_colors, 
    preprocess_image, 
    draw_boxes, 
    scale_boxes
)
from yad2k.models.keras_yolo import (
    yolo_head, 
    yolo_boxes_to_corners, 
    preprocess_true_boxes, 
    yolo_loss, 
    yolo_body
)

from tensorflow.keras.models import load_model

%matplotlib inline


## 1 - Problem Statement

## 2 - YOLO

### 2.1 - More details

#### Inputs and outputs

#### Anchor Boxes

#### Encoding

#### Class score

#### Visualizing classes

#### Visualizing bounding boxes

### 2.2 - Filtering with a threshold on class scores

#### Implement `yolo_filter_boxes()`

In [None]:
# Modernized YOLO Filter Boxes Function
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keep only the boxes whose best class score reaches `threshold`.

    The score of a box for a class is its object confidence multiplied by
    the class probability; each box is represented by its best class.

    Args:
    box_confidence (tf.Tensor): Object confidence per box, last axis of size 1
        so that it broadcasts against `box_class_probs`.
    boxes (tf.Tensor): Box coordinates, last axis of size 4.
    box_class_probs (tf.Tensor): Per-class probabilities for each box.
    threshold (float): Boxes whose best class score is below this are dropped.

    Returns:
    tuple:
        scores (tf.Tensor): Best class score of every retained box.
        boxes (tf.Tensor): Coordinates of every retained box.
        classes (tf.Tensor): Index of the best class of every retained box.
    """
    # Per-class score for every box: confidence broadcast over the class axis.
    per_class_scores = box_confidence * box_class_probs

    # Reduce over the class axis: best score and the class that achieved it.
    best_scores = tf.reduce_max(per_class_scores, axis=-1)
    best_classes = tf.argmax(per_class_scores, axis=-1)

    # True where the best score reaches the threshold (inclusive).
    keep = tf.greater_equal(best_scores, threshold)

    # The same mask is applied to all three tensors so they stay aligned.
    kept_scores = tf.boolean_mask(best_scores, keep)
    kept_boxes = tf.boolean_mask(boxes, keep)
    kept_classes = tf.boolean_mask(best_classes, keep)

    return kept_scores, kept_boxes, kept_classes


In [None]:
# Exercise yolo_filter_boxes on randomly generated inputs.
# Random stand-ins for the three input tensors (each op is given seed=1).
box_confidence = tf.random.normal(shape=[19, 19, 5, 1], mean=1, stddev=4, seed=1)
boxes = tf.random.normal(shape=[19, 19, 5, 4], mean=1, stddev=4, seed=1)
box_class_probs = tf.random.normal(shape=[19, 19, 5, 80], mean=1, stddev=4, seed=1)

# Filter with an explicit threshold of 0.5 instead of the function default.
scores, filtered_boxes, classes = yolo_filter_boxes(
    box_confidence,
    boxes,
    box_class_probs,
    threshold=0.5,
)

# Show the first three entries of each result (as NumPy values), then the shapes.
print("Example Scores:", scores[:3].numpy())
print("Example Filtered Boxes:", filtered_boxes[:3].numpy())
print("Example Classes:", classes[:3].numpy())
print("Scores Shape:", scores.shape)
print("Boxes Shape:", filtered_boxes.shape)
print("Classes Shape:", classes.shape)

### 2.3 - Non max suppression

#### Implement `iou()`

In [None]:
def iou(box1, box2):
    """
    Compute the Intersection over Union (IoU) between two bounding boxes.

    Args:
    box1 (sequence): First box as four scalars (x1, y1, x2, y2), where
        (x1, y1) is one corner and (x2, y2) the opposite corner.
    box2 (sequence): Second box, same layout as `box1`.

    Returns:
    float: Intersection area divided by union area. Returns 0.0 when the
        boxes do not overlap (including touching only at an edge or vertex)
        or when the union area is not positive.
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1
    box2_x1, box2_y1, box2_x2, box2_y2 = box2

    # The intersection rectangle spans from the larger of the two lower
    # corners to the smaller of the two upper corners; a negative extent
    # means the boxes do not overlap on that axis, so it is clamped to 0.
    inter_width = max(min(box1_x2, box2_x2) - max(box1_x1, box2_x1), 0)
    inter_height = max(min(box1_y2, box2_y2) - max(box1_y1, box2_y1), 0)
    inter_area = inter_width * inter_height

    box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1)
    box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1)

    # The intersection is counted in both box areas, so subtract it once.
    union_area = box1_area + box2_area - inter_area

    # Guard against division by zero for degenerate (zero-area) boxes.
    if union_area <= 0:
        return 0.0

    # float() yields a plain Python float regardless of the input scalar type.
    return float(inter_area / union_area)


In [None]:
# Case 1: the boxes overlap in a 1x1 region.
box1, box2 = (2, 1, 4, 3), (1, 2, 3, 4)
print(f"IoU for intersecting boxes: {iou(box1, box2):.2f}")

# Case 2: the boxes are completely disjoint.
box1, box2 = (1, 2, 3, 4), (5, 6, 7, 8)
print(f"IoU for non-intersecting boxes: {iou(box1, box2):.2f}")

# Case 3: the boxes share a single corner point only.
box1, box2 = (1, 1, 2, 2), (2, 2, 3, 3)
print(f"IoU for boxes touching at vertices: {iou(box1, box2):.2f}")

# Case 4: the boxes share part of an edge only.
box1, box2 = (1, 1, 3, 3), (2, 3, 3, 4)
print(f"IoU for boxes touching at edges: {iou(box1, box2):.2f}")


#### YOLO non-max suppression

In [None]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Apply Non-Max Suppression (NMS) to filter overlapping bounding boxes.

    Args:
    scores (tf.Tensor): 1D tensor with shape (None,), confidence scores for each box.
    boxes (tf.Tensor): 2D tensor with shape (None, 4), bounding box coordinates.
    classes (tf.Tensor): 1D tensor with shape (None,), class predictions for each box.
    max_boxes (int): Maximum number of boxes to keep after applying NMS.
    iou_threshold (float): IOU threshold for deciding whether boxes overlap too much.

    Returns:
    Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        scores: Confidence scores of the boxes selected by NMS.
        boxes: Coordinates of the boxes selected by NMS.
        classes: Class predictions of the boxes selected by NMS.
    """
    # Indices (into the first axis of `boxes`) of the boxes NMS keeps;
    # at most `max_boxes` indices are returned.
    nms_indices = tf.image.non_max_suppression(
        boxes=boxes,
        scores=scores,
        max_output_size=max_boxes,
        iou_threshold=iou_threshold,
    )

    # Gather with the same indices so the three outputs stay aligned.
    scores = tf.gather(scores, nms_indices)
    boxes = tf.gather(boxes, nms_indices)
    classes = tf.gather(classes, nms_indices)

    return scores, boxes, classes


In [None]:
# Exercise yolo_non_max_suppression on random scores, boxes, and classes.
scores = tf.random.normal(shape=[54], mean=1, stddev=4, seed=1)
boxes = tf.random.normal(shape=[54, 4], mean=1, stddev=4, seed=1)
classes = tf.random.normal(shape=[54], mean=1, stddev=4, seed=1)

# Run NMS with the function's default max_boxes and iou_threshold;
# the inputs are overwritten with the filtered results.
scores, boxes, classes = yolo_non_max_suppression(
    scores=scores,
    boxes=boxes,
    classes=classes,
)

# Show the third retained entry of each output, followed by the output shapes.
print("scores[2] =", scores.numpy()[2])
print("boxes[2] =", boxes.numpy()[2])
print("classes[2] =", classes.numpy()[2])
print("scores.shape =", scores.numpy().shape)
print("boxes.shape =", boxes.numpy().shape)
print("classes.shape =", classes.numpy().shape)

### 2.4 - Wrapping up the filtering

In [None]:
def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turn decoded YOLO outputs into the final scores, boxes, and classes.

    The pipeline is: convert boxes to corner form, drop low-scoring boxes,
    rescale the survivors to the image, then run non-max suppression.

    Args:
    yolo_outputs (tuple): Four tensors, unpacked in this order:
        box_confidence, box_xy, box_wh, box_class_probs.
    image_shape (tuple): Shape of the input image, e.g., (720., 1280.);
        forwarded unchanged to `scale_boxes`.
    max_boxes (int): Maximum number of boxes kept by non-max suppression.
    score_threshold (float): Minimum class score for a box to be kept.
    iou_threshold (float): IOU threshold used by non-max suppression.

    Returns:
    Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        scores: Scores of the boxes that survive both filters.
        boxes: Coordinates of those boxes after rescaling.
        classes: Predicted classes of those boxes.
    """
    confidence, xy, wh, class_probs = yolo_outputs

    # Centre/size representation -> corner representation.
    corner_boxes = yolo_boxes_to_corners(xy, wh)

    # First filter: score threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, threshold=score_threshold
    )

    # Rescale the surviving boxes using the supplied image shape.
    kept_boxes = scale_boxes(kept_boxes, image_shape)

    # Second filter: non-max suppression; its result is returned directly.
    return yolo_non_max_suppression(
        kept_scores,
        kept_boxes,
        kept_classes,
        max_boxes=max_boxes,
        iou_threshold=iou_threshold,
    )


In [None]:
# Exercise yolo_eval end to end on random stand-ins for the YOLO outputs.
# The tuple order matches what yolo_eval unpacks:
# (box_confidence, box_xy, box_wh, box_class_probs).
yolo_outputs = (
    tf.random.normal(shape=[19, 19, 5, 1], mean=1, stddev=4, seed=1),
    tf.random.normal(shape=[19, 19, 5, 2], mean=1, stddev=4, seed=1),
    tf.random.normal(shape=[19, 19, 5, 2], mean=1, stddev=4, seed=1),
    tf.random.normal(shape=[19, 19, 5, 80], mean=1, stddev=4, seed=1),
)

# Evaluation settings, spelled out explicitly.
image_shape = (720., 1280.)
max_boxes = 10
score_threshold = 0.6
iou_threshold = 0.5

scores, boxes, classes = yolo_eval(
    yolo_outputs,
    image_shape=image_shape,
    max_boxes=max_boxes,
    score_threshold=score_threshold,
    iou_threshold=iou_threshold,
)

# Show the third entry of each output when at least three boxes survived,
# otherwise a placeholder message; then show the output shapes.
print("Scores[2]:", scores.numpy()[2] if len(scores) > 2 else "Not enough scores")
print("Boxes[2]:", boxes.numpy()[2] if len(boxes) > 2 else "Not enough boxes")
print("Classes[2]:", classes.numpy()[2] if len(classes) > 2 else "Not enough classes")
print("Scores shape:", scores.shape)
print("Boxes shape:", boxes.shape)
print("Classes shape:", classes.shape)


## 3 - Test YOLO pretrained model on images

### 3.1 - Defining classes, anchors, and image shape.

In [None]:
# Load class names from the specified file.
# len(class_names) is later passed to yolo_head as the number of classes.
class_names = read_classes("model_data/coco_classes.txt")

# Load anchor box dimensions from the specified file.
anchors = read_anchors("model_data/yolo_anchors.txt")

# Define the shape of the input image as height and width.
# This value is passed to yolo_eval, which forwards it to scale_boxes.
image_shape = (720., 1280.)


### 3.2 - Loading a pretrained model

In [None]:
# Load the pre-trained YOLO model from the specified file.
# The path is relative to the notebook's working directory.
yolo_model = load_model("model_data/yolo.h5")


In [None]:
# Display the summary of the YOLO model architecture
# (useful for checking the layers and output of the loaded model).
yolo_model.summary()


### 3.3 - Convert output of the model to usable bounding box tensor

In [None]:
# Decode the model output with yolo_head, using the loaded anchors and the
# number of class names.
# NOTE(review): yolo_eval unpacks the result as (box_confidence, box_xy,
# box_wh, box_class_probs) — confirm that yolo_head returns that order.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

### 3.4 - Filtering boxes

In [None]:
# Evaluate the YOLO model's outputs to filter detections.
# Only image_shape is given, so yolo_eval uses its default max_boxes,
# score_threshold, and iou_threshold.
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)


### 3.5 - Run the graph on an image

In [None]:
def predict(image_file, yolo_model, class_names, anchors=None):
    """
    Run YOLO detection on one image, save an annotated copy, and display it.

    The raw model output is decoded with `yolo_head` and then filtered with
    `yolo_eval` (score threshold followed by non-max suppression, using
    `yolo_eval`'s default thresholds).

    Args:
        image_file (str): Name of the input image inside the "images" directory.
        yolo_model (tf.keras.Model): The YOLO model used for prediction.
        class_names (list): List of class names that YOLO can detect.
        anchors: Anchor boxes passed to `yolo_head`. When None, they are read
            from "model_data/yolo_anchors.txt".

    Returns:
        tuple: A tuple containing:
            - out_scores (numpy.ndarray): Confidence scores for the predicted boxes.
            - out_boxes (numpy.ndarray): Coordinates of the predicted bounding boxes.
            - out_classes (numpy.ndarray): Class indices corresponding to the predicted boxes.
    """
    # Load and preprocess the image for the 608x608 model input.
    image_path = os.path.join("images", image_file)
    image, image_data = preprocess_image(image_path, model_image_size=(608, 608))

    if anchors is None:
        anchors = read_anchors("model_data/yolo_anchors.txt")

    # The model returns a raw tensor, not a dict of named results, so it is
    # decoded into confidence / position / size / class tensors first.
    raw_output = yolo_model(image_data, training=False)
    yolo_outputs = yolo_head(raw_output, anchors, len(class_names))

    # image.size is (width, height); yolo_eval is given (height, width), the
    # same order as the notebook's image_shape.
    # NOTE(review): assumes scale_boxes expects (height, width) — confirm.
    image_width, image_height = image.size
    scores, boxes, classes = yolo_eval(
        yolo_outputs,
        image_shape=(float(image_height), float(image_width)),
    )

    # Convert to NumPy so callers and the drawing helper get plain arrays.
    out_scores = scores.numpy()
    out_boxes = boxes.numpy()
    out_classes = classes.numpy()

    # Print number of boxes detected
    print(f'Found {len(out_boxes)} boxes for {image_file}')

    # Generate colors for bounding boxes
    colors = generate_colors(class_names)

    # Draw bounding boxes on the image and save the result; create the output
    # directory first so saving does not fail when it is missing.
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    os.makedirs("out", exist_ok=True)
    output_path = os.path.join("out", image_file)
    image.save(output_path, quality=90)

    # Display the saved image. plt.imread replaces ndimage.imread, which was
    # never imported here and is no longer provided by SciPy.
    output_image = plt.imread(output_path)
    plt.imshow(output_image)
    plt.axis('off')
    plt.show()

    return out_scores, out_boxes, out_classes


In [None]:
# Get predicted scores, boxes, and classes for the test image.
# predict() requires the model and the class names in addition to the file name.
out_scores, out_boxes, out_classes = predict("test.jpg", yolo_model, class_names)


**References**: The ideas presented in this notebook came primarily from the two YOLO papers. The implementation here also took significant inspiration and used many components from Allan Zelener's GitHub repository. The pre-trained weights used in this exercise came from the official YOLO website. 
- Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi - [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640) (2015)
- Joseph Redmon, Ali Farhadi - [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242) (2016)
- Allan Zelener - [YAD2K: Yet Another Darknet 2 Keras](https://github.com/allanzelener/YAD2K)
- The official YOLO website (https://pjreddie.com/darknet/yolo/) 

**Car detection dataset**:
<a rel="license" href="http://creativecommons.org/licenses/by/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by/4.0/88x31.png" /></a><br /><span xmlns:dct="http://purl.org/dc/terms/" property="dct:title">The Drive.ai Sample Dataset</span> (provided by drive.ai) is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>. We are grateful to Brody Huval, Chih Hu and Rahul Patel for  providing this data. 