This notebook demonstrates object detection with two pre-trained KerasCV models: Faster R-CNN and YOLO.
by Srivatsan Rangarajan

In [2]:
import tensorflow as tf
from tensorflow import keras
import keras_cv
import numpy as np
from keras_cv import visualization
from keras_cv.layers import MultiClassNonMaxSuppression

ModuleNotFoundError: No module named 'keras_cv'

In [None]:
# 1. Download a street image
# The URL is split over two adjacent literals purely for readability; Python
# joins them into the same single string.
street_image_url = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/1/17/"
    "Street_view_in_Philadelphia.jpg/800px-Street_view_in_Philadelphia.jpg"
)
# Fetch the file under the name "street_image.jpg"; the returned value is used
# below as the path to load the image from.
street_image_path = tf.keras.utils.get_file(
    fname="street_image.jpg",
    origin=street_image_url,
)

In [None]:
# 2. Load and preprocess the image
# Read the downloaded file and convert it directly to a NumPy array.
image = np.array(keras.utils.load_img(street_image_path))
# Prepend a leading batch axis so the array has a batch dimension of size 1.
batched_image = image[np.newaxis, ...]

In [None]:
# Standardize the model input to 640x640. Padding (rather than stretching)
# is requested so the original aspect ratio is preserved.
inference_resizing = keras_cv.layers.Resizing(
    height=640,
    width=640,
    bounding_box_format="xywh",
    pad_to_aspect_ratio=True,
)
# Resize the single (unbatched) image, then add the batch axis afterwards.
resized_image = inference_resizing(image)
batched_resized_image = np.expand_dims(resized_image, 0)

In [None]:
# 3. Load pretrained model 1: Faster R-CNN (COCO preset)
# Boxes are requested in "xywh" format, the same format string used by the
# resizing layer and by every plotting call in this notebook.
# NOTE(review): confirm that the installed keras_cv release exposes
# `keras_cv.models.FasterRCNN.from_preset` and ships a preset named
# "fasterrcnn_resnet50_v1_coco" -- TODO verify against the keras_cv version in use.
fasterrcnn_model = keras_cv.models.FasterRCNN.from_preset(
    "fasterrcnn_resnet50_v1_coco", bounding_box_format="xywh"
)

In [None]:
# 4. Load pretrained model 2: YOLO (COCO preset)
# Boxes are requested in "xywh" format, the same format string used by the
# resizing layer and by every plotting call in this notebook.
# NOTE(review): ensure the installed keras_cv version actually provides a
# `keras_cv.models.YOLO` class and a "yolo_v8_coco" preset. The YOLOv8 detector
# may be exposed under a different name (e.g. `YOLOV8Detector`) with differently
# named presets -- TODO verify; if the preset is not COCO-trained, the COCO
# class mapping used for plotting must change with it.
yolo_model = keras_cv.models.YOLO.from_preset(
    "yolo_v8_coco", bounding_box_format="xywh"
)

In [None]:
# 5. Define the COCO class mapping (all 80 object categories, in label order)
# The list must be complete and in order because the mapping key is the list
# position. The earlier 10-entry "partial" list skipped "boat", which shifted
# "traffic light" and "fire hydrant" onto the wrong ids (8 and 9) and left
# every id >= 10 without a label.
# NOTE(review): this assumes the detectors emit contiguous 0-based COCO class
# ids (0 = "person" ... 79 = "toothbrush") -- confirm against the presets used.
coco_class_ids = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]
# Map integer class id -> human-readable label for the visualization helpers.
coco_class_mapping = dict(enumerate(coco_class_ids))

In [None]:
# 6. Run inference using Faster R-CNN
y_pred_frcnn = fasterrcnn_model.predict(batched_resized_image)

# Draw the predictions on the one resized image, shown as a 1x1 gallery.
# Pixel values are declared to be in the 0-255 range and boxes in "xywh" format.
visualization.plot_bounding_box_gallery(
    batched_resized_image,
    y_pred=y_pred_frcnn,
    bounding_box_format="xywh",
    class_mapping=coco_class_mapping,
    value_range=(0, 255),
    rows=1,
    cols=1,
    scale=5,
    font_scale=0.7,
)

In [None]:
# 7. Run inference using YOLO
y_pred_yolo = yolo_model.predict(batched_resized_image)

# Draw the predictions on the one resized image, shown as a 1x1 gallery.
# Pixel values are declared to be in the 0-255 range and boxes in "xywh" format.
visualization.plot_bounding_box_gallery(
    batched_resized_image,
    y_pred=y_pred_yolo,
    bounding_box_format="xywh",
    class_mapping=coco_class_mapping,
    value_range=(0, 255),
    rows=1,
    cols=1,
    scale=5,
    font_scale=0.7,
)

In [None]:
# 8. Adjust detection thresholds via Non-Max Suppression (NMS)
# Both detectors get the same, tighter decoder settings for more selective
# detections: a lower IoU threshold and a higher confidence threshold.
NMS_IOU_THRESHOLD = 0.2          # Lower IoU threshold
NMS_CONFIDENCE_THRESHOLD = 0.7   # Higher confidence threshold


def _predict_with_nms(
    model,
    images,
    class_mapping,
    iou_threshold=NMS_IOU_THRESHOLD,
    confidence_threshold=NMS_CONFIDENCE_THRESHOLD,
):
    """Attach a fresh NMS decoder to `model`, re-run prediction, and plot it.

    Args:
        model: Detector whose `prediction_decoder` attribute is replaced
            (the model is mutated in place).
        images: Batched images passed to `model.predict` and to the plot.
        class_mapping: Dict of class id -> label used to annotate the boxes.
        iou_threshold: IoU threshold given to the NMS layer.
        confidence_threshold: Confidence threshold given to the NMS layer.

    Returns:
        A `(decoder, predictions)` tuple: the NMS layer that was attached and
        the output of `model.predict(images)` with that decoder in place.
    """
    decoder = MultiClassNonMaxSuppression(
        bounding_box_format="xywh",
        from_logits=True,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold,
    )
    # Swap the decoder before predicting so the new thresholds are applied.
    model.prediction_decoder = decoder
    predictions = model.predict(images)
    visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        rows=1,
        cols=1,
        y_pred=predictions,
        scale=5,
        font_scale=0.7,
        bounding_box_format="xywh",
        class_mapping=class_mapping,
    )
    return decoder, predictions


# For Faster R-CNN: Tighter thresholds for more selective detections
nms_frcnn, y_pred_frcnn_adjusted = _predict_with_nms(
    fasterrcnn_model, batched_resized_image, coco_class_mapping
)

# For YOLO: Similar threshold adjustment
nms_yolo, y_pred_yolo_adjusted = _predict_with_nms(
    yolo_model, batched_resized_image, coco_class_mapping
)
