In [2]:
%pip install tensorflow opencv-python matplotlib keras

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0 -> 23.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Import standard dependencies
import cv2
import os
import random
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
from tqdm import tqdm
import shutil

In [4]:
from keras.layers import BatchNormalization, Activation, Reshape
from keras.layers import Concatenate, ZeroPadding2D, GlobalAveragePooling2D, Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.regularizers import l2
from keras import backend as K
import tensorflow as tf

In [5]:
# Avoid OOM errors by setting GPU Memory Consumption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus: 
    tf.config.experimental.set_memory_growth(gpu, True)

In [6]:
# first run this script for this images:
IMAGES_DIR = 'data/WIDERFace/WIDER_train/images/'
BOXES_PATH = 'data/WIDERFace/wider_face_split/wider_face_train_bbx_gt.txt'
RESULT_DIR = 'data/WIDERFace/train/'

# then run for this images:
# IMAGES_DIR = 'data/WIDERFace/WIDER_val/images/'
# BOXES_PATH = 'data/WIDERFace/wider_face_split/wider_val_train_bbx_gt.txt'
# RESULT_DIR = 'data/WIDERFace/val/'

In [None]:
weight_decay = 1e-4
score_threshold = 0.05
iou_threshold = 0.3
image_size = 300

localization_loss_weight = 1.0
confidence_loss_weight = 1.0
class_loss_weight = 1.0
regularisation_loss_weight = 1e-3

lr_bounds = [16000, 20000]
lr_values = [4e-3, 4e-4, 4e-5]

image_size = [1024, 1024]
image_size = [300, 300]
batch_size = 16
epochs = 100
num_classes = 2
num_steps = 24000
num_eval_steps = 1000
train_dataset = "data/WIDERFace/WIDER_train/"
val_dataset = "data/WIDERFace/WIDER_val/"
train_annotations = "data/WIDERFace/wider_face_split/wider_face_train_bbx_gt.txt"
val_annotations = "data/WIDERFace/wider_face_split/wider_face_val_bbx_gt.txt"
train_tfrecord = "data/WIDERFace/train.tfrecord"
val_tfrecord = "data/WIDERFace/val.tfrecord"
checkpoint_dir = "data/WIDERFace/checkpoints/"
log_dir = "data/WIDERFace/logs/"
output_dir = "data/WIDERFace/output/"
# Create output directory
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    
# Create checkpoint directory
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Create log directory
if not os.path.exists(log_dir):
    os.makedirs(log_dir)



In [7]:
# collect paths to all images

all_paths = []
for path, subdirs, files in tqdm(os.walk(IMAGES_DIR)):
    for name in files:
        all_paths.append(os.path.join(path, name))
        
metadata = pd.DataFrame(all_paths, columns=['full_path'])

# strip root folder
metadata['path'] = metadata.full_path.apply(lambda x: os.path.relpath(x, IMAGES_DIR))

62it [00:00, 1016.78it/s]


In [8]:
# see all unique endings
metadata.path.apply(lambda x: x.split('.')[-1]).unique()

# get all images
len(metadata)

# read all boxes
with open(BOXES_PATH, 'r') as f:
    content = f.readlines()
    content = [s.strip() for s in content]

In [9]:
# split annotations by image
boxes = {}
num_lines = len(content)
i = 0
name = None

while i < num_lines:
    s = content[i]
    file = s.split('/')[-1]
    if s.endswith('.jpg'):
        if name is not None:
            assert len(boxes[name]) == num_boxes
        name = s
        boxes[name] = []
        i += 1
        num_boxes = int(content[i])
        i += 1
    else:
        xmin, ymin, w, h = s.split(' ')[:4]
        xmin, ymin, w, h = int(xmin), int(ymin), int(w), int(h)
        if h <= 0 or w <= 0:
            print(name)  
            # some boxes are weird!
            # so i don't use them
            num_boxes -= 1
        else:
            boxes[name].append((xmin, ymin, w, h))
        i += 1

ValueError: invalid literal for int() with base 10: 'data\\WIDERFace\\WIDER_train\\images/0--Parade/0_Parade_marchingband_1_849.jpg'

In [None]:
def draw_boxes_on_image(path, boxes):

    image = Image.open(path)
    draw = ImageDraw.Draw(image, 'RGBA')
    width, height = image.size

    for b in boxes:
        xmin, ymin, w, h = b
        xmax, ymax = xmin + w, ymin + h

        fill = (255, 255, 255, 45)
        outline = 'red'
        draw.rectangle(
            [(xmin, ymin), (xmax, ymax)],
            fill=fill, outline=outline
        )
    return image

i = random.randint(0, len(metadata) - 1)  # choose a random image
some_boxes = boxes[metadata.path[i]]
draw_boxes_on_image(metadata.full_path[i], some_boxes)

In [None]:
def get_annotation(path, width, height):
    name = path.split('/')[-1]
    annotation = {
      "filename": name,
      "size": {"depth": 3, "width": width, "height": height}
    }
    objects = []
    for b in boxes[path]:
        xmin, ymin, w, h = b
        xmax, ymax = xmin + w, ymin + h
        objects.append({
            "bndbox": {"ymin": ymin, "ymax": ymax, "xmax": xmax, "xmin": xmin}, 
            "name": "face"
        })
    annotation["object"] = objects
    return annotation

In [None]:
# create a folder for the converted dataset
shutil.rmtree(RESULT_DIR, ignore_errors=True)
os.mkdir(RESULT_DIR)
os.mkdir(os.path.join(RESULT_DIR, 'images'))
os.mkdir(os.path.join(RESULT_DIR, 'annotations'))

In [None]:
for T in tqdm(metadata.itertuples()):
    
    # get width and height of an image
    image = cv2.imread(T.full_path)
    h, w, c = image.shape
    assert c == 3
    
    # name of the image
    name = T.path.split('/')[-1]
    assert name.endswith('.jpg')

    # copy the image
    shutil.copy(T.full_path, os.path.join(RESULT_DIR, 'images', name))
    
    # save annotation for it
    d = get_annotation(T.path, w, h)
    json_name = name[:-4] + '.json'
    json.dump(d, open(os.path.join(RESULT_DIR, 'annotations', json_name), 'w')) 

In [None]:
import io
import os
import PIL.Image
import tensorflow as tf
import json
import shutil
import random
import math
import argparse
from tqdm import tqdm
import sys


"""
The purpose of this script is to create a set of .tfrecords files
from a folder of images and a folder of annotations.
Annotations are in the json format.
Images must have .jpg or .jpeg filename extension.
Example of a json annotation (with filename "132416.json"):
{
  "object": [
    {"bndbox": {"ymin": 20, "ymax": 276, "xmax": 1219, "xmin": 1131}, "name": "face"},
    {"bndbox": {"ymin": 1, "ymax": 248, "xmax": 1149, "xmin": 1014}, "name": "face"}
  ],
  "filename": "132416.jpg",
  "size": {"depth": 3, "width": 1920, "height": 1080}
}
Example of use:
python create_tfrecords.py \
    --image_dir=/home/gpu2/hdd/dan/WIDER/val/images/ \
    --annotations_dir=/home/gpu2/hdd/dan/WIDER/val/annotations/ \
    --output=data/train_shards/ \
    --num_shards=100
"""


def make_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image_dir', type=str)
    parser.add_argument('-a', '--annotations_dir', type=str)
    parser.add_argument('-o', '--output', type=str)
    parser.add_argument('-s', '--num_shards', type=int, default=1)
    return parser.parse_args()


def dict_to_tf_example(annotation, image_dir):
    """Convert dict to tf.Example proto.
    Notice that this function normalizes the bounding
    box coordinates provided by the raw data.
    Arguments:
        data: a dict.
        image_dir: a string, path to the image directory.
    Returns:
        an instance of tf.Example.
    """
    image_name = annotation['filename']
    assert image_name.endswith('.jpg') or image_name.endswith('.jpeg')

    image_path = os.path.join(image_dir, image_name)
    with tf.gfile.GFile(image_path, 'rb') as f:
        encoded_jpg = f.read()

    # check image format
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG!')

    width = int(annotation['size']['width'])
    height = int(annotation['size']['height'])
    assert width > 0 and height > 0
    assert image.size[0] == width and image.size[1] == height
    ymin, xmin, ymax, xmax = [], [], [], []

    just_name = image_name[:-4] if image_name.endswith('.jpg') else image_name[:-5]
    annotation_name = just_name + '.json'
    if len(annotation['object']) == 0:
        print(annotation_name, 'is without any objects!')

    for obj in annotation['object']:
        a = float(obj['bndbox']['ymin'])/height
        b = float(obj['bndbox']['xmin'])/width
        c = float(obj['bndbox']['ymax'])/height
        d = float(obj['bndbox']['xmax'])/width
        assert (a < c) and (b < d)
        ymin.append(a)
        xmin.append(b)
        ymax.append(c)
        xmax.append(d)
        assert obj['name'] == 'face'

    example = tf.train.Example(features=tf.train.Features(feature={
        'filename': _bytes_feature(image_name.encode()),
        'image': _bytes_feature(encoded_jpg),
        'xmin': _float_list_feature(xmin),
        'xmax': _float_list_feature(xmax),
        'ymin': _float_list_feature(ymin),
        'ymax': _float_list_feature(ymax),
    }))
    return example


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _float_list_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def main():
    ARGS = make_args()

    image_dir = ARGS.image_dir
    annotations_dir = ARGS.annotations_dir
    print('Reading images from:', image_dir)
    print('Reading annotations from:', annotations_dir, '\n')

    examples_list = os.listdir(annotations_dir)
    num_examples = len(examples_list)
    print('Number of images:', num_examples)

    num_shards = ARGS.num_shards
    shard_size = math.ceil(num_examples/num_shards)
    print('Number of images per shard:', shard_size)

    output_dir = ARGS.output
    shutil.rmtree(output_dir, ignore_errors=True)
    os.mkdir(output_dir)

    shard_id = 0
    num_examples_written = 0
    for example in tqdm(examples_list):

        if num_examples_written == 0:
            shard_path = os.path.join(output_dir, 'shard-%04d.tfrecords' % shard_id)
            writer = tf.python_io.TFRecordWriter(shard_path)

        path = os.path.join(annotations_dir, example)
        annotation = json.load(open(path))
        tf_example = dict_to_tf_example(annotation, image_dir)
        writer.write(tf_example.SerializeToString())
        num_examples_written += 1

        if num_examples_written == shard_size:
            shard_id += 1
            num_examples_written = 0
            writer.close()

    if num_examples_written != shard_size and num_examples % num_shards != 0:
        writer.close()

    print('Result is here:', ARGS.output)


main()

In [None]:
def preprocess(file_path):
    
    # Read in image from file path
    byte_img = tf.io.read_file(file_path)
    # Load in the image 
    img = tf.io.decode_jpeg(byte_img)
    
    # Preprocessing steps - resizing the image to be 100x100x3
    img = tf.image.resize(img, (100,100))
    # Scale image to be between 0 and 1 
    img = img / 255.0

    # Return image
    return img

In [None]:
def data_aug(img):
    data = []
    for i in range(9):
        img = tf.image.stateless_random_brightness(img, max_delta=0.02, seed=(1,2))
        img = tf.image.stateless_random_contrast(img, lower=0.6, upper=1, seed=(1,3))
        # img = tf.image.stateless_random_crop(img, size=(20,20,3), seed=(1,2))
        img = tf.image.stateless_random_flip_left_right(img, seed=(np.random.randint(100),np.random.randint(100)))
        img = tf.image.stateless_random_jpeg_quality(img, min_jpeg_quality=90, max_jpeg_quality=100, seed=(np.random.randint(100),np.random.randint(100)))
        img = tf.image.stateless_random_saturation(img, lower=0.9,upper=1, seed=(np.random.randint(100),np.random.randint(100)))
            
        data.append(img)
    
    return data

In [None]:
from keras import regularizers

import torch.nn as nn
import torch.nn.functional as F

class SSD(nn.Module):
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        
        # Define the base VGG16 network
        self.base = nn.ModuleList([
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, kernel_size=1),
            nn.ReLU(inplace=True)
        ])
        
        # Define the auxiliary convolutional layers
        self.aux_convs = nn.ModuleList([
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.Conv2d(512, 128, kernel_size=1),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.Conv2d(256, 128, kernel_size=1),
            nn.Conv2d(128, 256, kernel_size=3),
            nn.Conv2d(256, 128, kernel_size=1),
            nn.Conv2d(128, 256, kernel_size=3)
        ])
        
        # Define the localization and classification layers
        self.loc_layers = nn.ModuleList([
            nn.Conv2d(512, 4 * 4, kernel_size=3, padding=1),
            nn.Conv2d(1024, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(512, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(256, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(256, 4 * 4, kernel_size=3, padding=1),
            nn.Conv2d(256, 4 * 4, kernel_size=3, padding=0),
            nn.Conv2d(256, 4 * 4, kernel_size=3, padding=0),
            nn.Conv2d(256, 4 * 4, kernel_size=3, padding=0)
        ])
        
        self.cls_layers = nn.ModuleList([
            nn.Conv2d(512, 4 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(1024, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(512, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(256, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(256, 4 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(256, 4 * num_classes, kernel_size=3, padding=0),
            nn.Conv2d(256, 4 * num_classes, kernel_size=3, padding=0),
            nn.Conv2d(256, 4 * num_classes, kernel_size=3, padding=0)
        ])


In [None]:
# Create an instance of the SSD class
ssd = SSD(num_classes=2)

# Define the input layer
input_layer = Input(shape=(300, 300, 3))

# Define the base VGG16 network
vgg16 = ssd.base(input_layer)



# Define the model
model = ssd(inputs=input_layer)

# Compile the model with L2 regularization loss
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'],
              loss_weights=[1., 0.01]) # set the weight for the L2 regularization loss

# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_val, y_val))



In [None]:
import tensorflow as tf

# Confidence loss
def confidence_loss(y_true, y_pred):
    """
    Computes the binary cross-entropy loss for the confidence scores.
    y_true: true class labels (batch_size, num_anchors, num_classes+1)
    y_pred: predicted class scores (batch_size, num_anchors, num_classes+1)
    """
    # Extract the true and predicted class scores
    true_scores = y_true[:,:,1:]
    pred_scores = y_pred[:,:,1:]
    
    # Compute the binary cross-entropy loss
    conf_loss = tf.keras.losses.binary_crossentropy(true_scores, pred_scores, from_logits=True)
    
    # Mask out the negative anchor boxes
    mask = y_true[:,:,0]
    conf_loss = conf_loss * mask
    
    # Compute the average loss over positive samples
    num_positives = tf.reduce_sum(mask)
    conf_loss = tf.reduce_sum(conf_loss) / (num_positives + 1e-6)
    
    return conf_loss

# Localization loss
def localization_loss(y_true, y_pred):
    """
    Computes the smooth L1 loss for the bounding box locations.
    y_true: true bounding box coordinates (batch_size, num_anchors, 4)
    y_pred: predicted bounding box coordinates (batch_size, num_anchors, 4)
    """
    # Compute the smooth L1 loss
    loc_loss = tf.keras.losses.Huber(delta=1.0)(y_true, y_pred)
    
    # Mask out the negative anchor boxes
    mask = tf.reduce_sum(tf.abs(y_true), axis=-1)
    loc_loss = loc_loss * mask
    
    # Compute the average loss over positive samples
    num_positives = tf.reduce_sum(mask)
    loc_loss = tf.reduce_sum(loc_loss) / (num_positives + 1e-6)
    
    return loc_loss

# Hard negative mining loss
def hard_negative_mining_loss(y_true, y_pred, neg_ratio=3):
    """
    Computes the hard negative mining loss for the negative anchor boxes.
    y_true: true class labels (batch_size, num_anchors, num_classes+1)
    y_pred: predicted class scores (batch_size, num_anchors, num_classes+1)
    neg_ratio: the ratio of negative samples to positive samples
    """
    # Extract the true and predicted class scores
    true_scores = y_true[:,:,1:]
    pred_scores = y_pred[:,:,1:]
    
    # Compute the binary cross-entropy loss
    conf_loss = tf.keras.losses.binary_crossentropy(true_scores, pred_scores, from_logits=True)
    
    # Mask out the positive and negative anchor boxes
    pos_mask = y_true[:,:,0]
    neg_mask = tf.logical_not(pos_mask)
    
    # Compute the number of negative samples to keep
    num_positives = tf.reduce_sum(pos_mask, axis=-1)
    num_negatives = tf.reduce_sum(neg_mask, axis=-1)
    num_negatives_keep = tf.cast(neg_ratio * num_positives, tf.int32)
    
    # Only keep the hardest negative samples
    conf_loss_neg = conf_loss * tf.cast(neg_mask, tf.float32)
    conf_loss_neg = tf.where(tf.math.is_nan(conf_loss_neg), tf.zeros_like(conf_loss_neg), conf_loss_neg) # prevent NaN values
    conf_loss_neg, _ = tf.nn.top_k(tf.reshape(conf_loss_neg, (-1,)), k=tf.reduce_sum(num_negatives_keep))
    conf_loss_neg = tf.reduce_mean(conf_loss_neg)
    
    # Compute the total loss (positive and negative)
    conf_loss_pos = tf.reduce_sum(conf_loss * pos_mask, axis=-1)
    total_loss = conf_loss_pos + conf_loss_neg
    
    return total_loss


In [None]:
import tensorflow as tf
import numpy as np

# Generate some test data
batch_size = 2
num_anchors = 3
num_classes = 4
num_coords = 4
y_true = np.random.randint(0, 2, size=(batch_size, num_anchors, num_classes+1)).astype('float32')
y_pred = np.random.rand(batch_size, num_anchors, num_classes+1).astype('float32')
loc_true = np.random.rand(batch_size, num_anchors, num_coords).astype('float32')
loc_pred = np.random.rand(batch_size, num_anchors, num_coords).astype('float32')
weights = np.random.rand(batch_size, num_anchors).astype('float32')

# Compute the expected losses
expected_classification_loss = tf.keras.losses.binary_crossentropy(y_true[:,:,1:], y_pred[:,:,1:], from_logits=True)
expected_classification_loss = tf.reduce_sum(expected_classification_loss * y_true[:,:,0], axis=-1)
expected_classification_loss = tf.reduce_mean(expected_classification_loss)

expected_localization_loss = tf.keras.losses.huber(loc_true, loc_pred, delta=1.0)
expected_localization_loss = tf.reduce_sum(expected_localization_loss * weights, axis=-1)
expected_localization_loss = tf.reduce_mean(expected_localization_loss)

expected_hard_negative_mining_loss = expected_classification_loss

expected_regularization_loss = tf.reduce_sum(tf.abs(y_pred[:,:,1:]), axis=-1)
expected_regularization_loss = tf.reduce_mean(expected_regularization_loss)

# Call each loss function on the test data
classification_loss_value = confidence_loss(y_true, y_pred).numpy()
localization_loss_value = localization_loss(loc_true, loc_pred, weights).numpy()
hard_negative_mining_loss_value = hard_negative_mining_loss(y_true, y_pred).numpy()
regularization_loss_value = regularization_loss(y_pred).numpy()

# Compare the expected and actual loss values
np.testing.assert_allclose(classification_loss_value, expected_classification_loss.numpy())
np.testing.assert_allclose(localization_loss_value, expected_localization_loss.numpy())
np.testing.assert_allclose(hard_negative_mining_loss_value, expected_hard_negative_mining_loss.numpy())
np.testing.assert_allclose(regularization_loss_value, expected_regularization_loss.numpy())


In [None]:
import matplotlib.pyplot as plt

history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=10)


# Plot the performance against all four losses
plt.plot(history.history['conf_loss'], label='Confidence loss')
plt.plot(history.history['loc_loss'], label='Localization loss')
plt.plot(history.history['neg_loss'], label='Hard negative mining loss')
plt.plot(history.history['reg_loss'], label='Regularization loss')
plt.title('Training Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
def post_process_predictions(predictions, confidence_threshold=0.5, nms_iou_threshold=0.5):
    """
    Post-processes the predictions to obtain the bounding boxes and class scores.
    predictions: the output of the model.predict() function (batch_size, num_anchors, num_classes + 4)
    confidence_threshold: the minimum confidence threshold for the predicted class scores
    nms_iou_threshold: the intersection over union threshold for non-maximum suppression
    """
    # Extract the predicted class scores and bounding box offsets
    confidences = predictions[:,:,1:]
    offsets = predictions[:,:,0:4]
    
    # Decode the anchor boxes
    boxes = decode_boxes(offsets)
    
    # Apply the sigmoid activation function to the predicted class scores
    confidences = tf.sigmoid(confidences)
    
    # Find the index of the class with the highest score for each anchor box
    class_indices = tf.argmax(confidences, axis=-1)
    
    # Extract the highest class score for each anchor box
    class_scores = tf.reduce_max(confidences, axis=-1)
    
    # Apply the confidence threshold
    mask = class_scores >= confidence_threshold
    boxes = tf.boolean_mask(boxes, mask)
    class_indices = tf.boolean_mask(class_indices, mask)
    class_scores = tf.boolean_mask(class_scores, mask)
    
    # Apply non-maximum suppression
    selected_indices = tf.image.non_max_suppression(boxes, class_scores, max_output_size=100, iou_threshold=nms_iou_threshold)
    selected_boxes = tf.gather(boxes, selected_indices)
    selected_class_indices = tf.gather(class_indices, selected_indices)
    selected_class_scores = tf.gather(class_scores, selected_indices)
    
    return selected_boxes, selected_class_scores, selected_class_indices


def visualize_predictions(images, boxes, scores, class_indices, class_names):
    """
    Visualizes the predicted bounding boxes on the test images.
    images: the test images (batch_size, height, width, channels)
    boxes: the predicted bounding boxes (num_boxes, 4)
    scores: the predicted class scores (num_boxes,)
    class_indices: the predicted class indices (num_boxes,)
    class_names: a list of class names
    """
    for i in range(images.shape[0]):
        # Convert the image from BGR to RGB
        image = cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB)
        
        # Convert the boxes from center-size format to top-left corner format
        boxes_i = convert_boxes(boxes[i])
        
        # Draw the predicted bounding boxes and class labels on the image
        for box, score, class_idx in zip(boxes_i, scores[i], class_indices[i]):
            x1, y1, x2, y2 = box
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{class_names[class_idx]}: {score:.2f}"
            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        
        # Show the image
        plt.imshow(image)
        plt.axis("off")
        plt.show()


In [None]:
from keras.models import load_model

model.save('ssd300.h5')

model = load_model('ssd300.h5', custom_objects={'confidence_loss': confidence_loss, 'localization_loss': localization_loss, 'hard_negative_mining_loss': hard_negative_mining_loss, 'regularization_loss': regularization_loss})

# Load the test images
images = []
for i in range(1, 11):
    image = cv2.imread(f"images/test{i}.jpg")
    images.append(image)
images = np.array(images)

# Preprocess the images
images = preprocess_images(images)

# Make predictions
predictions = model.predict(images)

# Post-process the predictions
boxes, scores, class_indices = post_process_predictions(predictions)
