### Imports 

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import glob
import cv2
import os

2023-04-21 09:35:08.558691: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-21 09:35:09.025284: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-21 09:35:10.506636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2023-04-21 09:35:10.507299: W tensorflow/compiler/xla/stream_exe

In [2]:
class Yolo:
    """Uses You only look once (YOLO)v3 to perform object detection
    Args:
        model_path: path to where a Darknet Keras model is stored
        classes_path: path to where the list of class names used for the Darknet model, listed in order of index, can be found
        class_t: float representing the box score threshold for the initial filtering step
        nms_t: float representing the IOU threshold for non-max suppression
        anchors: numpy.ndarray of shape (outputs, anchor_boxes, 2) containing all of the anchor boxes:
            outputs is the number of outputs (predictions) made by the Darknet model
            anchor_boxes is the number of anchor boxes used for each prediction
            2 => [anchor_box_width, anchor_box_height]
    """
    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
        """Initializes the Yolo class"""
        self.model = load_model(model_path)
        with open(classes_path) as f:
            self.class_names = [line.strip() for line in f]
        self.class_t = class_t
        self.nms_t = nms_t
        self.anchors = anchors

    def process_outputs(self, outputs, image_size):
        """Process Darknet model outputs
        Args:
            outputs: list of numpy.ndarrays containing the predictions from
                the Darknet model for a single image:
                Each output will have the shape (grid_height, grid_width,
                    anchor_boxes, 4 + 1 + classes)
                    grid_height & grid_width => the height and width of the
                        grid used for the output
                    anchor_boxes => the number of anchor boxes used
                    4 => (t_x, t_y, t_w, t_h)
                    1 => box_confidence
                    classes => class probabilities for all classes
            image_size: numpy.ndarray containing the image’s original size
                [image_height, image_width]
        Returns:
            A tuple of (boxes, box_confidences, box_class_probs):
                boxes: a list of numpy.ndarrays of shape (grid_height,
                    grid_width, anchor_boxes, 4) containing the processed
                    boundary boxes for each output, respectively:
                    4 => (x1, y1, x2, y2)
                        (x1, y1, x2, y2) should represent the boundary box
                        relative to original image
                box_confidences: a list of numpy.ndarrays of shape (grid_height,
                    grid_width, anchor_boxes, 1) containing the box confidences
                    for each output, respectively
                box_class_probs: a list of numpy.ndarrays of shape (grid_height,
                    grid_width, anchor_boxes, classes) containing the box’s
                    class probabilities for each output, respectively
        """
        boxes = []
        box_confidences = []
        box_class_probs = []
        for i in range(len(outputs)):
            boxes.append(outputs[i][..., :4])
            box_confidences.append(1 / (1 + np.exp(-outputs[i][..., 4:5])))
            box_class_probs.append(1 / (1 + np.exp(-outputs[i][..., 5:])))
        image_height, image_width = image_size
        for i in range(len(boxes)):
            grid_height = outputs[i].shape[0]
            grid_width = outputs[i].shape[1]
            anchor_boxes = outputs[i].shape[2]
            for cy in range(grid_height):
                for cx in range(grid_width):
                    for b in range(anchor_boxes):
                        tx, ty, tw, th = boxes[i][cy, cx, b]
                        pw, ph = self.anchors[i][b]
                        bx = (1 / (1 + np.exp(-tx))) + cx
                        by = (1 / (1 + np.exp(-ty))) + cy
                        bw = pw * np.exp(tw)
                        bh = ph * np.exp(th)
                        bx /= grid_width
                        by /= grid_height
                        bw /= self.model.input.shape[1]
                        bh /= self.model.input.shape[2]
                        x1 = (bx - (bw / 2)) * image_width
                        y1 = (by - (bh / 2)) * image_height
                        x2 = (bx + (bw / 2)) * image_width
                        y2 = (by + (bh / 2)) * image_height
                        boxes[i][cy, cx, b] = [x1, y1, x2, y2]
        return (boxes, box_confidences, box_class_probs)
    
    def filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Removes the boxes with low box scores
        Args:
            boxes: list of numpy.ndarrays of shape (grid_height, grid_width,
                anchor_boxes, 4) containing the processed boundary boxes for
                each output, respectively
            box_confidences: list of numpy.ndarrays of shape (grid_height,
                grid_width, anchor_boxes, 1) containing the processed box
                confidences for each output, respectively
            box_class_probs: list of numpy.ndarrays of shape (grid_height,
                grid_width, anchor_boxes, classes) containing the processed
                box class probabilities for each output, respectively
        Returns:
            A tuple of (filtered_boxes, box_classes, box_scores):
                filtered_boxes: a numpy.ndarray of shape (?, 4) containing all
                    of the filtered bounding boxes:
                box_classes: a numpy.ndarray of shape (?,) containing the class
                    number that each box in filtered_boxes predicts
                box_scores: a numpy.ndarray of shape (?) containing the box
                    scores"""
        filtered_boxes, box_classes, box_scores = None, [], []
        for i in range(len(boxes)):
            cur_box_score = box_confidences[i] * box_class_probs[i]
            cur_box_class = np.argmax(cur_box_score, axis=-1)
            cur_box_score = np.max(cur_box_score, axis=-1)
            mask = cur_box_score >= self.class_t
            if filtered_boxes is None:
                filtered_boxes = boxes[i][mask]
                box_scores = cur_box_score[mask]
                box_classes = cur_box_class[mask]
            else:
                filtered_boxes = np.concatenate((filtered_boxes,
                                                 boxes[i][mask]),
                                                axis=0)
                box_classes = np.concatenate((box_classes,
                                                 cur_box_class[mask]),
                                                axis=0)
                box_scores = np.concatenate((box_scores,
                                                 cur_box_score[mask]),
                                                axis=0)
        return (filtered_boxes, box_classes, box_scores)

    def iou(self, box1, box2):
        """Method to calculate the Intersection over Union
        Args:
            box1: first box
            box2: second box
        Returns:
            the Intersection over Union of the two boxes
        """
        x1, y1, x2, y2 = box1[1]
        x3, y3, x4, y4 = box2
        xi1 = max(x1, x3)
        yi1 = max(y1, y3)
        xi2 = min(x2, x4)
        yi2 = min(y2, y4)
        inter_area = max(yi2 - yi1, 0) * max(xi2 - xi1, 0)
        box1_area = (y2 - y1) * (x2 - x1)
        box2_area = (y4 - y3) * (x4 - x3)
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area

    def non_max_suppression(self, filtered_boxes, box_classes, box_scores):
        box_predictions = []
        predicted_box_classes = []
        predicted_box_scores = []

        for c in set(box_classes):
            idxs = np.where(box_classes == c)
            class_boxes = filtered_boxes[idxs]
            class_box_scores = box_scores[idxs]

            while len(class_boxes) > 0:
                max_idx = np.argmax(class_box_scores)
                box_predictions.append(class_boxes[max_idx])
                predicted_box_classes.append(c)
                predicted_box_scores.append(class_box_scores[max_idx])

                class_boxes = np.delete(class_boxes, max_idx, axis=0)
                class_box_scores = np.delete(class_box_scores, max_idx, axis=0)

                if len(class_boxes) == 0:
                    break

                iou = self.intersection_over_union(box_predictions[-1],
                                                   class_boxes)
                iou_mask = iou < self.nms_t

                class_boxes = class_boxes[iou_mask]
                class_box_scores = class_box_scores[iou_mask]

        return (np.array(box_predictions),
                np.array(predicted_box_classes),
                np.array(predicted_box_scores))

    def intersection_over_union(self, box1, boxes):
        x1 = np.maximum(box1[0], boxes[:, 0])
        y1 = np.maximum(box1[1], boxes[:, 1])
        x2 = np.minimum(box1[2], boxes[:, 2])
        y2 = np.minimum(box1[3], boxes[:, 3])

        intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)

        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        union_area = box1_area + boxes_area - intersection_area

        return intersection_area / union_area
    
    @staticmethod
    def load_images(folder_path):
        """Loads images from a filepath
        Args:
            folder_path: a string representing the path to the folder holding
                all the images to load
        Returns:
            a tuple of (images, image_paths):
                images: a list of images as numpy.ndarrays
                image_paths: a list of paths to the individual images in images
        """
        image_paths = glob.glob(folder_path + '/*')
        images = [cv2.imread(image) for image in image_paths]
        return images, image_paths

    def preprocess_images(self, images):
        """Resizes and Rescales the images to fit the model requirements
        Args:
            images: a list of images as numpy.ndarrays
        Returns:
            a tuple of (pimages, image_shapes):
                pimages: a numpy.ndarray of shape (ni, input_h, input_w, 3)
                    containing all of the preprocessed images
                    - ni: the number of images that were preprocessed
                    - input_h: the input height for the Darknet model
                    - input_w: the input width for the Darknet model
                    - 3: number of color channels
                image_shapes: a numpy.ndarray of shape (ni, 2) containing the
                    original height and width of the images
                    - ni: the number of images that were preprocessed
        """
        pimages = []
        image_shapes = []
        for image in images:
            image_shapes.append(image.shape[:2])
            image = cv2.resize(image, (self.model.input.shape[1],
                                       self.model.input.shape[2]),
                               interpolation=cv2.INTER_CUBIC)
            image = image / 255
            pimages.append(image)
        return (np.array(pimages), np.array(image_shapes))
    
    def show_boxes(self, image, boxes, box_classes, box_scores, file_name):
        """Shows image with all boundary boxes, class names, and box scores
        Args:
            image: a numpy.ndarray containing an unprocessed image
            boxes: a numpy.ndarray containing the boundary boxes for the image
            box_classes: a numpy.ndarray containing the class indices for each
                box
            box_scores: a numpy.ndarray containing the box scores for each box
            file_name: the file path where the original image is stored"""
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = box
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)),
                          (255, 0, 0), 2)
            cv2.putText(image, self.class_names[box_classes[i]] + ' ' +
                        "{:.2f}".format(box_scores[i]),
                        (int(x1-1), int(y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (0, 0, 255), 1, lineType=cv2.LINE_AA)
        cv2.imshow(file_name, image)
        print("Press 's'")
        key = cv2.waitKey(0)
        if key == ord('s'):
            if os.path.isdir("./detections") is False:
                os.mkdir("./detections")
            cv2.imwrite("./detections/{}".format(file_name), image)
        del key
        cv2.destroyAllWindows()
    
    def predict(self, folder_path):
        """Displays all images using the show_boxes method
        Args:
            folder_path: a string representing the path to the folder holding
                all the images to predict
        Returns:
            A tuple of (predictions, image_paths):
                predictions: list of tuples for each image of
                    (boxes, box_classes, box_scores)
                image_paths: list of image paths corresponding to each
                    prediction in predictions
        """
        images, image_paths = self.load_images(folder_path)
        pimages, image_shapes = self.preprocess_images(images)
        outputs = self.model.predict(pimages)
        predictions = []
        for i, image in enumerate(images):
            three_out = [
                outputs[0][i], outputs[1][i], outputs[2][i]
            ]
            boxes, box_classes, box_scores = self.process_outputs(three_out,
                                                                  image_shapes[i])
            boxes, box_classes, box_scores = self.filter_boxes(boxes,
                                                               box_scores,
                                                               box_classes)
            boxes, box_classes, box_scores = self.non_max_suppression(boxes,
                                                                      box_classes,
                                                                      box_scores)
            predictions.append((boxes, box_classes, box_scores))
            self.show_boxes(image, boxes, box_classes, box_scores,
                            image_paths[i].split('/')[-1])
        return (predictions, image_paths)

#### Task 0 testing

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
yolo.model.summary()
print('Class names:', yolo.class_names)
print('Class threshold:', yolo.class_t)
print('NMS threshold:', yolo.nms_t)
print('Anchor boxes:', yolo.anchors)

### Task 1 Testing

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
output1 = np.random.randn(13, 13, 3, 85)
output2 = np.random.randn(26, 26, 3, 85)
output3 = np.random.randn(52, 52, 3, 85)
boxes, box_confidences, box_class_probs = yolo.process_outputs([output1, output2, output3], np.array([500, 700]))
print('Boxes:', boxes)
print('Box confidences:', box_confidences)
print('Box class probabilities:', box_class_probs)

### Task 2

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
output1 = np.random.randn(13, 13, 3, 85)
output2 = np.random.randn(26, 26, 3, 85)
output3 = np.random.randn(52, 52, 3, 85)
boxes, box_confidences, box_class_probs = yolo.process_outputs([output1, output2, output3], np.array([500, 700]))
boxes, box_classes, box_scores = yolo.filter_boxes(boxes, box_confidences, box_class_probs)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

### Task 3

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
output1 = np.random.randn(13, 13, 3, 85)
output2 = np.random.randn(26, 26, 3, 85)
output3 = np.random.randn(52, 52, 3, 85)
boxes, box_confidences, box_class_probs = yolo.process_outputs([output1, output2, output3], np.array([500, 700]))
boxes, box_classes, box_scores = yolo.filter_boxes(boxes, box_confidences, box_class_probs)
boxes, box_classes, box_scores = yolo.non_max_suppression(boxes, box_classes, box_scores)
print('Boxes:', boxes)
print('Box classes:', box_classes)
print('Box scores:', box_scores)

### Task 4

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
images, image_paths = yolo.load_images('../data/yolo')
i = np.random.randint(0, len(images))
cv2.imshow(image_paths[i], images[i])
cv2.waitKey(0)
cv2.destroyAllWindows()

### Task 5

In [None]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
images, image_paths = yolo.load_images('../data/yolo')
pimages, image_shapes = yolo.preprocess_images(images)
print(type(pimages), pimages.shape)
print(type(image_shapes), image_shapes.shape)
i = np.random.randint(0, len(images))
print(images[i].shape, ':', image_shapes[i])
cv2.imshow(image_paths[i], pimages[i])
cv2.waitKey(0)
cv2.destroyAllWindows()

### Task 6

In [6]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
images, image_paths = yolo.load_images('../data/yolo')
boxes = np.array([[119.22100287, 118.62197718, 567.75985556, 440.44121152],
                  [468.53530752, 84.48338278, 696.04923556, 167.98947829],
                  [124.2043716, 220.43365057, 319.4254314 , 542.13706101]])
box_scores = np.array([0.99537075, 0.91536146, 0.9988506])
box_classes = np.array([1, 7, 16])
ind = 0
for i, name in enumerate(image_paths):
    print(i, name)
    if "dog.jpg" in name:
        ind = i
        break
yolo.show_boxes(images[i], boxes, box_classes, box_scores, "dog.jpg")

0 ../data/yolo/horses.jpg
1 ../data/yolo/takagaki.jpg
2 ../data/yolo/dog.jpg
Press 's'


### Task 7

In [3]:
np.random.seed(0)
anchors = np.array([[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]])
yolo = Yolo('../data/yolo.h5', '../data/coco_classes.txt', 0.6, 0.5, anchors)
predictions, image_paths = yolo.predict('../data/yolo')
for i, name in enumerate(image_paths):
    if "dog.jpg" in name:
        ind = i
        break
print(image_paths[ind])
print(predictions[ind])

2023-04-21 10:50:39.023057: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/ediddev/.local/lib/python3.8/site-packages/cv2/../../lib64:/usr/local/cuda/lib64:
2023-04-21 10:50:39.023501: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-04-21 10:50:39.023744: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (BOBO-CODE): /proc/driver/nvidia/version does not exist
2023-04-21 10:50:39.026762: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebu

Press 's'
Press 's'
Press 's'
Press 's'
Press 's'
Press 's'
../data/yolo/dog.jpg
(array([[124.10596, 220.4373 , 319.45682, 542.3967 ],
       [119.10174, 118.63829, 567.89417, 440.58704],
       [468.6808 ,  84.4819 , 695.9741 , 168.00749]], dtype=float32), array([16,  1,  7]), array([0.99883264, 0.9954546 , 0.91439855], dtype=float32))
