In [None]:
DATA_PATH = '../input/3d-object-detection-for-autonomous-vehicles/'

In [None]:
# https://www.youtube.com/watch?v=cPOtULagNnI

In [1]:
from tensorflow.keras.layers import Conv2D

In [4]:
import struct
import numpy as np
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers.merge import add, concatenate
from tensorflow.keras.models import Model

ModuleNotFoundError: No module named 'tensorflow.keras.layers.merge'

In [3]:

def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
        x = Conv2D(conv['filter'],
                    conv['kernel'],
                    strides=conv['stride'],padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
                    name='conv_' + str(conv['layer_idx']),
                    use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
        return add([skip_connection, x]) if skip else x

In [None]:

# This is a Python function named _conv_block that defines a convolutional block in a deep neural network. The block consists of multiple convolutional layers with optional batch normalization, leaky ReLU activation, and skip connections.
# Function Parameters:
# inp: The input tensor to the convolutional block.
# convs: A list of dictionaries, where each dictionary defines the parameters for a convolutional layer.
# skip: A boolean flag indicating whether to use skip connections (default: True).
# Function Body:
# Initialize variables: Initializes the input tensor x to the input inp and a counter count to 0.
# Loop through convolutional layers: Iterates through the list of convolutional layer parameters convs.
# Create skip connection: If skip is True and the current layer is the second-to-last layer, creates a skip connection by storing the current tensor x in the variable skip_connection.
# Apply convolutional layer: Applies a convolutional layer with the specified parameters (filter, kernel, stride, padding, etc.) to the current tensor x.
# Apply batch normalization (optional): If the current layer has batch normalization enabled, applies batch normalization to the tensor x.
# Apply leaky ReLU activation (optional): If the current layer has leaky ReLU activation enabled, applies leaky ReLU activation to the tensor x.
# Return output: Returns the output tensor x if skip is False, otherwise returns the sum of the output tensor x and the skip connection tensor skip_connection.
# Example Usage:
# Python
# convs = [
#     {'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
#     {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
#     {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2}
# ]

# inp = Input(shape=(256, 256, 3))
# x = _conv_block(inp, convs)


In [2]:
#  create a YOLOv3 Keras model and save it to file
# based on https://github.com/experiencor/keras-yolo3

 
    

def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))
    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
    skip_36 = x
    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
    skip_61 = x
    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
    # Layer 80 => 82
    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)
    # Layer 83 => 86
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])
    # Layer 87 => 91
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)
    # Layer 92 => 94
    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)
    # Layer 95 => 98
    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])
    # Layer 99 => 106
    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)
    model = Model(input_image, [yolo_82, yolo_94, yolo_106])
    return model

In [5]:
from matplotlib import pyplot
from matplotlib.patches import Rectangle

label_map = {
    "person": "blue",
    "bicycle": "yellow", 
    "car": "red",
    "truck": "green",
    "motorbike": "white", 
    "aeroplane": "white", 
    "bus": "white",
    "train": "white", 
    "boat": "white"
}

In [None]:
# xmin, ymin, xmax, ymax: The coordinates of the bounding box.
# objness: The objectness score of the bounding box (optional).
# classes: The class probabilities of the bounding box (optional).
# label: The predicted label of the bounding box (initialized to -1).
# score: The predicted score of the bounding box (initialized to -1).
# Methods:
# __init__:
# The constructor initializes the bounding box with the given coordinates and optional objectness and class probabilities.
# get_label:
# Returns the predicted label of the bounding box. If the label is not already computed, it calculates the label by taking the argmax of the class probabilities.
# get_score:
# Returns the predicted score of the bounding box. If the score is not already computed, it calculates the score by taking the class probability corresponding to the predicted label.
# Example Usage:
# Python
# # Create a bounding box
# box = BoundBox(10, 20, 30, 40, objness=0.8, classes=[0.1, 0.2, 0.7])

# # Get the predicted label
# label = box.get_label()
# print(label)  # Output: 2

# # Get the predicted score
# score = box.get_score()
# print(score)  # Output: 0.7



In [6]:
class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        return self.score

In [7]:
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

In [None]:
# This is a Python function named decode_netout that decodes the output of a neural network, specifically a YOLO (You Only Look Once) object detection model. Here's a breakdown of the function:
# Purpose:
# The function takes the output of a YOLO model and converts it into a list of bounding boxes, each representing a detected object.
# Parameters:
# netout: The output of the YOLO model, a 4D numpy array.
# anchors: A list of anchor boxes, used to calculate the bounding box coordinates.
# obj_thresh: The objectness threshold, used to filter out low-confidence detections.
# net_h and net_w: The height and width of the input image, used to scale the bounding box coordinates.
# Step-by-Step Explanation:
# Reshape the output: Reshapes the netout array to have shape (grid_h, grid_w, nb_box, -1), where grid_h and grid_w are the height and width of the grid, nb_box is the number of bounding boxes per grid cell, and -1 represents the remaining dimensions.
# Apply sigmoid activation: Applies the sigmoid activation function to the first two elements of the netout array (representing the x and y coordinates of the bounding box) and to the elements starting from the 5th position (representing the objectness score and class probabilities).
# Calculate objectness score: Calculates the objectness score by multiplying the objectness score with the class probabilities.
# Filter out low-confidence detections: Filters out detections with an objectness score less than the specified obj_thresh.
# Calculate bounding box coordinates: Calculates the bounding box coordinates (x, y, w, h) using the anchor boxes and the grid cell coordinates.
# Create BoundBox objects: Creates BoundBox objects for each detected object, containing the bounding box coordinates, objectness score, and class probabilities.
# Return the list of bounding boxes: Returns the list of BoundBox objects.
# Example Usage:
# Python
# netout = ...  # output of the YOLO model
# anchors = ...  # list of anchor boxes
# obj_thresh = 0.5  # objectness threshold
# net_h, net_w = 416, 416  # input image dimensions

# boxes = decode_netout(netout, anchors, obj_thresh, net_h, net_w)



In [None]:
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5
    boxes = []
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh
    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]
            if(objectness.all() <= obj_thresh): continue
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]
            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)

    return boxes
    
    

In [None]:
# This is a Python function named correct_yolo_boxes that corrects the bounding box coordinates predicted by a YOLO (You Only Look Once) object detection model. Here's a breakdown of the function:
# Purpose:
# The function takes the predicted bounding boxes, image dimensions, and network dimensions as input, and corrects the bounding box coordinates to match the original image dimensions.
# Parameters:
# boxes: A list of BoundBox objects, representing the predicted bounding boxes.
# image_h and image_w: The height and width of the original image.
# net_h and net_w: The height and width of the network's input image.
# Step-by-Step Explanation:
# Calculate scaling factors: Calculates the scaling factors x_scale and y_scale to convert the network's output coordinates to the original image coordinates.
# Calculate offset factors: Calculates the offset factors x_offset and y_offset to account for the difference in image dimensions.
#     This is a Python function named correct_yolo_boxes that corrects the bounding box coordinates predicted by a YOLO (You Only Look Once) object detection model. Here's a breakdown of the function:
# Purpose:
# The function takes the predicted bounding boxes, image dimensions, and network dimensions as input, and corrects the bounding box coordinates to match the original image dimensions.
# Parameters:
# boxes: A list of BoundBox objects, representing the predicted bounding boxes.
# image_h and image_w: The height and width of the original image.
# net_h and net_w: The height and width of the network's input image.
# Step-by-Step Explanation:
# Calculate scaling factors: Calculates the scaling factors x_scale and y_scale to convert the network's output coordinates to the original image coordinates.
# Calculate offset factors: Calculates the offset factors x_offset and y_offset to account for the difference in image dimensions.
#     boxes = ...  # list of BoundBox objects
# image_h, image_w = 416, 416  # original image dimensions
# net_h, net_w = 416, 416  # network's input image dimensions

# correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

In [None]:
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    new_w, new_h = net_w, net_h
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)

In [None]:

# This is a Python function named _interval_overlap that calculates the overlap between two intervals. Here's a breakdown of the function:
# Purpose:
# The function takes two intervals, interval_a and interval_b, as input and returns the length of their overlap.
# Parameters:
# interval_a: The first interval, represented as a tuple (x1, x2).
# interval_b: The second interval, represented as a tuple (x3, x4).
# Step-by-Step Explanation:
# Unpack intervals: Unpacks the intervals into their respective start and end points: x1, x2, x3, and x4.
# Check for no overlap: Checks if the intervals do not overlap by verifying if one interval ends before the other starts. If so, returns 0.
# Calculate overlap: Calculates the overlap by finding the minimum of the two end points (x2 and x4) and subtracting the maximum of the two start points (x1 and x3).
# Example Usage:
# Python


In [None]:

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

In [None]:
# This is a Python function named bbox_iou that calculates the Intersection over Union (IoU) between two bounding boxes. Here's a breakdown of the function:
# Purpose:
# The function takes two bounding boxes, box1 and box2, as input and returns their IoU score.
# Parameters:
# box1 and box2: Two bounding boxes, each with attributes xmin, xmax, ymin, and ymax.
# Step-by-Step Explanation:
# Calculate intersection width: Calculates the intersection width between the two boxes using the _interval_overlap function.
# Calculate intersection height: Calculates the intersection height between the two boxes using the _interval_overlap function.
# Calculate intersection area: Calculates the intersection area by multiplying the intersection width and height.
# Calculate box areas: Calculates the areas of both boxes by multiplying their widths and heights.
# Calculate union area: Calculates the union area by adding the areas of both boxes and subtracting the intersection area.
# Calculate IoU: Calculates the IoU by dividing the intersection area by the union area.
# IoU Interpretation:
# The IoU score ranges from 0 to 1, where:
# 0: No overlap between the boxes.
# 1: Complete overlap between the boxes.
# Values between 0 and 1: Partial overlap between the boxes.
# Example Usage:
# Python
# box1 = BoundBox(10, 20, 30, 40)
# box2 = BoundBox(20, 30, 40, 50)

# iou = bbox_iou(box1, box2)
# print(iou)  # Output: IoU score between 0 and 1

# Ask Meta AI anything...
# Messages are generated by AI and may b

In [None]:
def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h
    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    union = w1*h1 + w2*h2 - intersect
    return float(intersect) / union
    

In [None]:

def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0: continue
            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

In [None]:
from numpy import expand_dims
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
def load_image_pixels(filename, shape):
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, target_size=shape)
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    # add a dimension so that we have one sample
    image = expand_dims(image, 0)
    
    return image, width, height

In [None]:
    # get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores

In [None]:
# load and prepare an imag# draw all results
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    # load the image
    data = pyplot.imread(filename)
    # plot the image
    pyplot.imshow(data)
    # get the context for drawing boxes
    ax = pyplot.gca()
    # plot each box
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # calculate width and height of the box
        width, height = x2 - x1, y2 - y1
        # create the shape
        rect = Rectangle((x1, y1), width, height, fill=False, color=label_map[v_labels[i]])
        # draw the box
        ax.add_patch(rect)
        # draw text and score in top left corner
        label = "%s (%.3f)" % (v_labels[i], v_scores[i])
        pyplot.text(x1, y1, label, color=label_map[v_labels[i]])
    # show the plot
    pyplot.show()

In [None]:
class WeightReader:
    def __init__(self, weight_file):
        self.weight_file = weight_file

In [None]:
# define the model
model = make_yolov3_model()

# load the model weights
# I have loaded the pretrained weights in a separate dataset
weight_reader = WeightReader('../input/lyft-3d-recognition/yolov3.weights')

# set the model weights into the model
weight_reader.load_weights(model)

# save the model to file

model.save('model.h5')

In [None]:
# load yolov3 model
from tensorflow.keras.models import load_model
model = load_model('model.h5')

In [None]:
model.summary()

In [None]:
# Parameters used in the Dataset, on which YOLOv3 was pretrained
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]

# define the expected input shape for the model
WIDTH, HEIGHT = 416, 416

# define the probability threshold for detected objects
class_threshold = 0.3

In [None]:
import os
from matplotlib import pyplot as plt
images = os.listdir('../input/3d-object-detection-for-autonomous-vehicles/train_images')[:10]

In [None]:
from numpy import expand_dims
from keras.preprocessing.image import load_img, img_to_array

# load and prepare an image
def load_image_pixels(filename, shape):
    '''
    Function preprocess the images to 416x416, which is the standard input shape for YOLOv3, 
    and also keeps track of the originl shape, which is later used to draw the boxes.
    
    paramters:
    filename {String}: path to the image
    shape {tuple}: shape of the input dimensions of the network
    
    returns:image {PIL}: image of shape 'shape'
    width {int}: original width of the picture
    height {int}: original height of the picture
    '''
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    
    # load the image with the required size
    image = load_img(filename, target_size=shape)
    
    # convert to numpy arrayimage = img_to_array(image)
    
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    
    # add a dimension so that we have one sampleimage = expand_dims(image, 0)
    return image, width, height

In [None]:
for file in images:
    photo_filename = DATA_PATH + 'train_images/' + file
    
    # load picture with old dimensions
    image, image_w, image_h = load_image_pixels(photo_filename, (WIDTH, HEIGHT))
    
    # Predict image
    yhat = model.predict(image)
    
    # Create boxesboxes = list()
    for i in range(len(yhat)):
        # decode the output of the network
        boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, HEIGHT, WIDTH)

    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, image_h, image_w, HEIGHT, WIDTH)

    # suppress non-maximal boxes
    do_nms(boxes, 0.5)# define the labels (Filtered only the ones relevant for this task, which were used in pretraining the YOLOv3 model)
    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck","boat"]

    # get the details of the detected objects
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)

    # draw what we found 
    draw_boxes(photo_filename, v_boxes, v_labels, v_scores)