This notebook is an introduction to using YOLOv3, through OpenCV's `dnn` module, to detect objects in images.

First, make sure that all dependencies are installed:

In [1]:
!pip install opencv-python
!pip install matplotlib



In [16]:
import cv2 as cv
import numpy as np
from copy import deepcopy

# Directory prefixes that load() and prepare_classes() prepend to the file
# names they are given (plain string concatenation, hence the trailing "/").
# NOTE: "weigths_path" is misspelled, but load() refers to it by this exact name.
weigths_path = "data/weights/"
cfg_path = "data/cfg/"
class_path = "data/"

# Colour tuples and text/line settings used when drawing annotations.
black = (0, 0, 0)
white = (255, 255, 255)
font = cv.FONT_HERSHEY_TRIPLEX
font_scale = 0.5
thickness = 2

def load(weights, cfg):
    """Build a network from a weights file name and a config file name.

    Both names are resolved by prefixing them with the module-level
    ``weigths_path`` and ``cfg_path`` before being passed to ``cv.dnn.readNet``.
    """
    return cv.dnn.readNet(weigths_path + weights, cfg_path + cfg)

def prepare_classes(classes):
    """Read the class-name file ``classes`` (prefixed with ``class_path``) into a list.

    Every line of the file becomes one entry, with surrounding whitespace
    stripped; blank lines are kept as empty strings.
    """
    names = []
    with open(class_path + classes, "r") as handle:
        for raw_line in handle:
            names.append(raw_line.strip())
    return names

def extract_output_layers(net):
    """Return the names of the network's unconnected output layers.

    ``net.getUnconnectedOutLayers()`` yields 1-based layer ids. Depending on
    the OpenCV release, the Python binding returns them either as an (N, 1)
    array or as a flat (N,) array, so the ids are flattened first to support
    both shapes.

    Args:
        net: Object exposing ``getLayerNames()`` and ``getUnconnectedOutLayers()``.

    Returns:
        List of layer names, in the order the ids were reported.
    """
    names = net.getLayerNames()
    layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    # Ids are 1-based while ``names`` is indexed from 0.
    return [names[int(layer_id) - 1] for layer_id in layer_ids]

def image_to_blob(image, size=(416, 416)):
    """Convert an image into an input blob via ``cv.dnn.blobFromImage``.

    Pixel values are scaled by 1/255, the red and blue channels are swapped
    and cropping is disabled.

    Args:
        image: Image to convert.
        size: Spatial size passed to ``blobFromImage``. Defaults to (416, 416).

    Returns:
        Whatever ``cv.dnn.blobFromImage`` returns for these arguments.
    """
    # Forward the caller's ``size`` instead of a hard-coded (416, 416).
    return cv.dnn.blobFromImage(image, scalefactor=1/255, size=size, swapRB=True, crop=False)

def forward(net, blob, output_layers):
    """Feed ``blob`` to ``net`` and return the outputs of ``output_layers``."""
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)
    return layer_outputs

def postprocess(image, outputs, classes, threshold=0.8, nms_threshold=0.6):
    """Draw the detections that survive non-maximum suppression on a copy of ``image``.

    Args:
        image: Array whose ``shape`` unpacks as ``(height, width, channels)``.
        outputs: Iterable of per-layer detection arrays. Each row is read as
            ``[center_x, center_y, w, h, _, score_0, score_1, ...]``; the four
            box values are multiplied by the image width/height, and index 4
            is not used.
        classes: Sequence of labels indexed by class id.
        threshold: Score threshold forwarded to ``cv.dnn.NMSBoxes``.
        nms_threshold: NMS threshold forwarded to ``cv.dnn.NMSBoxes``.

    Returns:
        A deep copy of ``image`` with the kept boxes and labels drawn on it;
        the input array is left untouched.
    """
    height, width, _ = image.shape
    annotated = deepcopy(image)

    class_ids = []
    confidences = []
    boxes = []

    for layer_output in outputs:
        for detection in layer_output:
            w = detection[2] * width
            h = detection[3] * height
            # Convert the box centre to its top-left corner.
            x = detection[0] * width - w / 2
            y = detection[1] * height - h / 2

            scores = detection[5:]
            class_id = int(np.argmax(scores))

            boxes.append([int(x), int(y), int(w), int(h)])
            confidences.append(float(scores[class_id]))
            class_ids.append(class_id)

    if not boxes:
        # Nothing to suppress or draw.
        return annotated

    indices = cv.dnn.NMSBoxes(boxes, confidences, threshold, nms_threshold)

    # Depending on the OpenCV release, NMSBoxes returns an (N, 1) array, a
    # flat (N,) array, or an empty tuple; flattening handles all three.
    for index in np.asarray(indices).flatten():
        index = int(index)
        label = str(classes[class_ids[index]])
        draw_box(annotated, label, boxes[index])

    return annotated

def convertToCoordinates(x, y, w, h):
    """Convert the four box values with ``int()`` and return them as ``[x, y, w, h]``."""
    return [int(value) for value in (x, y, w, h)]

def draw_box(image, label, box):
    """Draw a bounding box and its text label onto ``image`` in place.

    Args:
        image: Image to draw on (modified in place).
        label: Text to render on a filled background strip.
        box: ``(x, y, w, h)`` with ``(x, y)`` the top-left corner.

    The label strip normally sits directly above the box. When there is not
    enough room above it (the box is close to the top edge), the strip is
    pushed down just far enough to stay inside the image.
    """
    x, y, w, h = box
    (tw, th), _ = cv.getTextSize(label, font, font_scale, thickness=thickness)
    cv.rectangle(image, (x, y), (x + w, y + h), black, thickness=thickness)

    # Text height plus 5 px of padding above and below.
    label_height = th + 10
    # Bottom edge of the label strip; never higher than the strip's own height.
    label_bottom = max(y, label_height)
    cv.rectangle(image, (x, label_bottom), (x + tw + 10, label_bottom - label_height), black, cv.FILLED)
    cv.putText(image, label, (x + 5, label_bottom - 5), font, font_scale, white, thickness=1)

In [3]:
from matplotlib import pyplot as plt

# Directory prefix that load_image() prepends to the file name it is given.
image_path = "data/images/"

def load_image(image, x_scale=1, y_scale=1):
    """Read an image file from ``image_path`` and rescale it.

    Args:
        image: File name, appended to the module-level ``image_path`` prefix.
        x_scale: Horizontal scale factor passed to ``cv.resize`` as ``fx``.
        y_scale: Vertical scale factor passed to ``cv.resize`` as ``fy``.

    Returns:
        The image returned by ``cv.resize``.

    Raises:
        FileNotFoundError: If ``cv.imread`` could not read the file.
    """
    path = image_path + image
    img = cv.imread(path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque error inside cv.resize.
        raise FileNotFoundError("Could not read image: " + path)
    return cv.resize(img, None, fx=x_scale, fy=y_scale)

def show_image(image):
    """Render an OpenCV image inline in the notebook.

    Uses matplotlib rather than ``cv.imshow`` + ``cv.waitKey(0)``: the latter
    opens a native window and blocks the cell until a key is pressed, which
    stalls "Run All" and does not work on kernels without a display.

    Args:
        image: 2-D (single-channel) or 3-D colour image array; colour images
            are converted from BGR to RGB before display.
    """
    if image.ndim == 2:
        plt.imshow(image, cmap="gray")
    else:
        # OpenCV loads colour images as BGR; matplotlib expects RGB.
        plt.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

In [8]:
# File names only; the loader helpers prepend their own directory prefixes.
weights = "yolov3.weights"
cfg = "yolov3.cfg"
classes = "coco.names"
image = "demo.jpg"

net = load(weights, cfg)
# NOTE: `classes` is rebound here from the file name to the list of labels
# read from that file.
classes = prepare_classes(classes)

output_layers = extract_output_layers(net)

img = load_image(image)

blob = image_to_blob(img)

# Single forward pass; `outputs` is consumed by postprocess() in a later cell.
outputs = forward(net, blob, output_layers)

In [17]:
# Draw the kept detections on a copy of `img`; 0.8 is passed as
# postprocess()'s `threshold` argument.
processed_image = postprocess(img, outputs, classes, 0.8)

show_image(processed_image)

In [5]:
# Shape of the loaded image; postprocess() unpacks it as (height, width, channels).
print(img.shape)

(371, 660, 3)
