In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
# Load the YOLOv3 network from its weights file and its configuration file.
# Both paths are relative to the notebook's working directory, so the two
# files must be present there before this cell is run.
yolo = cv2.dnn.readNet("./yolov3.weights", "./yolov3.cfg")

In [3]:
# Class labels the detector can report: coco.names holds one label per line,
# and a label's position in the list is the class id used when indexing it.
classes = []

with open("./coco.names", mode='r') as names_file:
    classes = names_file.read().splitlines()

# Shown as the cell output: the number of labels that were loaded.
len(classes)

80

In [4]:
# Real-time object detection on the default webcam (device 0).
# Each frame is run through the `yolo` network, overlapping boxes are merged
# with non-maximum suppression, and the annotated frame is shown in a window.
# Press Esc in the preview window to stop.

CONF_THRESHOLD = 0.5    # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4     # overlap threshold passed to non-maximum suppression
INPUT_SIZE = (320, 320)  # spatial size the frame is resized to for the network

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open video capture device 0")

# The output layers are a fixed property of the loaded network, so resolve
# them once rather than on every frame. getUnconnectedOutLayersNames() also
# avoids indexing the result of getUnconnectedOutLayers(), whose shape
# (Nx1 vs. flat) differs between OpenCV releases.
output_layers = yolo.getUnconnectedOutLayersNames()

font = cv2.FONT_HERSHEY_PLAIN
# One fixed colour per class so a box keeps its colour from frame to frame.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

starting_time = time.time()
frame_id = 0
try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed or frame is None:
            # No frame could be read (camera unplugged, stream ended, ...).
            break
        frame_id += 1

        height, width = frame.shape[:2]

        # Scale pixels to [0, 1], resize to the network input size and swap
        # the red and blue channels.
        blob = cv2.dnn.blobFromImage(frame, 1/255, INPUT_SIZE, (0,0,0), swapRB = True, crop = False)
        yolo.setInput(blob)
        layer_output = yolo.forward(output_layers)

        boxes = []
        confidences = []
        class_Ids = []

        for output in layer_output:
            for detection in output:
                # Entries 0-3 are the box centre and size as fractions of the
                # frame; entries from index 5 onwards are per-class scores.
                score = detection[5:]
                class_id = int(np.argmax(score))
                confidence = float(score[class_id])
                if confidence > CONF_THRESHOLD:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Convert centre/size to the top-left corner for drawing
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(confidence)
                    class_Ids.append(class_id)

        # Keep only the best box among heavily overlapping ones.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
        # NMSBoxes may return an Nx1 array, a flat array or an empty tuple
        # depending on the OpenCV version; normalise to a sorted list of ints.
        kept = sorted(int(k) for k in np.array(indexes).flatten())

        for i in kept:
            x, y, w, h = boxes[i]
            label = str(classes[class_Ids[i]])
            # Format the percentage directly to avoid float noise such as
            # "56.99999999999999" from round(c, 2) * 100.
            confi = "{:.0f}".format(confidences[i] * 100)
            color = colors[class_Ids[i]].tolist()
            cv2.rectangle(frame, (x,y), (x+w, y+h), color, 1)
            # Filled strip above the box as a background for the label text
            cv2.rectangle(frame, (x,y), (x+w, y-15), color, -1)
            cv2.putText(frame, label +" "+confi+"%", (x,y-2), font, 1, (255,255,255), 1)

        # Average frames per second since the loop started
        elapsed_time = time.time() - starting_time
        fps = frame_id / elapsed_time if elapsed_time > 0 else 0.0
        cv2.putText(frame, "FPS: " + "{:.2f}".format(fps), (5,15), font, 1, (255,255,255), 1)
        cv2.imshow("Image", frame)

        # Mask to the low byte so the Esc check is not affected by any
        # platform-specific high bits in the key code.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            break
finally:
    # Always free the camera and close the window, even if a frame failed.
    cap.release()
    cv2.destroyAllWindows()