In [1]:
import cv2
import numpy as np
import time

In [2]:
# Frame height and width; they stay None until the first frame has been read
h, w = None, None

# Opening the default camera (device index 0)
camera = cv2.VideoCapture(0)

# Failing early with a clear message: an unopened capture would otherwise
# only show up later as a crash on the first frame read
if not camera.isOpened():
    camera.release()
    raise RuntimeError('could not open camera with index 0')

In [3]:
# Reading the class names file: one label per line, surrounding whitespace removed
with open('YOLO-3/YOLO-3-OpenCV/yolo-coco-data/coco.names') as f:
    labels = list(map(str.strip, f))

In [4]:
# Building the network from the Darknet configuration file and the weights file
path_to_cfg = 'YOLO-3/YOLO-3-OpenCV/yolo-coco-data/yolov3.cfg'
path_to_weights = 'YOLO-3/YOLO-3-OpenCV/yolo-coco-data/yolov3.weights'
network = cv2.dnn.readNetFromDarknet(path_to_cfg, path_to_weights)

In [5]:
# Names of all layers in the loaded network; the output layers are looked up
# in this collection by index
layers_names_all = network.getLayerNames()

In [6]:
# Names of the output layers that the forward pass should return.
# The indices from getUnconnectedOutLayers() are shifted by one to index
# 'layers_names_all'. Depending on the OpenCV version they arrive either as a
# flat array or as an (N, 1) array, so they are flattened to plain integers first.
layers_name_output = [layers_names_all[int(i) - 1]
                      for i in np.array(network.getUnconnectedOutLayers()).flatten()]

In [7]:
# Check point: showing which layers the forward pass will be asked for
print(layers_name_output)

['yolo_82', 'yolo_94', 'yolo_106']


In [8]:
# Minimum class score a detection must exceed to be kept;
# also passed to cv2.dnn.NMSBoxes as its score threshold
probability_minum = 0.5
# Passed to cv2.dnn.NMSBoxes as its non-maximum suppression threshold
threshold = 0.3

In [9]:
# One random colour (three uint8 channels) per class label.
# The upper bound of the generator is exclusive, so 256 is needed to cover
# the full 0..255 range; the fixed seed keeps every class on the same colour
# from one run to the next without touching NumPy's global random state.
colours = np.random.default_rng(42).integers(0, 256, size=(len(labels), 3), dtype='uint8')

In [10]:
# Real-time detection loop. Each iteration reads one frame from the camera,
# runs it through the network, filters the detections, draws the surviving
# boxes on the frame and shows it. The loop ends when 'q' is pressed or when
# the camera stops delivering frames. The camera and the windows are released
# in every case, including when an error is raised inside the loop.
window_name = 'YOLO v3 Real Time Detections'

try:
    # Creating the display window once instead of on every frame
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    while True:
        frame_read, frame = camera.read()

        # A failed read would otherwise crash on 'frame.shape' below
        if not frame_read or frame is None:
            print('could not read a frame from the camera')
            break

        # Frame size is taken once, from the first frame
        if w is None or h is None:
            h, w = frame.shape[:2]

        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)

        # Implementing forward pass and measuring how long it takes
        network.setInput(blob)
        start = time.time()
        outpur_from_network = network.forward(layers_name_output)
        end = time.time()

        print('current frame took{:.5f} seconds'.format(end-start))

        # Detections kept from the current frame
        bounding_boxes = []
        confidences = []
        class_numbers = []

        for layer_output in outpur_from_network:
            for detected_objects in layer_output:
                # Values from index 5 onwards are used as the per-class scores
                scores = detected_objects[5:]
                class_current = np.argmax(scores)
                confidence_current = scores[class_current]

                if confidence_current > probability_minum:
                    # Scaling the first four values (centre x, centre y,
                    # width, height) up to the size of the frame
                    box_current = detected_objects[0:4] * np.array([w, h, w, h])

                    # Now, from YOLO data format, we can get top left corner
                    # coordinates that are x_min and y_min
                    x_center, y_center, box_width, box_height = box_current
                    x_min = int(x_center - (box_width / 2))
                    y_min = int(y_center - (box_height / 2))

                    # Adding results into prepared lists
                    bounding_boxes.append([x_min, y_min,
                                           int(box_width), int(box_height)])
                    confidences.append(float(confidence_current))
                    class_numbers.append(class_current)

        # Non-maximum suppression; gives back the indexes of the boxes to keep
        results = cv2.dnn.NMSBoxes(bounding_boxes, confidences,
                                   probability_minum, threshold)

        if len(results) > 0:
            # Going through indexes of results; np.array(...) makes flatten()
            # available whatever container type NMSBoxes returned
            for i in np.array(results).flatten():
                # Getting current bounding box coordinates,
                # its width and height
                x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
                box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]

                # Preparing colour for current bounding box
                # and converting from numpy array to list
                colour_box_current = colours[class_numbers[i]].tolist()

                # Drawing bounding box on the original current frame
                cv2.rectangle(frame, (x_min, y_min),
                              (x_min + box_width, y_min + box_height),
                              colour_box_current, 2)

                # Preparing text with label and confidence for current bounding box
                text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
                                                       confidences[i])

                # Putting text with label and confidence on the original image
                cv2.putText(frame, text_box_current, (x_min, y_min - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)

        # Pay attention! 'cv2.imshow' takes images in BGR format
        cv2.imshow(window_name, frame)

        # Breaking the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Releasing camera
    camera.release()
    # Destroying all opened OpenCV windows
    cv2.destroyAllWindows()

         





current frame took0.52963 seconds
current frame took0.32934 seconds
current frame took0.32984 seconds
current frame took0.33676 seconds
current frame took0.29363 seconds
current frame took0.31049 seconds
current frame took0.31893 seconds
current frame took0.31546 seconds
current frame took0.30454 seconds
current frame took0.30454 seconds
current frame took0.44442 seconds
current frame took0.32141 seconds
current frame took0.33133 seconds
current frame took0.50790 seconds
current frame took0.35762 seconds
current frame took0.35265 seconds
current frame took0.41014 seconds
current frame took0.36010 seconds
current frame took0.42161 seconds
current frame took0.65671 seconds
current frame took0.48760 seconds
current frame took0.46426 seconds
current frame took0.54858 seconds
current frame took0.42309 seconds
current frame took0.37115 seconds
current frame took0.47517 seconds
current frame took0.32934 seconds
current frame took0.31865 seconds
current frame took0.43301 seconds
current frame 