In [20]:
# import the necessary packages
import numpy as np
import argparse
import imutils
import time
import cv2
import os
from imutils.video import FPS
from imutils.video import VideoStream


# --- Configuration -----------------------------------------------------------
# Source handed to cv2.VideoCapture. Despite the name, the value is a path to
# a local video file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is pointed at a local copy of the video.
RTSP_URL= "C:\\Users\\Hari\\Downloads\\Computer-Vision-with-Python_2\\Computer-Vision-with-Python\\06-Deep-Learning-Computer-Vision\\06-YOLOv3\\videos\\test\\store-aisle-detection.mp4"
# Directory expected to contain coco.names, yolov3.weights and yolov3.cfg.
YOLO_PATH = "yolo-coco"
# NOTE(review): not referenced by any of the visible cells.
OUTPUT_FILE = "output/outfile.avi"
# Minimum class score for a detection to be kept; also passed to NMSBoxes as
# its score threshold.
CONFIDENCE = 0.5
# Overlap threshold passed to NMSBoxes.
THRESHOLD = 0.3

# load the COCO class labels our YOLO model was trained on (one per line);
# the context manager closes the file as soon as it has been read
labelsPath = os.path.join(YOLO_PATH, "coco.names")
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# initialize a list of colors to represent each possible class label; the
# fixed seed keeps the label -> color mapping the same on every run
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.join(YOLO_PATH, "yolov3.weights")
configPath = os.path.join(YOLO_PATH, "yolov3.cfg")

# load our YOLO object detector trained on COCO dataset (80 classes)
# and determine only the *output* layer names that we need from YOLO
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
ln = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer indices. Depending on the
# OpenCV build they arrive either as an N x 1 array or as a flat array, so
# flatten first; indexing each entry with [0] fails on the flat form.
ln = [ln[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# per-run state: output writer placeholder, frame dimensions (filled in
# from the first frame that is read) and the running frame counter
writer = None
W = H = None
cnt = 0

# open the video source (swap in cv2.VideoCapture(0) to read from a capture
# device instead), pause for two seconds, then start the FPS timer
vs = cv2.VideoCapture(RTSP_URL)
time.sleep(2.0)
fps = FPS().start()



# loop over frames from the video file stream
#
# The detector only runs on every DETECT_EVERY-th frame; the boxes it finds
# are redrawn on the frames in between. The detection state therefore has to
# exist before the loop starts: without these initial values the first frame
# reaches the drawing code with `boxes` undefined and raises NameError on a
# fresh kernel.
DETECT_EVERY = 30
boxes = []
confidences = []
classIDs = []
idxs = []

while True:
    cnt += 1
    # read the next frame from the file
    (grabbed, frame) = vs.read()

    # if the frame was not grabbed, then we have reached the end
    # of the stream
    if not grabbed:
        break
    # if the frame dimensions are empty, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # construct a blob from the input frame and then perform a forward
    # pass of the YOLO object detector, giving us our bounding boxes
    # and associated probabilities
    if cnt % DETECT_EVERY == 0:
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # start this pass with fresh lists of detected bounding boxes,
        # confidences, and class IDs, respectively
        boxes = []
        confidences = []
        classIDs = []

        # factors that map YOLO's relative (cx, cy, w, h) onto frame pixels;
        # constant for the whole pass, so build the array once
        frame_scale = np.array([W, H, W, H])

        # loop over each of the layer outputs
        for output in layerOutputs:
            # loop over each of the detections
            for detection in output:
                # extract the class ID and confidence (i.e., probability)
                # of the current object detection
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                # filter out weak predictions by ensuring the detected
                # probability is greater than the minimum probability
                if confidence > CONFIDENCE:
                    # scale the bounding box coordinates back relative to
                    # the size of the image, keeping in mind that YOLO
                    # actually returns the center (x, y)-coordinates of
                    # the bounding box followed by the boxes' width and
                    # height
                    box = detection[0:4] * frame_scale
                    (centerX, centerY, width, height) = box.astype("int")

                    # use the center (x, y)-coordinates to derive the top
                    # left corner of the bounding box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    # update our list of bounding box coordinates,
                    # confidences, and class IDs (stored as plain Python
                    # numbers, like the confidences)
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(int(classID))

        # apply non-maxima suppression to suppress weak, overlapping
        # bounding boxes; the inputs only change when a detection pass
        # runs, so the result is cached here and reused on later frames
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, THRESHOLD)

    # ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes we are keeping (np.array(...) so this works
        # whether NMSBoxes handed back an array or a plain sequence)
        for i in np.array(idxs).flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # draw a bounding box rectangle and label on the frame
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    cv2.imshow("Frame", cv2.resize(frame, (800, 600)))
    key = cv2.waitKey(1) & 0xFF
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

    # update the FPS counter
    fps.update()

# stop the timer, then read back the elapsed time and the frame rate
fps.stop()
elapsed_seconds = fps.elapsed()
frames_per_second = fps.fps()

# report both figures with two decimals
print("[INFO] elasped time: {:.2f}".format(elapsed_seconds))
print("[INFO] approx. FPS: {:.2f}".format(frames_per_second))

# tear down: close the preview window first, then release the capture
cv2.destroyAllWindows()
print("[INFO] cleaning up...")
vs.release()


[INFO] loading YOLO from disk...
[INFO] elasped time: 50.54
[INFO] approx. FPS: 77.58
[INFO] cleaning up...


In [21]:
# Inspect the confidence scores left behind by the frame loop. The list is
# rebuilt on every detection pass, so these values come from the last pass
# that ran before the loop ended.
print(confidences)

[0.9648265838623047, 0.9579932689666748, 0.9967951774597168, 0.9169735312461853, 0.9865075349807739, 0.5342775583267212, 0.996575117111206, 0.8275232315063477, 0.8932504057884216, 0.9990975260734558, 0.9834385514259338, 0.9315935969352722, 0.9708663821220398, 0.9925567507743835, 0.9985137581825256, 0.882327675819397, 0.9430353045463562, 0.5979639887809753, 0.5223633646965027, 0.7724838256835938, 0.8240010738372803, 0.901584267616272, 0.5795354843139648, 0.7492005825042725, 0.7277642488479614, 0.658200740814209, 0.6824901103973389, 0.5834359526634216, 0.6575490832328796, 0.5872204899787903, 0.5317538380622864, 0.5206746459007263, 0.5675079822540283, 0.827658474445343, 0.8111899495124817, 0.5230939984321594, 0.5202757120132446, 0.5888984799385071]


In [22]:
# Inspect the class IDs from the same detection pass; the loop appends to
# `classIDs` and `confidences` together, so entry k here pairs with entry k
# of `confidences`.
print(classIDs)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45]


In [24]:
# Look up the human-readable label for class ID 45, one of the IDs that
# appears in the `classIDs` list printed by the preceding cell.
LABELS[45]

'bowl'