In [1]:
# import the necessary packages
import numpy as np
import tensorflow as tf
import cv2
import time
import os

In [2]:
#open the directory ans set confidence amd threshold for predicting
yolo_dir = os.path.join("coco")
conf = 0.5
threshold = 0.3


In [4]:
# load the COCO class labels our YOLO model was trained on
labelspath = os.path.sep.join([yolo_dir, "coco.names"])
labels = open(labelspath).read().strip().split("\n")

In [12]:
# initialize a list of colors to represent each possible class label
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

In [14]:
# derive the paths to the YOLO weights and model configuration
weights_path = os.path.sep.join([yolo_dir, "yolov3.weights"])
config_path = os.path.sep.join([yolo_dir, "yolov3.cfg"])


In [15]:
# load our YOLO object detector trained on COCO dataset (80 classes)
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

In [25]:
# load our input image and grab its spatial dimensions
#image = cv2.imread("images/baggage_claim.jpg")
#(H,W) = image.shape[:2]
# determine only the *output* layer names that we need from YOLO
#ln = net.getLayerNames()
#ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

# construct a blob from the input image and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes and
# associated probabilities
#blob = cv2.dnn.blobFromImage(image, 1/255.0, (416,416), swapRB=True, crop=False)
#net.setInput(blob)
#start = time.time()
#layeroutputs = net.forward(ln)
#end = time.time()

# show timing information on YOLO
#print("[INFO] YOLO took {:.6f} seconds".format(end - start))


# Initialize the video capture from the default camera (index 0)
cap = cv2.VideoCapture(0)
address="http://192.168.29.6:8080/" #IP Address of the camera generated from the phone 
cap.open(address) #captured into the address and opened 

while True:
    # Capture a frame from the camera
    ret, frame = cap.read()
    if not ret:
        break

    # Grab the spatial dimensions of the frame
    (H, W) = frame.shape[:2]

    # Determine only the *output* layer names that we need from YOLO
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    # Construct a blob from the input image and perform a forward
    # pass of the YOLO object detector, giving us our bounding boxes and
    # associated probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layeroutputs = net.forward(ln)
    end = time.time()

    # Show timing information on YOLO
    print("[INFO] YOLO took {:.6f} seconds".format(end - start))

[INFO] YOLO took 1.648670 seconds


In [26]:
# Initialize our lists of detected bounding boxes, confidences, and class IDs, respectively

    boxes = []
    confidences = []
    classids = []

    # Loop over each of the layer outputs
    for output in layeroutputs:
        # Loop over each of the detections
        for detection in output:
            # Extract the class ID and confidence of the current object detection
            scores = detection[5:]
            classid = np.argmax(scores)
            confidence = scores[classid]

            # Filter out weak predictions by ensuring the detected probability is greater than the minimum probability
            if confidence > conf:
                # Scale the bounding box coordinates back relative to the size of the image
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # Use the center (x, y)-coordinates to derive the top and left corner of the bounding box
                X = int(centerX - (width / 2))
                Y = int(centerY - (height / 2))

                # Update our list of bounding box coordinates, confidences, and class IDs
                boxes.append([X, Y, int(width), int(height)])
                confidences.append(float(confidence))
                classids.append(classid)

    # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, conf, threshold)



In [32]:
 # Ensure at least one detection exists
    if len(idxs) > 0:
        # Loop over the indexes we are keeping
        for i in idxs.flatten():
            # Extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # Draw a bounding box rectangle and label on the image
            color = [int(c) for c in colors[classids[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(labels[classids[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Show the output frame
    cv2.imshow("Object Detection", frame)

    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

27

In [None]:
# Release the video capture object and close all windows
cap.release()
cv2.destroyAllWindows()