In [3]:
# Jupyter notebook code

# Import necessary libraries
from pyimagesearch.centroidtracker import CentroidTracker
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
                help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
                help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# initialize the video stream and allow the camera sensor to warmup
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# loop over the frames from the video stream
while True:
    # read the next frame from the video stream and resize it
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # if the frame dimensions are None, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # construct a blob from the frame, pass it through the network,
    # obtain our output predictions, and initialize the list of
    # bounding box rectangles
    blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H),
                                (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    rects = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # filter out weak detections by ensuring the predicted
        # probability is greater than a minimum threshold
        if detections[0, 0, i, 2] > args["confidence"]:
            # compute the (x, y)-coordinates of the bounding box for
            # the object, then update the bounding box rectangles list
            box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
            rects.append(box.astype("int"))

            # draw a bounding box surrounding the object so we can
            # visualize it
            (startX, startY, endX, endY) = box.astype("int")
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          (0, 255, 0), 2)

    # update our centroid tracker using the computed set of bounding
    # box rectangles
    objects = ct.update(rects)

    # loop over the tracked objects
    for (objectID, centroid) in objects.items():
        # draw both the ID of the object and the centroid of the
        # object on the output frame
        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()


usage: ipykernel_launcher.py [-h] -p PROTOTXT -m MODEL [-c CONFIDENCE]
ipykernel_launcher.py: error: the following arguments are required: -p/--prototxt, -m/--model


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
# Import necessary libraries
from pyimagesearch.centroidtracker import CentroidTracker
from imutils.video import VideoStream
import numpy as np
import imutils
import time
import cv2

# Manually set the values for prototxt, model, and confidence
prototxt = "./deploy.prototxt"
model = "./res10_300x300_ssd_iter_140000.caffemodel"
confidence = 0.5

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(prototxt, model)

# initialize the video stream and allow the camera sensor to warmup
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# loop over the frames from the video stream
while True:
    # read the next frame from the video stream and resize it
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # if the frame dimensions are None, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # construct a blob from the frame, pass it through the network,
    # obtain our output predictions, and initialize the list of
    # bounding box rectangles
    blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    rects = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # filter out weak detections by ensuring the predicted
        # probability is greater than a minimum threshold
        if detections[0, 0, i, 2] > confidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object, then update the bounding box rectangles list
            box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
            rects.append(box.astype("int"))

            # draw a bounding box surrounding the object so we can
            # visualize it
            (startX, startY, endX, endY) = box.astype("int")
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)

    # update our centroid tracker using the computed set of bounding
    # box rectangles
    objects = ct.update(rects)

    # loop over the tracked objects
    for (objectID, centroid) in objects.items():
        # draw both the ID of the object and the centroid of the
        # object on the output frame
        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()


[INFO] loading model...
[INFO] starting video stream...


In [3]:
# Import necessary libraries
from pyimagesearch.centroidtracker import CentroidTracker
import numpy as np
import time
import cv2

# Manually set the values for YOLO
config_path = "./yolov3.cfg"
weights_path = "./yolov3.weights"
confidence_threshold = 0.5

# Load YOLO
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = net.getUnconnectedOutLayersNames()

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# initialize the video stream from a given video file
video_path = "./input_video.mp4"
vs = cv2.VideoCapture(video_path)

# loop over the frames from the video stream
while True:
    # read the next frame from the video stream
    ret, frame = vs.read()

    # if the frame is not grabbed, we have reached the end of the video
    if not ret:
        break

    # resize the frame for processing
    frame = cv2.resize(frame, (400, 300))

    # if the frame dimensions are None, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # Detect objects using YOLO
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(ln)

    # List to store bounding box rectangles
    rects = []

    # loop over the detections
    for detection in detections:
        for obj in detection:
            scores = obj[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > confidence_threshold:
                # scale bounding box coordinates back relative to the size of the image
                box = obj[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                rects.append((x, y, int(width), int(height)))

    # update our centroid tracker using the computed set of bounding box rectangles
    objects = ct.update(rects)

    # loop over the tracked objects
    for (objectID, centroid) in objects.items():
        # draw both the ID of the object and the centroid of the object on the output frame
        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# release the video stream and close all windows
vs.release()
cv2.destroyAllWindows()


In [1]:
# Import necessary libraries
from pyimagesearch.centroidtracker import CentroidTracker
import numpy as np
import time
import cv2

# Manually set the values for YOLO (use YOLOv3-tiny for faster inference)
config_path = "./yolov3-tiny.cfg"
weights_path = "./yolov3-tiny.weights"
confidence_threshold = 0.2

# Load YOLO
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = net.getUnconnectedOutLayersNames()

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# initialize the video stream from a given video file
video_path = "./input_video.mp4"
vs = cv2.VideoCapture(video_path)

# loop over the frames from the video stream
while True:
    # read the next frame from the video stream
    ret, frame = vs.read()

    # if the frame is not grabbed, we have reached the end of the video
    if not ret:
        break

    # resize the frame for faster processing
    frame = cv2.resize(frame, (400, 300))

    # if the frame dimensions are None, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # Detect objects using YOLO
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(ln)

    # List to store bounding box rectangles
    rects = []

    # loop over the detections
    for detection in detections:
        for obj in detection:
            scores = obj[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > confidence_threshold:
                # scale bounding box coordinates back relative to the size of the image
                box = obj[0:4] * np.array([W, H, W, H])
                (x, y, width, height) = box.astype("int")

                # ensure the bounding box coordinates fall within the frame dimensions
                x = max(0, x)
                y = max(0, y)
                width = min(W, width)
                height = min(H, height)

                rects.append((x, y, width, height))

    # update our centroid tracker using the computed set of bounding box rectangles
    objects = ct.update(rects)

    # loop over the tracked objects
    for (objectID, centroid) in objects.items():
        # draw both the ID of the object and the centroid of the object on the output frame
        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# release the video stream and close all windows
vs.release()
cv2.destroyAllWindows()


In [3]:
# Import necessary libraries
from pyimagesearch.centroidtracker import CentroidTracker
import numpy as np
import time
import cv2

# Manually set the values for YOLO (use YOLOv3-tiny for faster inference)
config_path = "./yolov3-tiny.cfg"
weights_path = "./yolov3-tiny.weights"
confidence_threshold = 0.5

# Load YOLO
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = net.getUnconnectedOutLayersNames()

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# initialize the video stream from a given video file
video_path = "./input_video.mp4"
vs = cv2.VideoCapture(video_path)

# loop over the frames from the video stream
while True:
    # read the next frame from the video stream
    ret, frame = vs.read()

    # if the frame is not grabbed, we have reached the end of the video
    if not ret:
        break

    # resize the frame for faster processing
    frame = cv2.resize(frame, (400, 300))

    # if the frame dimensions are None, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # Detect objects using YOLO
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(ln)

    # List to store bounding box rectangles
    rects = []

    # loop over the detections
    for detection in detections:
        for obj in detection:
            scores = obj[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > confidence_threshold:
                # scale bounding box coordinates back relative to the size of the image
                box = obj[0:4] * np.array([W, H, W, H])
                (x, y, width, height) = box.astype("int")

                # ensure the bounding box coordinates fall within the frame dimensions
                x = max(0, x)
                y = max(0, y)
                width = min(W, width)
                height = min(H, height)

                rects.append((x, y, width, height))

    # update our centroid tracker using the computed set of bounding box rectangles
    objects = ct.update(rects)

    # loop over the tracked objects
    for (objectID, centroid) in objects.items():
        # draw both the ID of the object and a square frame around the object on the output frame
        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Draw a square frame around the object
        x, y, width, height = rects[objectID]
        cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# release the video stream and close all windows
vs.release()
cv2.destroyAllWindows()


IndexError: list index out of range

: 