**MODEL V8**

In [1]:
! pip install scipy deep_sort-realtime
! pip install ultralytics
! pip install supervision
! pip3 install opencv-python
! pip install numpy
! pip install deep_sort_realtime
! pip install opencv-python
! pip install torch torchvision torchaudio
! pip install matplotlib
! pip install pandas
! pip install seaborn
! pip install scikit-learn
! pip install plotly


Collecting deep_sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep_sort-realtime
Successfully installed deep_sort-realtime-1.3.2
Collecting ultralytics
  Downloading ultralytics-8.2.77-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m546.8 kB/s[0m eta [36m0:00:00[0m
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl.metadata (8.5 kB)
Downloading ultralytics-8.2.77-py3-none-any.whl (869 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m869.0/869.0 kB[0m [31m5.0 MB/s[0m eta [36m

Version 1: estimating speed in pixels per second (YOLOv8 + DeepSORT)

In [None]:
import cv2
import os
import numpy as np
from ultralytics import YOLO
import supervision as sv
from scipy.spatial.distance import euclidean
from deep_sort_realtime.deepsort_tracker import DeepSort

# Define the video path
MARKET_SQUARE_VIDEO_PATH = "/greyhound1.mp4"

# Initialize the YOLOv8 model
model = YOLO("yolov8n.pt")

# Initialize the DeepSort tracker
tracker = DeepSort(max_age=30)

# Verify the output directory and permissions *before* opening the video, so a
# failure here cannot leave an opened capture behind.
output_dir = "/content"
os.makedirs(output_dir, exist_ok=True)

if not os.access(output_dir, os.W_OK):
    raise PermissionError(f"Write permission denied for the directory {output_dir}")

# Define the output video path
output_path = os.path.join(output_dir, "object_counting_output_v8_3.mp4")

# Open the video file
cap = cv2.VideoCapture(MARKET_SQUARE_VIDEO_PATH)
assert cap.isOpened(), "Error reading video file"

# Get video properties. Width and height are cast to int because they are used
# as the writer's frame size; the frame rate is kept as a float, since
# truncating e.g. 29.97 to 29 would skew both the output video's duration and
# every speed derived from `fps`.
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # The container reported no usable frame rate; fall back to a nominal value
    # so the writer can still be created and speeds are not all zero.
    fps = 30.0
    print(f"Warning: could not read the frame rate; assuming {fps} FPS")

# Initialize the VideoWriter with the "mp4v" FourCC code
fourcc_code = cv2.VideoWriter_fourcc(*"mp4v")
video_writer = cv2.VideoWriter(output_path, fourcc_code, fps, (w, h))

if not video_writer.isOpened():
    # Do not leak the capture when the writer cannot be created.
    cap.release()
    raise IOError(f"Error initializing video writer with path {output_path}")

# Track previous positions of dogs to calculate speed (track id -> (x, y) center)
previous_positions = {}
box_annotator = sv.BoxAnnotator(thickness=4)

def calculate_speed(previous_position, current_position, fps):
    """Return the speed, in pixels per second, implied by two positions.

    The Euclidean distance between the two points is multiplied by ``fps``,
    i.e. the positions are treated as being exactly one frame apart.

    Args:
        previous_position: Earlier point, as a sequence of coordinates.
        current_position: Later point, same dimensionality.
        fps: Frame rate used to scale the per-frame displacement.

    Returns:
        The displacement multiplied by ``fps``.
    """
    return fps * euclidean(previous_position, current_position)

# Class index treated as "dog". This is assumed to match the COCO-pretrained
# YOLOv8 weights -- check `model.names` to confirm for other weights.
DOG_CLASS_ID = 16

# Process video frames. The try/finally guarantees that the capture and the
# writer are released (and the output file finalized) even when a frame fails
# or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Perform object detection; verbose=False keeps Ultralytics from
        # printing a log line for every single frame.
        results = model(frame, imgsz=1280, verbose=False)

        # Extract bounding boxes, confidences, and class IDs
        detected = results[0].boxes
        boxes = detected.xyxy.cpu().numpy()
        confidences = detected.conf.cpu().numpy()
        class_ids = detected.cls.cpu().numpy().astype(int)

        # Prepare detections for tracking as
        # ([left, top, width, height], confidence, class_id) tuples.
        detections = [
            ([x1, y1, x2 - x1, y2 - y1], confidence, class_id)
            for (x1, y1, x2, y2), confidence, class_id in zip(boxes, confidences, class_ids)
        ]

        # Update the tracker with detections
        tracked_objects = tracker.update_tracks(detections, frame=frame)

        # Annotate the frame with boxes and labels for confirmed dog tracks only
        for obj in tracked_objects:
            if not obj.is_confirmed() or obj.det_class != DOG_CLASS_ID:
                continue

            left, top, width, height = obj.to_ltwh()
            obj_id = obj.track_id
            center = (int(left + width / 2), int(top + height / 2))

            if obj_id in previous_positions:
                speed = calculate_speed(previous_positions[obj_id], center, fps)
                label = f"ID {obj_id} | Speed: {speed:.2f} px/s"
            else:
                # First sighting of this track: no earlier position to compare with.
                label = f"ID {obj_id} | Speed: Calculating..."

            previous_positions[obj_id] = center

            # Draw the bounding box
            cv2.rectangle(
                frame,
                (int(left), int(top)),
                (int(left + width), int(top + height)),
                (0, 255, 0),
                2,
            )
            # Put the label on top of the bounding box
            cv2.putText(
                frame,
                label,
                (int(left), int(top) - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (255, 0, 0),
                2,
            )

        # Write the processed frame to the output video
        video_writer.write(frame)
finally:
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

print(f"Processed video saved to {output_path}")


In [2]:
#@title Install dependencies
# Build and install Detectron2 from the upstream GitHub repository (no tag or
# commit is specified, so whatever the default branch currently points to is used).
# NOTE(review): the disabled line below pinned torch 1.5 / torchvision 0.6 for
# CUDA 10.1. It looks stale: the version check recorded further down in this
# notebook prints torch 2.3 with a CPU build. Kept only for history.
#!pip install -U torch==1.5 torchvision==0.6 -f https://download.pytorch.org/whl/cu101/torch_stable.html
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-lm40kg8h
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-lm40kg8h
  Resolved https://github.com/facebookresearch/detectron2.git to commit bcfd464d0c810f0442d91a349c0f6df945467143
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pycocotools>=2.0.2 (from detectron2==0.6)
  Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting tabulate (from detectron2==0.6)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import torch

# torch.__version__ looks like "X.Y.Z" optionally followed by "+<build tag>"
# (e.g. "+cpu"). Split the two parts apart before inspecting either.
_release, _, _build_tag = torch.__version__.partition("+")

# Major.minor only, e.g. "2.3".
TORCH_VERSION = ".".join(_release.split(".")[:2])

# Prefer the build tag when present. Without one, splitting on "+" would return
# the whole version string, so fall back to torch.version.cuda (None on CPU-only
# builds) instead of mislabelling the torch version as the CUDA version.
if _build_tag:
    CUDA_VERSION = _build_tag
elif torch.version.cuda:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
else:
    CUDA_VERSION = "cpu"

print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

torch:  2.3 ; cuda:  cpu


In [4]:
import detectron2
import pycocotools

In [5]:
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [6]:
!pip install deep-sort-realtime




Version 2: estimating speed in km/h using a different model (Detectron2 Mask R-CNN + DeepSORT)

In [None]:
import cv2
import os
import numpy as np
import torch
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer
from deep_sort_realtime.deepsort_tracker import DeepSort
from scipy.spatial.distance import euclidean

# Define the video path
MARKET_SQUARE_VIDEO_PATH = "/content/greyhound1.mp4"

# Setup Detectron2 model configuration
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # Set threshold for this model
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the Detectron2 predictor
predictor = DefaultPredictor(cfg)

# Initialize the DeepSort tracker
tracker = DeepSort(max_age=30)

# Verify the output directory and permissions *before* opening the video, so a
# failure here cannot leave an opened capture behind.
output_dir = "/content"
os.makedirs(output_dir, exist_ok=True)

if not os.access(output_dir, os.W_OK):
    raise PermissionError(f"Write permission denied for the directory {output_dir}")

# Define the output video path
output_path = os.path.join(output_dir, "dog_tracking_output_kmph.mp4")

# Open the video file
cap = cv2.VideoCapture(MARKET_SQUARE_VIDEO_PATH)
assert cap.isOpened(), "Error reading video file"

# Get video properties. Width and height are cast to int because they are used
# as the writer's frame size; the frame rate is kept as a float, since
# truncating e.g. 29.97 to 29 would skew both the output video's duration and
# every speed derived from `fps`.
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # The container reported no usable frame rate; fall back to a nominal value
    # so the writer can still be created and speeds are not all zero.
    fps = 30.0
    print(f"Warning: could not read the frame rate; assuming {fps} FPS")

# Initialize the VideoWriter with the "mp4v" FourCC code
fourcc_code = cv2.VideoWriter_fourcc(*"mp4v")
video_writer = cv2.VideoWriter(output_path, fourcc_code, fps, (w, h))

if not video_writer.isOpened():
    # Fail loudly (and release the capture) instead of silently writing nothing.
    cap.release()
    raise IOError(f"Error initializing video writer with path {output_path}")

# Example scale: 1 pixel = 0.05 meters (adjust according to your video)
scale_meters_per_pixel = 0.05

# Function to calculate speed in km/h
def calculate_speed(previous_position, current_position, fps, meters_per_pixel=None):
    """Return the speed in km/h implied by two pixel positions.

    The Euclidean pixel distance is converted to meters, multiplied by ``fps``
    (i.e. the positions are treated as being exactly one frame apart) to get
    meters per second, then multiplied by 3.6 to get km/h.

    Args:
        previous_position: Earlier point in pixel coordinates.
        current_position: Later point in pixel coordinates.
        fps: Frame rate used to scale the per-frame displacement.
        meters_per_pixel: Pixel-to-meter scale. When omitted, the notebook-level
            ``scale_meters_per_pixel`` is used, matching the previous behavior.

    Returns:
        The estimated speed in kilometers per hour.
    """
    if meters_per_pixel is None:
        # Backward-compatible default: read the scale configured in the notebook.
        meters_per_pixel = scale_meters_per_pixel

    distance_pixels = euclidean(previous_position, current_position)
    # Convert pixels to meters
    distance_meters = distance_pixels * meters_per_pixel
    # Speed in meters per second
    speed_mps = distance_meters * fps
    # Convert to kilometers per hour (km/h)
    return speed_mps * 3.6

from detectron2.data import MetadataCatalog

# Detectron2 reports `pred_classes` as contiguous indices into the training
# dataset's `thing_classes` list, not as raw COCO category ids. Resolve "dog"
# by name instead of hard-coding a number: the previously used value 17 is
# "horse" in that list ("dog" is 16), so dogs were being filtered out.
DOG_CLASS_ID = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes.index("dog")

# Track previous positions of dogs to calculate speed (track id -> (x, y) center)
previous_positions = {}

# Process video frames. The try/finally guarantees that the capture and the
# writer are released (and the output file finalized) even when a frame fails
# or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Perform object detection
        outputs = predictor(frame)

        # Extract bounding boxes, confidences, and class IDs
        instances = outputs["instances"].to("cpu")
        boxes = instances.pred_boxes.tensor.numpy()
        confidences = instances.scores.numpy()
        class_ids = instances.pred_classes.numpy()

        # Keep only the dog detections
        is_dog = class_ids == DOG_CLASS_ID
        boxes = boxes[is_dog]
        confidences = confidences[is_dog]
        class_ids = class_ids[is_dog]

        # Prepare detections for tracking as
        # ([left, top, width, height], confidence, class_id) tuples.
        detections = [
            ([x1, y1, x2 - x1, y2 - y1], confidence, class_id)
            for (x1, y1, x2, y2), confidence, class_id in zip(boxes, confidences, class_ids)
        ]

        # Update the tracker with detections
        tracked_objects = tracker.update_tracks(detections, frame=frame)

        for obj in tracked_objects:
            if not obj.is_confirmed():
                continue

            left, top, width, height = obj.to_ltwh()
            obj_id = obj.track_id
            center = (int(left + width / 2), int(top + height / 2))

            if obj_id in previous_positions:
                speed_kmph = calculate_speed(previous_positions[obj_id], center, fps)
                label = f"ID {obj_id} | Speed: {speed_kmph:.2f} km/h"
            else:
                # First sighting of this track: no earlier position to compare with.
                label = f"ID {obj_id} | Speed: calculating..."

            previous_positions[obj_id] = center

            # Draw the box and label on the frame
            cv2.rectangle(
                frame,
                (int(left), int(top)),
                (int(left + width), int(top + height)),
                (0, 255, 0),
                2,
            )
            cv2.putText(
                frame,
                label,
                (int(left), int(top - 10)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                2,
            )

        # Write the processed frame to the output video
        video_writer.write(frame)
finally:
    cap.release()
    video_writer.release()

print(f"Processed video saved to {output_path}")


Version 3: utilizing a robust pre-trained model (Detectron2 Mask R-CNN) with fixed-size tracking boxes

In [7]:
import cv2
import os
import numpy as np
import torch
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from deep_sort_realtime.deepsort_tracker import DeepSort
from scipy.spatial.distance import euclidean

# Define the video path
MARKET_SQUARE_VIDEO_PATH = "/greyhound1.mp4"

# Setup Detectron2 model configuration
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # Set threshold for this model
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the Detectron2 predictor
predictor = DefaultPredictor(cfg)

# Initialize the DeepSort tracker
tracker = DeepSort(max_age=30)

# Verify the output directory and permissions *before* opening the video, so a
# failure here cannot leave an opened capture behind.
output_dir = "/content"
os.makedirs(output_dir, exist_ok=True)

if not os.access(output_dir, os.W_OK):
    raise PermissionError(f"Write permission denied for the directory {output_dir}")

# Define the output video path
output_path = os.path.join(output_dir, "dog_tracking_output_kmph.mp4")

# Open the video file
cap = cv2.VideoCapture(MARKET_SQUARE_VIDEO_PATH)
assert cap.isOpened(), "Error reading video file"

# Get video properties. Width and height are cast to int because they are used
# as the writer's frame size; the frame rate is kept as a float, since
# truncating e.g. 29.97 to 29 would skew both the output video's duration and
# every speed derived from `fps`.
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # The container reported no usable frame rate; fall back to a nominal value
    # so the writer can still be created and speeds are not all zero.
    fps = 30.0
    print(f"Warning: could not read the frame rate; assuming {fps} FPS")

# Initialize the VideoWriter with the "mp4v" FourCC code
fourcc_code = cv2.VideoWriter_fourcc(*"mp4v")
video_writer = cv2.VideoWriter(output_path, fourcc_code, fps, (w, h))

if not video_writer.isOpened():
    # Fail loudly (and release the capture) instead of silently writing nothing.
    cap.release()
    raise IOError(f"Error initializing video writer with path {output_path}")

# Example scale: 1 pixel = 0.05 meters (adjust according to your video)
scale_meters_per_pixel = 0.05

# Define fixed bounding box size (adjust as needed)
fixed_box_width = 100  # Width of the bounding box
fixed_box_height = 60  # Height of the bounding box

# Function to calculate speed in km/h
def calculate_speed(previous_position, current_position, fps, meters_per_pixel=None):
    """Return the speed in km/h implied by two pixel positions.

    The Euclidean pixel distance is converted to meters, multiplied by ``fps``
    (i.e. the positions are treated as being exactly one frame apart) to get
    meters per second, then multiplied by 3.6 to get km/h.

    Args:
        previous_position: Earlier point in pixel coordinates.
        current_position: Later point in pixel coordinates.
        fps: Frame rate used to scale the per-frame displacement.
        meters_per_pixel: Pixel-to-meter scale. When omitted, the notebook-level
            ``scale_meters_per_pixel`` is used, matching the previous behavior.

    Returns:
        The estimated speed in kilometers per hour.
    """
    if meters_per_pixel is None:
        # Backward-compatible default: read the scale configured in the notebook.
        meters_per_pixel = scale_meters_per_pixel

    distance_pixels = euclidean(previous_position, current_position)
    # Convert pixels to meters
    distance_meters = distance_pixels * meters_per_pixel
    # Speed in meters per second
    speed_mps = distance_meters * fps
    # Convert to kilometers per hour (km/h)
    return speed_mps * 3.6

from detectron2.data import MetadataCatalog

# Detectron2 reports `pred_classes` as contiguous indices into the training
# dataset's `thing_classes` list, not as raw COCO category ids. Resolve "dog"
# by name instead of hard-coding a number: the previously used value 17 is
# "horse" in that list ("dog" is 16), so dogs were being filtered out.
DOG_CLASS_ID = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes.index("dog")

# Track previous positions of dogs to calculate speed (track id -> (x, y) center)
previous_positions = {}

# Process video frames. The try/finally guarantees that the capture and the
# writer are released (and the output file finalized) even when a frame fails
# or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Perform object detection
        outputs = predictor(frame)

        # Extract bounding boxes, confidences, and class IDs
        instances = outputs["instances"].to("cpu")
        boxes = instances.pred_boxes.tensor.numpy()
        confidences = instances.scores.numpy()
        class_ids = instances.pred_classes.numpy()

        # Keep only the dog detections
        is_dog = class_ids == DOG_CLASS_ID
        boxes = boxes[is_dog]
        confidences = confidences[is_dog]
        class_ids = class_ids[is_dog]

        # Prepare detections for tracking: each detection is replaced by a
        # fixed-size box centered on the detected box's center.
        detections = []
        for (x1, y1, x2, y2), confidence, class_id in zip(boxes, confidences, class_ids):
            cx, cy = int((x1 + x2) / 2), int((y1 + y2) / 2)
            bbox = [
                cx - fixed_box_width // 2,
                cy - fixed_box_height // 2,
                fixed_box_width,
                fixed_box_height,
            ]
            detections.append((bbox, confidence, class_id))

        # Update the tracker with detections
        tracked_objects = tracker.update_tracks(detections, frame=frame)

        for obj in tracked_objects:
            if not obj.is_confirmed():
                continue

            left, top, width, height = obj.to_ltwh()
            obj_id = obj.track_id
            center = (int(left + width / 2), int(top + height / 2))

            if obj_id in previous_positions:
                speed_kmph = calculate_speed(previous_positions[obj_id], center, fps)
                label = f"ID {obj_id} | Speed: {speed_kmph:.2f} km/h"
            else:
                # First sighting of this track: no earlier position to compare with.
                label = f"ID {obj_id} | Speed: calculating..."

            previous_positions[obj_id] = center

            # Draw the fixed-size box and label on the frame. The rectangle uses
            # the configured fixed dimensions, anchored at the track's top-left.
            cv2.rectangle(
                frame,
                (int(left), int(top)),
                (int(left + fixed_box_width), int(top + fixed_box_height)),
                (0, 255, 0),
                2,
            )
            cv2.putText(
                frame,
                label,
                (int(left), int(top - 10)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                2,
            )

        # Write the processed frame to the output video
        video_writer.write(frame)
finally:
    cap.release()
    video_writer.release()

print(f"Processed video saved to {output_path}")


[08/14 06:41:24 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...


model_final_f10217.pkl: 178MB [00:01, 134MB/s]                           
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Video frame is empty or video processing has been successfully completed.
Processed video saved to /content/dog_tracking_output_kmph.mp4
