In [1]:
!pip install ultralytics
!pip install norfair

Collecting ultralytics
  Downloading ultralytics-8.2.78-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl.metadata (8.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu1

In [2]:
import cv2
import numpy as np
from ultralytics import YOLO
import os
from norfair import Detection, Tracker
from google.colab import files

In [3]:
# Ensure the directory for saving the output exists
output_dir = '/content/output/'
os.makedirs(output_dir, exist_ok=True)

# Specify the path to the video file directly
video_path = r'/content/highway.mp4'  # Adjust the path based on where your video is located

# Load the best available YOLOv8 model (YOLOv8x)
model = YOLO('yolov8x.pt')  # You can use 'yolov8n.pt', 'yolov8s.pt', etc., for smaller models

# Load the video
cap = cv2.VideoCapture(video_path)

# Check if the video opened successfully
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Get video details
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # Use float FPS
output_video_path = os.path.join(output_dir, 'output_video.mp4')

# Define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Norfair Tracker setup
tracker = Tracker(distance_function="euclidean", distance_threshold=30)

# Object IDs for counting
car_counted_ids = set()
motorcycle_counted_ids = set()

# Total counts
total_car_count = 0
total_motorcycle_count = 0

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:00<00:00, 182MB/s]


In [4]:
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8x model for object detection
    results = model(frame)

    # Prepare detections for tracking
    detections = []
    for result in results:
        boxes = result.boxes.xyxy.cpu().numpy()  # Get the bounding box coordinates
        labels = result.names  # Get the labels

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = map(int, box)
            label = labels[int(result.boxes.cls[i])]

            if label in ['car', 'motorcycle']:
                center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2
                detections.append(Detection(points=np.array([center_x, center_y]), label=label))

                # Draw bounding box and label on the frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

    # Update tracker
    tracked_objects = tracker.update(detections=detections)

    # Count unique cars and motorcycles based on their IDs
    for obj in tracked_objects:
        obj_label = obj.last_detection.label
        obj_id = obj.id
        if obj_label == 'car' and obj_id not in car_counted_ids:
            total_car_count += 1
            car_counted_ids.add(obj_id)
        elif obj_label == 'motorcycle' and obj_id not in motorcycle_counted_ids:
            total_motorcycle_count += 1
            motorcycle_counted_ids.add(obj_id)

    # Annotate frame with the total counts
    cv2.putText(frame, f'Total Cars: {total_car_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(frame, f'Total Motorcycles: {total_motorcycle_count}', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Write the annotated frame to the output video
    out.write(frame)

# Release everything
cap.release()
out.release()

print(f"Annotated video saved as {output_video_path}")

# Download the processed video
files.download(output_video_path)


0: 384x640 1 person, 1 truck, 147.3ms
Speed: 17.3ms preprocess, 147.3ms inference, 1604.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 truck, 64.8ms
Speed: 10.7ms preprocess, 64.8ms inference, 11.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 truck, 62.2ms
Speed: 2.3ms preprocess, 62.2ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 truck, 57.0ms
Speed: 3.5ms preprocess, 57.0ms inference, 5.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 truck, 65.0ms
Speed: 2.6ms preprocess, 65.0ms inference, 8.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 car, 1 truck, 132.9ms
Speed: 5.1ms preprocess, 132.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 truck, 64.3ms
Speed: 2.2ms preprocess, 64.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 car, 1 truck, 51.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>