In [2]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.24-py3-none-any.whl (778 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.7/778.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1

In [4]:
from ultralytics import YOLO
import cv2
import torch

def load_yolo_v8_model(weights='yolov8n.pt'):
    """Build the YOLO model used for detection.

    Args:
        weights: Value handed straight to ``YOLO(...)``. Defaults to
            ``'yolov8n.pt'``, the checkpoint this function previously
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        The object produced by ``YOLO(weights)``.
    """
    return YOLO(weights)

def _draw_detections(frame, result, class_names, conf_threshold, color=(255, 0, 0)):
    """Draw a labelled rectangle on ``frame`` for each detection above ``conf_threshold``."""
    boxes = result.boxes.xyxy.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy().astype(int)

    for box, confidence, class_id in zip(boxes, confidences, class_ids):
        if confidence > conf_threshold:
            x1, y1, x2, y2 = map(int, box)
            caption = f"{class_names[class_id]} {confidence:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            # Place the caption just above the top-left corner of the box.
            cv2.putText(frame, caption, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def detect_and_track_yolo_v8(video_path, output_path, start_time=0, end_time=None, conf_threshold=0.5):
    """Run the YOLO model on a time window of a video and save an annotated copy.

    Every frame in the window is passed through the model on its own; detections
    whose confidence exceeds ``conf_threshold`` are drawn (box + "label conf"
    caption) and the frame is written to ``output_path`` using the ``mp4v``
    codec. Note that, despite the function's name, no identity is carried from
    one frame to the next here -- each frame is detected independently.

    Args:
        video_path: Path of the input video, passed to ``cv2.VideoCapture``.
        output_path: Path of the annotated video, passed to ``cv2.VideoWriter``.
        start_time: Start of the window in seconds (default 0).
        end_time: End of the window in seconds. ``None`` (default) means run up
            to the frame count reported by the capture. ``0`` is honoured as a
            real end time (an empty window), not treated as "no end".
        conf_threshold: Detections with confidence less than or equal to this
            value are not drawn (default 0.5).

    Raises:
        IOError: If ``video_path`` cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly rather than silently producing an empty output file.
        raise IOError(f"Could not open video: {video_path}")

    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97): truncating it to an int
        # shifts the start/end frames and changes the output's playback speed.
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        start_frame = int(start_time * fps)
        # Compare against None so that end_time=0 is not mistaken for "no end".
        end_frame = int(end_time * fps) if end_time is not None else total_frames

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        model = load_yolo_v8_model()
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model.to(device)

        while cap.isOpened():
            current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            if current_frame >= end_frame:
                break

            ret, frame = cap.read()
            if not ret:
                break

            for result in model(frame):
                _draw_detections(frame, result, model.names, conf_threshold)

            out.write(frame)
    finally:
        # Release the capture and writer even when inference or I/O raises, so
        # the output file is finalised and the input handle is not leaked.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Annotate only the 10 s - 20 s window of the cars clip.
detect_and_track_yolo_v8(
    video_path='/content/cars.mp4',
    output_path='/content/cars_Tracking.mp4',
    start_time=10,
    end_time=20,
)



0: 384x640 1 person, 10 cars, 114.4ms
Speed: 5.2ms preprocess, 114.4ms inference, 1855.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 8.3ms
Speed: 2.5ms preprocess, 8.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 6.6ms
Speed: 2.2ms preprocess, 6.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 7.0ms
Speed: 1.8ms preprocess, 7.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 7.3ms
Speed: 1.1ms preprocess, 7.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 7.1ms
Speed: 1.3ms preprocess, 7.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 1 bus, 7.4ms
Speed: 1.6ms preprocess, 7.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 8.6ms
Speed: 1.2ms prepr

In [6]:
# Annotate the supermarket clip using the default time window
# (start_time=0, end_time=None).
detect_and_track_yolo_v8(
    video_path='/content/supermarket.mp4',
    output_path='/content/supermarket_Tracking.mp4',
)


0: 480x640 1 apple, 121.2ms
Speed: 1.6ms preprocess, 121.2ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orange, 2 potted plants, 6.6ms
Speed: 1.5ms preprocess, 6.6ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 oranges, 2 potted plants, 10.9ms
Speed: 2.0ms preprocess, 10.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orange, 2 potted plants, 10.4ms
Speed: 2.4ms preprocess, 10.4ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orange, 1 potted plant, 11.0ms
Speed: 1.3ms preprocess, 11.0ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orange, 1 potted plant, 10.7ms
Speed: 1.4ms preprocess, 10.7ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orange, 1 potted plant, 11.2ms
Speed: 1.3ms preprocess, 11.2ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 orang