In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Install Dependencies
!pip install ultralytics opencv-python-headless

Collecting ultralytics
  Downloading ultralytics-8.3.145-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [4]:
# Imports
import cv2
import torch
from ultralytics import YOLO
import numpy as np
import os

# 🔍 Load YOLOv5 model
model = YOLO('yolov5s.pt')
model.conf = 0.5

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PRO TIP 💡 Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt to 'yolov5su.pt'...


100%|██████████| 17.7M/17.7M [00:00<00:00, 179MB/s]


In [7]:
# Set up paths
input_video_path = '/content/drive/MyDrive/dataset/coffee_mugs.mp4'
output_video_path = '/content/drive/MyDrive/dataset/mug_tracking_output.mp4'

# Load YOLOv5 model from Ultralytics
model = YOLO('yolov5s.pt')
model.conf = 0.5

# Open video file
cap = cv2.VideoCapture(input_video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Video Writer for output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Tracker setup
multi_tracker = cv2.legacy.MultiTracker_create()
tracker_type = 'csrt'
mug_ids = {}
initialized = False

# Create OpenCV tracker
def create_tracker():
    return cv2.legacy.TrackerCSRT_create()

PRO TIP 💡 Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.



In [8]:
# Run detection + tracking
frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    if not initialized:
        results = model(frame)[0]
        count = 0
        for det in results.boxes:
            cls_id = int(det.cls[0].item())
            conf = det.conf[0].item()
            label = model.names[cls_id]
            if label == 'cup':
                x1, y1, x2, y2 = map(int, det.xyxy[0].tolist())
                bbox = (x1, y1, x2 - x1, y2 - y1)
                tracker = create_tracker()
                multi_tracker.add(tracker, frame, bbox)
                mug_ids[len(mug_ids)] = bbox
                count += 1
                if count == 2:
                    break
        initialized = True
    else:
        success, boxes = multi_tracker.update(frame)
        for i, newbox in enumerate(boxes):
            (x, y, w, h) = [int(v) for v in newbox]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, f"Mug ID: {i}", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    out.write(frame)
    frame_count += 1

cap.release()
out.release()
cv2.destroyAllWindows()

print(f"Output video saved to: {output_video_path}")


0: 384x640 2 persons, 2 cups, 1 chair, 1 dining table, 44.3ms
Speed: 22.4ms preprocess, 44.3ms inference, 347.6ms postprocess per image at shape (1, 3, 384, 640)
Output video saved to: /content/drive/MyDrive/dataset/mug_tracking_output.mp4
