In [None]:
# Mount Google Drive at /content/drive; every path used by the later cells
# (project checkout, checkpoint, input/output videos) lives under that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Record the interpreter version of this runtime (shell escape via `!`).
!python --version

Python 3.10.12


In [None]:
!pip install ultralytics
import ultralytics
ultralytics.__version__

Collecting ultralytics
  Downloading ultralytics-8.3.49-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.49-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.7/898.7 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.49 ultralytics-thop-2.0.13
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


'8.3.49'

In [None]:
# Work from inside the project checkout on Drive. The next cell imports
# `deep_sort.deep_sort`, which presumably resolves relative to this directory
# (the checkpoint path used there also sits under it).
import os
os.chdir("/content/drive/MyDrive/deepsort/Tracking-and-counting-Using-YOLOv8-and-DeepSORT")

In [None]:
from ultralytics import YOLO
from deep_sort.deep_sort import DeepSort
import cv2
import numpy as np

# Load YOLOv8 model
model = YOLO('yolov8n.pt')  # Using a lightweight model

# Detection filter settings. The same confidence value is handed to DeepSORT
# below so the two thresholds cannot drift apart.
PERSON_CLASS_ID = 0   # only boxes of this class are tracked (COCO "person" for the stock weights)
MIN_CONFIDENCE = 0.4  # detections at or below this score are discarded
FALLBACK_FPS = 30.0   # used only when the container does not report a frame rate

# Initialize DeepSORT
deepsort = DeepSort(
    model_path='/content/drive/MyDrive/deepsort/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/deep_sort/deep/checkpoint/ckpt.t7',
    max_dist=0.3,
    min_confidence=MIN_CONFIDENCE,
    nms_max_overlap=1.0,
    max_iou_distance=0.7,
    max_age=100,
    n_init=1,
    nn_budget=100,
)

# Video file path
video_path = '/content/drive/MyDrive/deepsort/test_videos/first.mp4'
# Settings for saving the result video
output_path = '/content/drive/MyDrive/deepsort/test_videos/909090_output.mp4'

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a wrong path silently yields zero processed frames.
    raise IOError(f"Could not open input video: {video_path}")

out = None
frame_count = 0  # Current frame counter

try:
    # cap.get() returns 0 for properties the backend cannot provide; a
    # 0-fps writer produces an unusable file, so fall back to a sane rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or FALLBACK_FPS
    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream (or read failure)

        frame_count += 1

        # Perform YOLO detection
        results = model(frame)

        # Convert YOLO results to DeepSORT input format
        detections = []  # Bounding boxes: [x_center, y_center, width, height]
        confidences = []  # Confidence scores
        for result in results:
            for box in result.boxes:
                class_id = int(box.cls.cpu().numpy()[0])
                confidence = float(box.conf.cpu().numpy()[0])
                if class_id != PERSON_CLASS_ID or confidence <= MIN_CONFIDENCE:
                    continue  # wrong class or low confidence

                # Move the corner coordinates to the host once per box.
                x_min, y_min, x_max, y_max = box.xyxy.cpu().numpy()[0]

                # Compute center and size, rounded to whole pixels
                x_center = int(round((x_min + x_max) / 2))
                y_center = int(round((y_min + y_max) / 2))
                width = int(round(x_max - x_min))
                height = int(round(y_max - y_min))

                detections.append([x_center, y_center, width, height])
                confidences.append(confidence)

        # Convert to numpy arrays
        detections = np.array(detections)
        confidences = np.array(confidences)

        # Print frame number and YOLO output
        print(f"Frame: {frame_count}, Detections: {len(detections)} objects")

        # Perform DeepSORT tracking
        if len(detections) > 0:
            tracks = deepsort.update(detections, confidences, frame)

            # Visualize tracked objects; each track unpacks to four corner
            # coordinates followed by the track id.
            for track in tracks:
                x_min, y_min, x_max, y_max, track_id = track.astype(int)
                print(f"Track ID: {track_id}, Position: ({x_min}, {y_min}, {x_max}, {y_max})")
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
                cv2.putText(frame, f"ID {track_id}", (x_min, y_min - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        else:
            # No detections this frame: let existing tracks age.
            deepsort.increment_ages()

        # Save result
        out.write(frame)
finally:
    # Always release the handles so the output file is finalized even when
    # detection or tracking raises part-way through the video.
    cap.release()
    if out is not None:
        out.release()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 3.7ms preprocess, 9.5ms inference, 1.6ms postprocess per image at shape (1, 3, 416, 640)
Frame: 1804, Detections: 7 objects
Track ID: 7, Position: (44, 155, 119, 354)
Track ID: 31, Position: (32, 155, 107, 326)
Track ID: 35, Position: (648, 243, 819, 629)
Track ID: 38, Position: (217, 67, 321, 325)
Track ID: 56, Position: (702, 149, 767, 273)
Track ID: 59, Position: (96, 319, 281, 667)
Track ID: 60, Position: (282, 265, 428, 512)

0: 416x640 9 persons, 1 handbag, 9.7ms
Speed: 3.1ms preprocess, 9.7ms inference, 1.6ms postprocess per image at shape (1, 3, 416, 640)
Frame: 1805, Detections: 7 objects
Track ID: 7, Position: (42, 155, 119, 353)
Track ID: 31, Position: (32, 154, 105, 327)
Track ID: 35, Position: (648, 243, 819, 629)
Track ID: 38, Position: (217, 67, 320, 324)
Track ID: 56, Position: (702, 150, 767, 275)
Track ID: 59, Position: (96, 318, 281, 667)
Track ID: 60, Position: (282, 265, 429, 512)

0: 416x640 9

In [None]:
from collections import defaultdict
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Initialize metrics storage
# Both dicts default missing keys to an empty list. Neither is populated by
# the cells visible in this notebook.
# NOTE(review): `detections` reuses the name the tracking cell assigns its
# per-frame numpy array to; running this cell rebinds it.
ground_truth = defaultdict(list)  # Ground truth per frame
detections = defaultdict(list)  # Detections per frame
iou_threshold = 0.5  # IOU threshold for TP/FP

def calculate_iou(boxA, boxB):
    """Compute the Intersection over Union (IoU) of two bounding boxes.

    Args:
        boxA: Box as ``[x_min, y_min, x_max, y_max]``.
        boxB: Box in the same corner format.

    Returns:
        float: Intersection area divided by union area, in ``[0, 1]``.
        Returns ``0.0`` when the union has no area (e.g. both boxes are
        degenerate) instead of dividing by zero.
    """
    # Corners of the overlap rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp at zero so disjoint boxes contribute no intersection.
    interArea = max(0, xB - xA) * max(0, yB - yA)

    # Clamp each side as well: an inverted box (max < min) has no area and
    # must not produce a negative one.
    boxAArea = max(0, boxA[2] - boxA[0]) * max(0, boxA[3] - boxA[1])
    boxBArea = max(0, boxB[2] - boxB[0]) * max(0, boxB[3] - boxB[1])

    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        return 0.0
    return interArea / float(unionArea)

def evaluate_tracking(gt_boxes, detected_boxes, iou_threshold=0.5):
    """Count true positives, false positives and false negatives for one frame.

    Ground-truth boxes are visited in order; each one claims the first
    not-yet-claimed detection whose IoU with it is at least ``iou_threshold``
    (greedy first-match, not best-match).

    Args:
        gt_boxes: Iterable of ground-truth boxes.
        detected_boxes: Sequence of detected boxes.
        iou_threshold: Minimum IoU for a detection to count as a match.

    Returns:
        tuple: ``(tp, fp, fn)`` where ``tp`` is the number of matched
        ground-truth boxes, ``fn`` the number left unmatched, and ``fp`` the
        number of detections no ground-truth box claimed.
    """
    claimed = set()
    hits = 0
    misses = 0

    for truth in gt_boxes:
        # Index of the first free detection that overlaps enough, if any.
        first_fit = next(
            (
                idx
                for idx, candidate in enumerate(detected_boxes)
                if idx not in claimed
                and calculate_iou(truth, candidate) >= iou_threshold
            ),
            None,
        )
        if first_fit is None:
            misses += 1
        else:
            claimed.add(first_fit)
            hits += 1

    unclaimed = len(detected_boxes) - len(claimed)
    return hits, unclaimed, misses

# Example for evaluation
# Toy single-frame example: two ground-truth boxes, three detections.
frame_gt = [
    [50, 50, 150, 150],
    [200, 200, 300, 300],
]  # Ground truth boxes
frame_detections = [
    [55, 55, 155, 155],
    [205, 205, 295, 295],
    [400, 400, 500, 500],
]  # Model detections

tp, fp, fn = evaluate_tracking(frame_gt, frame_detections, iou_threshold)

# Each ratio falls back to 0 when its denominator would be zero.
precision = 0
if tp + fp > 0:
    precision = tp / (tp + fp)

recall = 0
if tp + fn > 0:
    recall = tp / (tp + fn)

f1 = 0
if precision + recall > 0:
    f1 = 2 * precision * recall / (precision + recall)

print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")


Precision: 0.67, Recall: 1.00, F1 Score: 0.80


In [None]:
# Example for MOTA and MOTP
# Relies on `fp`, `fn`, `frame_gt` and `frame_detections` from the previous
# evaluation cell.
id_switches = 2  # Number of ID switches (hard-coded for the example)
total_gt_objects = len(frame_gt)

# NOTE(review): divides by the ground-truth count; an empty `frame_gt` would
# raise ZeroDivisionError.
tracking_errors = fp + fn + id_switches
mota = 1 - tracking_errors / total_gt_objects

# NOTE(review): zip() pairs boxes by list position and stops at the shorter
# list, so this averages IoU over index-aligned pairs rather than over the
# matches found by evaluate_tracking.
paired_ious = [
    calculate_iou(gt, det)
    for gt, det in zip(frame_gt, frame_detections)
]
motp = np.mean(paired_ious)

print(f"MOTA: {mota:.2f}, MOTP: {motp:.2f}")


MOTA: -0.50, MOTP: 0.82
