In [2]:

!pip install -q ultralytics opencv-python

import cv2
import numpy as np
from ultralytics import YOLO
from google.colab import files
import os
import time

video_path = "/content/853889-hd_1920_1080_25fps.mp4"

class CentroidTracker:
    def __init__(self, maxDisappeared=40, maxDistance=50):
        self.nextObjectID = 0
        self.objects = dict()        # objectID -> centroid (x,y)
        self.rects = dict()          # objectID -> bounding box (startX, startY, endX, endY)
        self.disappeared = dict()    # objectID -> frames disappeared
        self.maxDisappeared = maxDisappeared
        self.maxDistance = maxDistance

    def register(self, centroid, rect):
        self.objects[self.nextObjectID] = centroid
        self.rects[self.nextObjectID] = rect
        self.disappeared[self.nextObjectID] = 0
        self.nextObjectID += 1

    def deregister(self, objectID):
        del self.objects[objectID]
        del self.rects[objectID]
        del self.disappeared[objectID]

    def update(self, rects):
        # rects: list of (startX, startY, endX, endY)
        if len(rects) == 0:
            # mark all existing objects as disappeared
            for objectID in list(self.disappeared.keys()):
                self.disappeared[objectID] += 1
                if self.disappeared[objectID] > self.maxDisappeared:
                    self.deregister(objectID)
            return self.objects, self.rects

        # compute input centroids
        inputCentroids = np.zeros((len(rects), 2), dtype="int")
        for (i, (startX, startY, endX, endY)) in enumerate(rects):
            cX = int((startX + endX) / 2.0)
            cY = int((startY + endY) / 2.0)
            inputCentroids[i] = (cX, cY)

        # if no existing objects, register all
        if len(self.objects) == 0:
            for i in range(len(inputCentroids)):
                self.register(inputCentroids[i], rects[i])
        else:
            objectIDs = list(self.objects.keys())
            objectCentroids = list(self.objects.values())

            # distance matrix between object centroids and input centroids
            D = np.linalg.norm(np.array(objectCentroids)[:, None] - inputCentroids[None, :], axis=2)

            # greedy match: sort rows by their minimal distance
            rows = D.min(axis=1).argsort()
            cols = D.argmin(axis=1)[rows]

            usedRows = set()
            usedCols = set()

            for (row, col) in zip(rows, cols):
                if row in usedRows or col in usedCols:
                    continue
                if D[row, col] > self.maxDistance:
                    continue
                objectID = objectIDs[row]
                self.objects[objectID] = inputCentroids[col]
                self.rects[objectID] = rects[col]
                self.disappeared[objectID] = 0
                usedRows.add(row)
                usedCols.add(col)

            # find unmatched
            unusedRows = set(range(0, D.shape[0])).difference(usedRows)
            unusedCols = set(range(0, D.shape[1])).difference(usedCols)

            # if more existing objects than inputs -> increment disappeared
            if D.shape[0] >= D.shape[1]:
                for row in unusedRows:
                    objectID = objectIDs[row]
                    self.disappeared[objectID] += 1
                    if self.disappeared[objectID] > self.maxDisappeared:
                        self.deregister(objectID)
            else:
                # new objects appeared -> register them
                for col in unusedCols:
                    self.register(inputCentroids[col], rects[col])

        return self.objects, self.rects

class TrackableObject:
    def __init__(self, objectID, centroid):
        self.objectID = objectID
        self.centroids = [centroid]
        self.counted = False

print("Loading YOLOv8 model (may take ~30s)...")
model = YOLO("yolov8n.pt")  # downloads if needed

ct = CentroidTracker(maxDisappeared=40, maxDistance=60)
trackableObjects = dict()
totalCount = 0

# open video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise SystemExit("❌ Error: Could not open the input video. Check path or try re-downloading.")

W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
fps = fps if fps and fps > 0 else 20.0

# output writer (MP4)
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out_path = "people_counting.mp4"
out = cv2.VideoWriter(out_path, fourcc, fps, (W, H))

# horizontal counting line (y-coordinate)
line_y = H // 2
line_offset = 10  # thickness offset for text placement

print(f"Video opened: {video_path}  (W,H) = ({W},{H}), fps={fps}")
start = time.time()

# ======= Step 5: Process frames =======
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # YOLO inference (single frame)
    results = model(frame, stream=True)  # stream=True yields results generator

    rects = []
    for result in results:
        for box in result.boxes:
            cls_id = int(box.cls[0])
            cls_name = model.names[cls_id]
            conf = float(box.conf[0])
            if cls_name != "person":
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            rects.append( (x1, y1, x2, y2) )

    # update tracker
    objects, bboxes = ct.update(rects)

    # draw counting line
    cv2.line(frame, (0, line_y), (W, line_y), (0, 0, 255), 2)

    # update trackable objects and counting logic
    for objectID, centroid in objects.items():
        to = trackableObjects.get(objectID, None)
        bbox = bboxes.get(objectID, None)
        if to is None:
            to = TrackableObject(objectID, centroid)
        else:
            to.centroids.append(centroid)

        # if not counted yet, check crossing
        if not to.counted and len(to.centroids) >= 2:
            prev_y = to.centroids[-2][1]
            cur_y = to.centroids[-1][1]

            # crossing downwards (above -> below)
            if prev_y < line_y and cur_y >= line_y:
                totalCount += 1
                to.counted = True
            # crossing upwards (below -> above) — count if you want both directions
            elif prev_y > line_y and cur_y <= line_y:
                totalCount += 1
                to.counted = True

        trackableObjects[objectID] = to

        # draw bbox or centroid + ID
        if bbox is not None:
            (x1, y1, x2, y2) = bbox
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID {objectID}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
        # draw centroid
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (255, 0, 0), -1)

    # overlay total count
    cv2.putText(frame, f"Total: {totalCount}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

    out.write(frame)

# cleanup
cap.release()
out.release()
cv2.destroyAllWindows()
end = time.time()
print(f"Processing done in {end-start:.1f}s. Saved -> {out_path}")


if os.path.exists(out_path):
    files.download(out_path)
else:
    print("❌ Output file not found.")


Loading YOLOv8 model (may take ~30s)...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 69.3MB/s 0.1s
Video opened: /content/853889-hd_1920_1080_25fps.mp4  (W,H) = (1920,1080), fps=25.0

0: 384x640 37 persons, 2 birds, 7.4ms
Speed: 2.5ms preprocess, 7.4ms inference, 21.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 3 birds, 8.3ms
Speed: 3.3ms preprocess, 8.3ms inference, 17.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 3 birds, 9.5ms
Speed: 3.3ms preprocess, 9.5ms inference, 18.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 3 birds, 9.5ms
Speed: 3.4ms preprocess, 9.5ms inference, 18.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 2 birds, 8.6ms
Speed: 3.4ms preprocess, 8.6ms inference, 17.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 2 birds, 9.3ms
Speed: 3.5ms 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>