In [None]:
# Attach Google Drive to the Colab filesystem; the notebook reads its model
# weights and input video from paths below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
%pip install git+https://github.com/openai/CLIP.git
%pip install ultralytics
%pip install deep_sort_realtime

Found existing installation: clip 0.2.0
Uninstalling clip-0.2.0:
  Successfully uninstalled clip-0.2.0
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-5o5764jf
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-5o5764jf
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... done
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.8/44.8 kB 2.9 MB/s eta 0:00:00
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... done
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=34940fefb5b4190b18c6d086c90e9a182ff9274b0701c3c3a58dd8



In [None]:
# Install dependencies (run once per Colab session)
!pip install ultralytics deep-sort-realtime opencv-python-headless

# Import all
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from google.colab.patches import cv2_imshow

# -----------------------------
# Load YOLO model
# -----------------------------
model = YOLO('/content/drive/MyDrive/Colab Notebooks/yolo11s.pt')  # adjust path

# -----------------------------
# Initialize DeepSORT tracker
# -----------------------------
tracker = DeepSort(
    max_age=20,
    nn_budget=200,
    embedder='clip_RN50',
    embedder_gpu=True,
    max_iou_distance=0.5,
    n_init=3
)

# -----------------------------
# Video input/output
# -----------------------------
cap = cv2.VideoCapture('/content/drive/MyDrive/Colab Notebooks/People.mp4')

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Use 'avc1' codec for mp4 (works better in Colab)
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

active_tracks = set()

# -----------------------------
# Processing loop
# -----------------------------
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Step 1: YOLO detection
    results = model(frame, conf=0.7)

    # Step 2: Format detections for DeepSORT
    detections = []
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0].item())
            if cls == 0:  # filter for your target class
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                conf = box.conf[0].item()
                w, h = x2 - x1, y2 - y1
                detections.append(((x1, y1, w, h), conf, cls))

    # Step 3: Update tracker
    tracks = tracker.update_tracks(detections, frame=frame)

    # Step 4: Draw results
    fire_detected = False
    for track in tracks:
        if not track.is_confirmed():
            continue

        track_id = track.track_id
        ltrb = track.to_ltrb()
        x1, y1, x2, y2 = map(int, ltrb)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f'ID: {track_id}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        fire_detected = True

        if track_id not in active_tracks:
            active_tracks.add(track_id)
            print(f'New object tracked: ID {track_id}')

    if not fire_detected:
        active_tracks.clear()

    frame = cv2.resize(frame, (width, height))
    frame = cv2.convertScaleAbs(frame)
    out.write(frame)


cap.release()
out.release()
cv2.destroyAllWindows()

print("✅ Video saved as output.mp4")



0: 384x640 8 persons, 1 backpack, 1 handbag, 11.5ms
Speed: 2.0ms preprocess, 11.5ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 backpack, 1 handbag, 12.6ms
Speed: 3.0ms preprocess, 12.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 backpack, 1 handbag, 12.7ms
Speed: 3.2ms preprocess, 12.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
New object tracked: ID 1
New object tracked: ID 2
New object tracked: ID 3
New object tracked: ID 4
New object tracked: ID 5
New object tracked: ID 6
New object tracked: ID 7
New object tracked: ID 8

0: 384x640 8 persons, 1 backpack, 1 handbag, 12.0ms
Speed: 4.2ms preprocess, 12.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 backpack, 1 handbag, 11.9ms
Speed: 4.8ms preprocess, 11.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 handbag, 16.4ms
Speed: 5.