In [None]:
!pip install ultralytics



In [None]:
# This script is designed to run in a Google Colab environment.
# It uses Ultralytics YOLOv8 for object detection and ByteTrack for object tracking.

import cv2
from ultralytics import YOLO
import os
import sys

# --- CONFIGURATION ---
# All paths below are Colab-local defaults; edit them to point at your own
# files. For files kept on Google Drive, mount the drive first and use a path
# such as '/content/drive/MyDrive/path/to/<file>'.

# -- Inputs --
# Trained YOLO weights file (.pt) that is handed to `YOLO(...)`.
model_path = "/content/best (2).pt"
# Video that will be read frame by frame.
video_path = "/content/sample_video1.mp4"

# -- Outputs --
# Destination of the annotated copy of the input video.
output_video_path = "/content/output_detection.mp4"
# Destination of the text log reported at the end of the run.
log_file_path = "/content/detection_log.txt"

# --- MAIN SCRIPT ---

# Load the trained model weights. Any failure is reported and the run is
# aborted with a non-zero exit status so that it is not mistaken for success.
# Only the constructor call sits inside the `try`, because it is the only
# statement here expected to raise.
try:
    model = YOLO(model_path)
except Exception as e:
    print(f"❌ Error loading model: {e}")
    sys.exit(1)
print(f"✅ Successfully loaded model from: {model_path}")

# Open the input video. Failure is detected through `isOpened()` rather than
# an exception, so the handle is checked explicitly before continuing.
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"❌ Error opening video file: {video_path}")
    cap.release()
    sys.exit(1)

# Read the source geometry and frame rate so the output video matches the input.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Some containers do not report a frame rate (the property comes back as 0).
# A writer created with a non-positive FPS does not produce a usable file, so
# fall back to a conventional default. `not fps > 0` also catches NaN.
if not fps > 0:
    fps = 30.0
    print(f"⚠️ Source FPS unavailable; using {fps} FPS for the output video.")

# Define the video writer for the output file
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# A writer that failed to open silently drops every frame passed to `write`,
# so stop here instead of running the whole inference for nothing.
if not out.isOpened():
    print(f"❌ Error creating output video file: {output_video_path}")
    cap.release()
    out.release()
    sys.exit(1)

# --- IMPORTANT: CLASS MAPPING ---
# Maps class index -> human-readable label.
#
# The keys are what the inference loop passes to the tracker as its `classes=`
# filter, so they have to be the class indices the model was trained with.
# Those indices are listed in the model's `data.yaml`; adjust the entries
# below if your model uses a different set (for example a two-class model
# with 'car' as 0 and 'pedestrian' as 1 would need just those two entries).
custom_classes = {
    0: "Human",
    1: "Stop Sign",
    2: "Bus",
    3: "Truck",
    4: "Bicycle",
    5: "Traffic Light",
    6: "Motorcycle",
    7: "Car",
}

# --- INFERENCE LOOP ---
# Reads the video frame by frame, runs tracking on each frame, writes the
# annotated frame to the output video and appends one line per frame to the
# log file. Errors are reported but not re-raised (best effort); the success
# messages are printed only when the whole video was processed.
print("\n🚦 Starting video inference...")

frame_count = 0
completed = False
# Loop-invariant: the class filter is the set of indices declared above.
class_filter = list(custom_classes)

try:
    with open(log_file_path, "w", encoding="utf-8") as log_file:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): nothing more to process.
                break

            # Run inference with ByteTrack enabled for tracking.
            # `persist=True` keeps the tracker state between successive calls;
            # `classes` restricts the detections to the configured indices.
            results = model.track(frame, persist=True, tracker="bytetrack.yaml", classes=class_filter)[0]

            # Draw the annotations and append the frame to the output video.
            out.write(results.plot())
            frame_count += 1

            # Collect one "id/class/conf" entry per detection for the log.
            entries = []
            boxes = results.boxes
            if boxes is not None and len(boxes) > 0:
                cls_ids = [int(c) for c in boxes.cls.tolist()]
                confs = boxes.conf.tolist()
                # `boxes.id` is None when no track IDs were assigned.
                if boxes.id is not None:
                    track_ids = [int(t) for t in boxes.id.tolist()]
                else:
                    track_ids = [None] * len(cls_ids)
                for track_id, cls_id, conf in zip(track_ids, cls_ids, confs):
                    label = custom_classes.get(cls_id, str(cls_id))
                    shown_id = "-" if track_id is None else track_id
                    entries.append(f"id={shown_id} class={label} conf={conf:.2f}")

            line = f"frame={frame_count} detections={len(entries)}"
            if entries:
                line += " | " + "; ".join(entries)
            log_file.write(line + "\n")

            # Print progress every 100 frames
            if frame_count % 100 == 0:
                print(f"Processed {frame_count} frames...")

    completed = True

except Exception as e:
    print(f"\n❌ An error occurred during inference: {e}")

finally:
    # Cleanup and release resources. Releasing the writer is what finalises
    # the output file, so the summary below is printed only afterwards.
    cap.release()
    out.release()

if completed:
    print("\n✅ Inference complete!")
    print(f"📁 Log saved to: {log_file_path}")
    print(f"📹 Annotated video saved to: {output_video_path}")
else:
    print(f"⚠️ Inference stopped after {frame_count} frames; output files may be incomplete.")

✅ Successfully loaded model from: /content/best (2).pt

🚦 Starting video inference...

0: 384x640 1 Truck, 1 Motorcycle, 1 Car, 284.8ms
Speed: 3.5ms preprocess, 284.8ms inference, 15.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 1 Motorcycle, 275.3ms
Speed: 3.5ms preprocess, 275.3ms inference, 8.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 285.9ms
Speed: 5.0ms preprocess, 285.9ms inference, 8.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 272.9ms
Speed: 3.8ms preprocess, 272.9ms inference, 9.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 267.5ms
Speed: 4.3ms preprocess, 267.5ms inference, 5.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 289.5ms
Speed: 3.5ms preprocess, 289.5ms inference, 9.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Truck, 282.3ms
Speed: 5.2ms preprocess, 282.3ms inference, 10.5ms postprocess per image at shape (1, 3