## Check GPU Access

In [None]:
# Probe for an NVIDIA GPU. A "command not found" result (as in the saved output)
# means the tool is not available on this runtime; the Ultralytics check further
# down reports the device actually used.
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


## Connect with Google Drive

In [None]:
# gdown is used by the download cell below to fetch the source video from Google Drive by file ID.
!pip install gdown



In [None]:
! rm -rf video.mp4

### Download the source video

In [None]:
import gdown

# Update with your file's specific ID
file_id = "1NNhyO_PUrfy3dVat_BmlYaBSDCLIzo2Q"
url = f"https://drive.google.com/uc?id={file_id}"

output = "vehicle-counting.mp4"

# gdown.download returns the output path on success; a falsy result means the
# file was not retrieved (e.g. wrong ID or sharing permissions). Fail here rather
# than letting a later cell choke on a missing video.
downloaded_path = gdown.download(url, output, quiet=False)
if not downloaded_path:
    raise RuntimeError(f"Could not download {url} to {output!r}; check the file ID and sharing settings.")

# Keep the path as the cell's displayed value.
downloaded_path

Downloading...
From: https://drive.google.com/uc?id=1NNhyO_PUrfy3dVat_BmlYaBSDCLIzo2Q
To: /content/vehicle-counting.mp4
100%|██████████| 35.3M/35.3M [00:00<00:00, 61.8MB/s]


'vehicle-counting.mp4'

In [None]:
import os
# Working directory of the kernel; the saved output shows /content on this runtime.
HOME = os.getcwd()
print(HOME)

/content


In [None]:
# The video is downloaded into the working directory, so build the path from HOME
# (defined in the previous cell) instead of hard-coding the Colab /content prefix.
SOURCE_VIDEO_PATH = os.path.join(HOME, "vehicle-counting.mp4")

## Install YOLOv8

In [None]:
# Pip install method (recommended)
# The specifier caps the install at ultralytics 8.3.40 or older.

!pip install "ultralytics<=8.3.40"

# Clear the pip log so only the environment summary below remains visible.
from IPython import display
display.clear_output()

# Prints the installed Ultralytics version together with Python, torch and hardware details.
import ultralytics
ultralytics.checks()

Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.20GHz)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 32.7/107.7 GB disk)


In [None]:
# settings
# Weights file name passed to YOLO(...) in the next cell.
MODEL = "yolov8x.pt"

In [None]:
from ultralytics import YOLO

# Load the weights named by MODEL (downloaded on first use, per the saved output)
# and fuse layers for inference.
# NOTE(review): the tracking cell further down builds its own YOLO('yolov8s.pt')
# for inference rather than reusing this instance — confirm which weights are intended.
model = YOLO(MODEL)
model.fuse()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:01<00:00, 113MB/s]


YOLOv8x summary (fused): 268 layers, 68,200,608 parameters, 0 gradients, 257.8 GFLOPs


In [None]:
# Pin supervision to 0.3.0: the tracking cell calls sv.Detections.from_yolov8 and
# sv.BoxAnnotator(text_thickness=..., text_scale=...).
# NOTE(review): later supervision releases are believed to have changed these APIs —
# verify before lifting the pin.
!pip install supervision==0.3.0

Collecting supervision==0.3.0
  Downloading supervision-0.3.0-py3-none-any.whl.metadata (6.4 kB)
Downloading supervision-0.3.0-py3-none-any.whl (21 kB)
Installing collected packages: supervision
  Attempting uninstall: supervision
    Found existing installation: supervision 0.25.1
    Uninstalling supervision-0.25.1:
      Successfully uninstalled supervision-0.25.1
Successfully installed supervision-0.3.0


## Tracking with ByteTrack

In [None]:
!pip install supervision




In [None]:
# Speed-estimation settings. The tracking cell below assigns these same three
# names again before it uses them.
SCALE_FACTOR = 0.05  # multiplier applied to the measured pixel displacement rate
FPS = 30  # frame rate used to turn frame counts into seconds
VEHICLE_POSITIONS = {}  # tracker id -> last seen (center_x, center_y, frame_number)


In [None]:
import supervision as sv
from ultralytics import YOLO
import os
import json
import cv2  # OpenCV for image saving
import numpy as np

# Point-in-polygon helper used to restrict processing to the road region.
def is_in_target_polygon(center_x, center_y, polygon):
    """Return True when (center_x, center_y) lies inside or on the edge of `polygon`.

    `polygon` is converted to an int32 array and passed to cv2.pointPolygonTest
    with distance measurement disabled.
    """
    contour = np.array(polygon, dtype=np.int32)
    # With measureDist=False the test yields +1 (inside), 0 (on the edge) or -1 (outside).
    location = cv2.pointPolygonTest(contour, (center_x, center_y), False)
    return location >= 0

# Quadrilateral in image pixel coordinates that is mapped onto the TARGET rectangle.
# Some corners lie outside the 0-based image area (negative x), which is kept as-is.
SOURCE = np.array([[1252, 787], [2298, 803], [5039, 2159], [-550, 2159]])

# Size of the rectified (top-down) view in target units.
TARGET_WIDTH, TARGET_HEIGHT = 25, 250

# Corners of the target rectangle, listed in the same order as SOURCE.
TARGET = np.array(
    [
        [0, 0],
        [TARGET_WIDTH - 1, 0],
        [TARGET_WIDTH - 1, TARGET_HEIGHT - 1],
        [0, TARGET_HEIGHT - 1],
    ]
)

# 3x3 homography from SOURCE to TARGET; OpenCV requires float32 corner arrays.
perspective_transform = cv2.getPerspectiveTransform(
    np.asarray(SOURCE, dtype=np.float32),
    np.asarray(TARGET, dtype=np.float32),
)

# --- Output locations -------------------------------------------------------
FRAME_DATA_PATH = 'frame_data.json'
FRAME_SAVE_DIR = 'frames/'
TARGET_VIDEO_PATH = 'output_video.mp4'

# Make sure the per-frame image directory exists before the loop writes into it.
os.makedirs(FRAME_SAVE_DIR, exist_ok=True)

# --- Speed estimation settings ----------------------------------------------
# SCALE_FACTOR multiplies the pixel displacement per second computed in the
# tracking loop; the loop labels the product as km/h.
FPS = 30
SCALE_FACTOR = 0.05
VEHICLE_POSITIONS = dict()  # tracker id -> last seen (center_x, center_y, frame_number)

# --- Per-run tracking state --------------------------------------------------
# Raw tracker ids are remapped to consecutive ids starting at 1.
id_map = dict()
id_counter = 1
frame_data_list = list()

# --- Video input and drawing -------------------------------------------------
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
generator = sv.video.get_video_frames_generator(SOURCE_VIDEO_PATH)

# Annotator that draws bounding boxes and their text labels.
box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)

# Track vehicles frame by frame, estimate a speed per track, and produce three
# outputs: the annotated video, per-frame JPEGs, and one record per frame in
# `frame_data_list` for the JSON export.
with sv.VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
    tracking_results = YOLO('yolov8s.pt').track(
        source=SOURCE_VIDEO_PATH,
        tracker='bytetrack.yaml',
        show=False,
        stream=True,
        agnostic_nms=True,
        persist=True
    )

    for frame_number, result in enumerate(tracking_results):
        # Extract frame and detections
        frame = result.orig_img
        detections = sv.Detections.from_yolov8(result)

        # Everything below is collected per detection so that boxes, labels and
        # JSON records always describe the same objects.
        labels = []
        frame_detections = []
        inside_polygon = np.zeros(len(detections), dtype=bool)

        # The tracker may return no ids for a frame even when there are raw
        # detections; such frames carry no track to measure, so they are written
        # out without boxes instead of crashing on a missing id.
        if result.boxes.id is not None:
            sequential_ids = []
            for raw_id in result.boxes.id.cpu().numpy().astype(int):
                raw_id = int(raw_id)
                # Assign the next sequential id the first time a raw id is seen.
                if raw_id not in id_map:
                    id_map[raw_id] = id_counter
                    id_counter += 1
                sequential_ids.append(id_map[raw_id])
            detections.tracker_id = np.array(sequential_ids, dtype=int)

            for index, (bbox, confidence, class_id, tracker_id) in enumerate(detections):
                tracker_id = int(tracker_id)
                class_id = int(class_id)
                bbox = [float(coord) for coord in bbox]
                center_x, center_y = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2

                # A track seen for the first time is compared against itself,
                # which yields zero elapsed time and therefore a speed of 0.
                prev_x, prev_y, prev_frame = VEHICLE_POSITIONS.get(
                    tracker_id, (center_x, center_y, frame_number)
                )
                displacement = float(np.sqrt((center_x - prev_x) ** 2 + (center_y - prev_y) ** 2))
                time_elapsed = (frame_number - prev_frame) / FPS
                speed = (displacement / time_elapsed) * SCALE_FACTOR if time_elapsed > 0 else 0.0

                # Positions are updated for every tracked object, inside the polygon or not.
                VEHICLE_POSITIONS[tracker_id] = (center_x, center_y, frame_number)

                # Only objects whose center is inside the polygon are drawn and exported.
                if not is_in_target_polygon(center_x, center_y, SOURCE):
                    continue

                inside_polygon[index] = True
                # Class names come from the result itself, i.e. from the model
                # that actually produced these detections.
                labels.append(
                    f"ID: {tracker_id} | Speed: {speed:.2f} km/h | {result.names[class_id]} {confidence:.2f}"
                )
                frame_detections.append(
                    {
                        "tracker_id": tracker_id,
                        "class_id": class_id,
                        "confidence": float(confidence),
                        "bbox": bbox,
                        "speed_kmh": float(speed),
                    }
                )

        # Restrict the drawn detections to the labelled ones so that labels[i]
        # always belongs to box i.
        visible_detections = sv.Detections(
            xyxy=detections.xyxy[inside_polygon],
            confidence=detections.confidence[inside_polygon],
            class_id=detections.class_id[inside_polygon],
            tracker_id=(
                None if detections.tracker_id is None
                else np.asarray(detections.tracker_id)[inside_polygon]
            ),
        )

        # Annotate the frame
        frame = box_annotator.annotate(scene=frame, detections=visible_detections, labels=labels)

        # Draw the source polygon
        cv2.polylines(frame, [SOURCE.astype(np.int32)], isClosed=True, color=(0, 255, 0), thickness=2)

        # Apply perspective transformation
        warped_frame = cv2.warpPerspective(frame, perspective_transform, (TARGET_WIDTH, TARGET_HEIGHT))

        # Save the current warped frame for visualization if needed
        cv2.imwrite(f"warped_frame_{frame_number:04d}.jpg", warped_frame)

        # Save current frame to disk. The frame is written as-is, exactly like the
        # warped image above and the video below; an extra RGB->BGR conversion
        # here would swap the red and blue channels in the saved JPEGs.
        frame_path = os.path.join(FRAME_SAVE_DIR, f"frame_{frame_number:04d}.jpg")
        cv2.imwrite(frame_path, frame)

        # Collect frame data for JSON
        frame_data_list.append(
            {
                "frame_number": frame_number,
                "detections": frame_detections,
            }
        )

        # Write annotated frame to the output video
        sink.write_frame(frame)

# Reported only after the sink has been closed, i.e. once the video file is complete.
print("Vehicle tracking and speed estimation completed and output video saved.")

# Export the collected per-frame records as JSON indented by four spaces.
with open(FRAME_DATA_PATH, 'w') as json_file:
    json_file.write(json.dumps(frame_data_list, indent=4))

# Tell the reader where both kinds of output ended up.
print(f"Frames saved to '{FRAME_SAVE_DIR}' and frame data saved to '{FRAME_DATA_PATH}'.")



video 1/1 (frame 1/538) /content/vehicle-counting.mp4: 384x640 4 cars, 1 truck, 391.1ms
video 1/1 (frame 2/538) /content/vehicle-counting.mp4: 384x640 4 cars, 1 truck, 409.3ms
video 1/1 (frame 3/538) /content/vehicle-counting.mp4: 384x640 4 cars, 1 truck, 409.4ms
video 1/1 (frame 4/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 truck, 428.1ms
video 1/1 (frame 5/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 bus, 398.6ms
video 1/1 (frame 6/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 bus, 413.4ms
video 1/1 (frame 7/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 truck, 413.8ms
video 1/1 (frame 8/538) /content/vehicle-counting.mp4: 384x640 2 cars, 1 truck, 417.3ms
video 1/1 (frame 9/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 truck, 401.2ms
video 1/1 (frame 10/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 train, 1 truck, 389.3ms
video 1/1 (frame 11/538) /content/vehicle-counting.mp4: 384x640 3 cars, 1 train, 1 truck, 484.8ms
video 1/1 (fram

### Generate output Video Link

In [None]:
from IPython.display import FileLink

# Link to the file written by the tracking cell; reusing TARGET_VIDEO_PATH keeps
# the link correct if the output name is ever changed there.
FileLink(TARGET_VIDEO_PATH)

In [None]:
# Download the output MP4 video
import os

from google.colab import files

# The tracking cell writes TARGET_VIDEO_PATH relative to the working directory,
# so resolve that path here instead of assuming the file lives under /content.
files.download(os.path.abspath(TARGET_VIDEO_PATH))
print("Downloaded video successfully")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded video successfully
