In [1]:
!nvidia-smi

Fri Mar 21 06:49:05 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8             10W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                      

In [2]:
!pip install gdown



In [3]:
# Remove any stale video.mp4 from the working directory.
# NOTE(review): nothing visible in this notebook creates video.mp4 (the download cell writes
# vehicle-counting.mp4) — presumably a leftover cleanup step; confirm it is still needed.
! rm -rf video.mp4

In [4]:
import gdown

# Google Drive ID of the input video; replace it to process a different file.
file_id = "1NNhyO_PUrfy3dVat_BmlYaBSDCLIzo2Q"
url = "https://drive.google.com/uc?id=" + file_id

# Download target, relative to the current working directory.
output = "vehicle-counting.mp4"
gdown.download(url=url, output=output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1NNhyO_PUrfy3dVat_BmlYaBSDCLIzo2Q
To: /kaggle/working/vehicle-counting.mp4
100%|██████████| 35.3M/35.3M [00:00<00:00, 266MB/s]


'vehicle-counting.mp4'

In [5]:
import os
HOME = os.getcwd()
print(HOME)

/kaggle/working


In [6]:
# Build the input path from the working directory (HOME) and the download target (output)
# instead of hard-coding the Kaggle absolute path, so the notebook also runs elsewhere and
# the path cannot drift from the file the download cell actually wrote.
SOURCE_VIDEO_PATH = os.path.join(HOME, output)

In [7]:
# Pip install method (recommended)

!pip install "ultralytics<=8.3.40"

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()

Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 6170.0/8062.4 GB disk)


In [8]:
!pip install supervision==0.3.0

from IPython import display
display.clear_output()

import supervision
print("supervision.__version__:", supervision.__version__)

supervision.__version__: 0.3.0


In [9]:
# settings
# Weights file passed to YOLO(...) when `model` is created in the next cell.
# NOTE(review): the tracking cell builds its own YOLO('yolov8s.pt') and does not read MODEL —
# confirm which weights are intended for tracking.
MODEL = "yolov8x.pt"

In [10]:
from ultralytics import YOLO

# Load the weights named by MODEL (the log below shows them being downloaded on first use).
model = YOLO(MODEL)
# Fuse the model in place; the summary line printed after this cell reports it as "(fused)".
model.fuse()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:00<00:00, 254MB/s]


YOLOv8x summary (fused): 268 layers, 68,200,608 parameters, 0 gradients, 257.8 GFLOPs


Track and count

In [11]:
import supervision as sv
from ultralytics import YOLO
import os
import json
import cv2  # OpenCV for image saving
import numpy as np
from sklearn.cluster import KMeans
import webcolors  # For exact color names

# Point-in-polygon test for a detection's center point
def is_in_target_polygon(center_x, center_y, polygon):
    """Return True when (center_x, center_y) lies inside or on the boundary of ``polygon``.

    ``polygon`` is a sequence of (x, y) vertices; it is converted to an int32 contour
    before being handed to OpenCV.
    """
    contour = np.array(polygon, dtype=np.int32)
    # measureDist=False: only the sign of the result is meaningful (negative means outside).
    side = cv2.pointPolygonTest(contour, (center_x, center_y), False)
    return side >= 0

# Perspective transformation: SOURCE quadrilateral (image pixels) -> TARGET rectangle
SOURCE = np.array([
    [1252, 787],
    [2298, 803],
    [5039, 2159],
    [-550, 2159],
])

# Size of the rectified (warped) view
TARGET_WIDTH = 25
TARGET_HEIGHT = 250

# Constants for speed calculation
# Multiplier applied to the pixel displacement per second to obtain the displayed km/h value.
# NOTE(review): the value looks hand-tuned rather than derived from the transform — confirm.
SCALE_FACTOR = 0.05
# Per-track reference (x, y, frame_number) used by the speed estimate
VEHICLE_POSITIONS = {}
# Frame rate assumed when converting frame counts to seconds
FPS = 30

# TARGET corners in the same order as SOURCE: unit square scaled to the last pixel index
TARGET = np.array([[0, 0], [1, 0], [1, 1], [0, 1]]) * np.array([TARGET_WIDTH - 1, TARGET_HEIGHT - 1])

# Compute perspective transformation matrix (OpenCV needs float32 corner arrays)
perspective_transform = cv2.getPerspectiveTransform(
    np.asarray(SOURCE, dtype=np.float32),
    np.asarray(TARGET, dtype=np.float32),
)


# --- Output locations ---
TARGET_VIDEO_PATH = "output_video.mp4"  # annotated video
FRAME_SAVE_DIR = "frames/"              # per-frame JPEG dumps
FRAME_DATA_PATH = "frame_data.json"     # per-frame detection records

# --- Drawing helper for bounding boxes and labels ---
box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)

# --- Input video metadata and frame generator ---
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
generator = sv.video.get_video_frames_generator(SOURCE_VIDEO_PATH)

# --- Lane geometry: the frame width is split into three bands ---
frame_width = video_info.width
left_lane_end, right_lane_start = frame_width // 3, 2 * (frame_width // 3)

# --- Tracking state shared with the processing loop ---
id_counter = 1          # next sequential ID to hand out
id_map = {}             # tracker_id -> sequential ID
frame_data_list = []    # accumulated per-frame records for the JSON export
vehicle_positions = {}  # tracker ID -> last seen center, used for direction

# Make sure the frame dump directory exists before the loop writes into it
os.makedirs(FRAME_SAVE_DIR, exist_ok=True)

# Horizontal counting line at mid-height of the frame
line_y_position = video_info.height // 2

# --- Crossing / stop bookkeeping ---
vehicle_crossings = dict(entered=0, exited=0)
crossing_tracker = {}   # tracker ID -> {'crossed': bool, 'last_position': y}
stopped_vehicles = {}   # tracker ID -> (x, y, consecutive still frames)
stopped_threshold = 50  # still frames required before a vehicle counts as stopped

# Reference palette: color name -> RGB triple (extend with further entries as needed)
color_dict = dict(
    red=(255, 0, 0),
    green=(0, 255, 0),
    blue=(0, 0, 255),
    yellow=(255, 255, 0),
    purple=(128, 0, 128),
    orange=(255, 165, 0),
    white=(255, 255, 255),
    black=(0, 0, 0),
    gray=(128, 128, 128),
    brown=(139, 69, 19),
    pink=(255, 192, 203),
    violet=(238, 130, 238),
    # lighter shades
    light_red=(255, 102, 102),
    light_green=(102, 255, 102),
    light_blue=(102, 102, 255),
    light_yellow=(255, 255, 102),
)


# Nearest-palette lookup
def closest_color(rgb):
    """Return the name in ``color_dict`` whose RGB value is nearest to ``rgb``.

    Nearness is the Euclidean distance between the two RGB triples; when several
    entries are equally near, the one listed first in ``color_dict`` is returned.
    """
    def distance_to(name):
        return np.linalg.norm(np.array(color_dict[name]) - np.array(rgb))

    return min(color_dict, key=distance_to)

def get_exact_vehicle_color(bbox, frame, k=1):
    """Name the dominant color of the image region covered by a bounding box.

    Parameters
    ----------
    bbox : sequence of 4 numbers
        Box corners ``(x1, y1, x2, y2)`` in pixel coordinates of ``frame``.
    frame : numpy.ndarray
        Image indexed as ``frame[y, x]`` with three channels, treated as BGR
        (the channel order is swapped to RGB before the palette lookup).
    k : int, default 1
        Number of KMeans clusters fitted to the pixels.

    Returns
    -------
    str
        Name of the closest entry in the predefined palette, or ``"unknown"``
        when the box covers no pixels of the frame.
    """
    frame_h, frame_w = frame.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp to the frame: a negative coordinate would otherwise be read as a
    # from-the-end index and silently select the wrong (or an empty) region.
    x1, x2 = max(0, x1), min(frame_w, x2)
    y1, y2 = max(0, y1), min(frame_h, y2)
    if x2 <= x1 or y2 <= y1:
        # Nothing to cluster; KMeans would raise on an empty sample set.
        return "unknown"

    # Flatten the region into an (n_pixels, 3) sample matrix
    pixels = frame[y1:y2, x1:x2].reshape(-1, 3)

    # KMeans cannot fit more clusters than there are samples.
    n_clusters = min(k, len(pixels))
    # Fixed seed so that runs with k > 1 are reproducible.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    kmeans.fit(pixels)

    # The dominant color is the centroid of the cluster holding the most pixels
    # (cluster 0 is not necessarily the largest once k > 1).
    dominant_idx = np.bincount(kmeans.labels_, minlength=n_clusters).argmax()
    blue, green, red = kmeans.cluster_centers_[dominant_idx].astype(int)

    # Palette entries are RGB, so pass the channels in RGB order.
    return closest_color((red, green, blue))



# Open output video stream
with sv.VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
    # Run detection + ByteTrack tracking over the whole video, one result per frame
    tracked_results = YOLO('yolov8s.pt').track(
        source=SOURCE_VIDEO_PATH,
        tracker='bytetrack.yaml',
        show=False,
        stream=True,
        agnostic_nms=True,
        persist=True
    )

    for frame_number, result in enumerate(tracked_results):
        # Keep the untouched image for color sampling and draw on a copy, so boxes and
        # labels drawn for one vehicle cannot leak into the color estimate of another.
        raw_frame = result.orig_img
        frame = raw_frame.copy()
        detections = sv.Detections.from_yolov8(result)

        # Map the tracker's IDs onto compact sequential IDs (1, 2, 3, ...)
        if result.boxes.id is not None:
            raw_ids = result.boxes.id.cpu().numpy().astype(int).tolist()
            for raw_id in raw_ids:
                if raw_id not in id_map:
                    id_map[raw_id] = id_counter
                    id_counter += 1
            detections.tracker_id = np.array([id_map[raw_id] for raw_id in raw_ids])

        # Traffic congestion: number of boxes detected in this frame
        congestion_level = len(detections)
        # One JSON entry per tracked detection, each carrying its own attributes
        detection_records = []

        for bbox, confidence, class_id, tracker_id in detections:
            if tracker_id is None:
                # The tracker has not assigned IDs on this frame; all per-vehicle state
                # below is keyed by ID, so there is nothing to update for this box.
                continue

            tracker_id = int(tracker_id)
            class_id = int(class_id)
            # Names come from the result itself, i.e. from the model that produced the box
            class_name = result.names[class_id]
            bbox = [float(coord) for coord in bbox]
            center_x = (bbox[0] + bbox[2]) / 2  # Center X of the bounding box
            center_y = (bbox[1] + bbox[3]) / 2  # Center Y of the bounding box

            # --- Stopped-vehicle detection ---
            is_stopped = False
            if tracker_id in stopped_vehicles:
                prev_x, prev_y, stop_count = stopped_vehicles[tracker_id]
                # Count consecutive frames in which the center moved < 5 px on both axes
                if abs(center_x - prev_x) < 5 and abs(center_y - prev_y) < 5:
                    stop_count += 1
                else:
                    stop_count = 0
                stopped_vehicles[tracker_id] = (center_x, center_y, stop_count)
                is_stopped = stop_count >= stopped_threshold
            else:
                stopped_vehicles[tracker_id] = (center_x, center_y, 0)

            # --- Dominant color, sampled from the unannotated image ---
            vehicle_color = get_exact_vehicle_color(bbox, raw_frame)

            # --- Direction of travel relative to the previous frame's center ---
            direction = "Unknown"
            if tracker_id in vehicle_positions:
                prev_x, prev_y = vehicle_positions[tracker_id]
                if abs(center_x - prev_x) > abs(center_y - prev_y):  # Horizontal movement
                    direction = "Right" if center_x > prev_x else "Left"
                else:  # Vertical movement
                    direction = "Down" if center_y > prev_y else "Up"
            vehicle_positions[tracker_id] = (center_x, center_y)

            # --- Lane, from the horizontal third the center falls into ---
            if center_x < left_lane_end:
                lane = "Left Lane"
            elif center_x > right_lane_start:
                lane = "Right Lane"
            else:
                lane = "Middle Lane"

            # --- Line crossing ---
            # Seed the state with the box CENTER: the comparisons below use center_y, so
            # seeding with the box top could count a crossing on the very first frame.
            if tracker_id not in crossing_tracker:
                crossing_tracker[tracker_id] = {'crossed': False, 'last_position': center_y}
            track_state = crossing_tracker[tracker_id]
            if not track_state['crossed']:
                last_y = track_state['last_position']
                # Center moved from at/above the line to below it (image y grows downward)
                if center_y > line_y_position and last_y <= line_y_position:
                    vehicle_crossings['entered'] += 1
                    track_state['crossed'] = True
                # Center moved from at/below the line to above it
                elif center_y < line_y_position and last_y >= line_y_position:
                    vehicle_crossings['exited'] += 1
                    track_state['crossed'] = True
            track_state['last_position'] = center_y

            # --- Speed estimate ---
            # NOTE(review): the reference point is recorded once per track and never refreshed,
            # so this is the straight-line average speed since the vehicle was first seen, and
            # it uses the fixed FPS constant rather than the video's own frame rate — confirm
            # that both are intended.
            if tracker_id not in VEHICLE_POSITIONS:
                VEHICLE_POSITIONS[tracker_id] = (center_x, center_y, frame_number)
            ref_x, ref_y, ref_frame = VEHICLE_POSITIONS[tracker_id]
            displacement = np.sqrt((center_x - ref_x) ** 2 + (center_y - ref_y) ** 2)
            time_elapsed = (frame_number - ref_frame) / FPS
            speed = float((displacement / time_elapsed) * SCALE_FACTOR) if time_elapsed > 0 else 0.0

            # Only draw if the vehicle is inside the polygon
            if is_in_target_polygon(center_x, center_y, SOURCE):
                label = f"ID: {tracker_id} | {class_name} {confidence:0.2f}| Speed: {speed:.2f} km/h"
                frame = box_annotator.annotate(
                    scene=frame,
                    detections=sv.Detections(
                        xyxy=np.array([bbox]),
                        confidence=np.array([confidence]),
                        class_id=np.array([class_id]),
                        tracker_id=np.array([tracker_id])
                    ),
                    labels=[label]
                )
                if is_stopped:
                    cv2.putText(frame, f"STOPPED", (int(center_x), int(center_y) - 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Record THIS detection's own attributes for the JSON export
            detection_records.append({
                "tracker_id": tracker_id,
                "class_id": class_id,
                "class_name": class_name,
                "direction": direction,
                "lane": lane,
                "vehicle_color": vehicle_color,
                "stopped": is_stopped,
                "confidence": float(confidence),
                "bbox": bbox,
                "speed_kmh": speed
            })

        # Draw the counting line and the counters once per frame (also on empty frames)
        cv2.line(frame, (0, line_y_position), (video_info.width, line_y_position), (0, 255, 0), 2)
        cv2.putText(frame, f"Entered: {vehicle_crossings['entered']}", (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.putText(frame, f"Exited: {vehicle_crossings['exited']}", (30, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Exactly one record per frame; detections without a tracker ID are counted in
        # congestion_level but have no entry in "detections".
        frame_data_list.append({
            "frame_number": frame_number,
            "congestion_level": congestion_level,
            "detections": detection_records
        })

        # Draw the source polygon
        cv2.polylines(frame, [SOURCE.astype(np.int32)], isClosed=True, color=(0, 255, 0), thickness=2)

        # Apply perspective transformation
        warped_frame = cv2.warpPerspective(frame, perspective_transform, (TARGET_WIDTH, TARGET_HEIGHT))
        cv2.imwrite(f"warped_frame_{frame_number:04d}.jpg", warped_frame)

        # Save current frame to disk. The frame is already in OpenCV's BGR order (it is
        # written to the video sink unchanged), so no channel conversion is applied here.
        frame_path = os.path.join(FRAME_SAVE_DIR, f"frame_{frame_number:04d}.jpg")
        cv2.imwrite(frame_path, frame)

        # Write annotated frame to the output video
        sink.write_frame(frame)

    print(f"Vehicle tracking completed. Entered: {vehicle_crossings['entered']}, Exited: {vehicle_crossings['exited']}")

# Persist the collected per-frame records as indented JSON
with open(FRAME_DATA_PATH, mode='w') as json_file:
    json.dump(obj=frame_data_list, fp=json_file, indent=4)

# Tell the user where the artifacts ended up
print(f"Frames saved to '{FRAME_SAVE_DIR}' and frame data saved to '{FRAME_DATA_PATH}'.")


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 173MB/s]


[31m[1mrequirements:[0m Ultralytics requirement ['lapx>=0.5.2'] not found, attempting AutoUpdate...
Collecting lapx>=0.5.2
  Downloading lapx-0.5.11.post1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Downloading lapx-0.5.11.post1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 26.4 MB/s eta 0:00:00
Installing collected packages: lapx
Successfully installed lapx-0.5.11.post1

[31m[1mrequirements:[0m AutoUpdate success ✅ 3.5s, installed 1 package: ['lapx>=0.5.2']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m


video 1/1 (frame 1/538) /kaggle/working/vehicle-counting.mp4: 384x640 4 cars, 1 truck, 40.8ms
video 1/1 (frame 2/538) /kaggle/working/vehicle-counting.mp4: 384x640 4 cars, 1 truck, 10.9ms
video 1/1 (frame 3/538) /kaggle/working/vehicle-countin

In [12]:
from IPython.display import FileLink
# Link to the annotated video via the same path constant the tracking cell writes to,
# so the link cannot drift from the actual output file.
FileLink(TARGET_VIDEO_PATH)