K-means Clustering

In [5]:
import cv2
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np
from sklearn.cluster import KMeans
from collections import Counter

# Load the trained YOLOv8 weights (no device is specified here)
model = YOLO(r'C:\Users\hp\Desktop\Master_Thesis_Project\Thesis Project\Final Thesis Project\New_Run\New_Run\Scenario 1\train\train\weights\best.pt')

# Deep SORT tracker configuration, kept in one place so it is easy to tune
tracker_settings = dict(
    max_age=150,            # Frames a lost track is kept; after that a returning object gets a new ID
    n_init=5,               # Detections required before a track is confirmed (fewer false positives)
    nn_budget=100,          # Number of past appearance embeddings stored per track
    max_iou_distance=0.30,  # Stricter IoU gating for more precise matching
)
tracker = DeepSort(**tracker_settings)

# Input video
video_path = r'C:\Users\hp\Desktop\Master_Thesis_Project\Dataset\Filtered_Video Dataset\filtered_video_4.mp4'
cap = cv2.VideoCapture(video_path)

# Output video: the writer itself is created lazily, once the frame size is known
output_video_path = r'C:\Users\hp\Desktop\Master_Thesis_Project\Thesis Project\Final Thesis Project\Video & Image Output\k_means_output4.mp4'
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
out = None

# Function to increase brightness and saturation of a color
def adjust_brightness(color, brightness_factor=1.0, saturation_factor=3.0):
    """Scale a color's brightness, then exaggerate its saturation.

    Every channel is first multiplied by ``brightness_factor``. Saturation is
    then boosted by pushing each channel away from the brightest channel:
    ``c * s + max_channel * (1 - s)``. With ``s > 1`` the brightest channel
    keeps its value and the remaining channels become darker.

    Args:
        color: Iterable of numeric channel values (e.g. a BGR tuple).
        brightness_factor: Multiplier applied to every channel.
        saturation_factor: Strength of the saturation boost; 1.0 leaves the
            brightened color unchanged.

    Returns:
        Tuple of ints, one per channel, each clamped to the range 0-255.
    """
    brightened = tuple(
        max(0, min(int(c * brightness_factor), 255)) for c in color
    )

    # The saturation formula goes negative for channels well below the
    # maximum (e.g. 3*c - 2*max when the factor is 3), so clamp at both ends
    # to always return a valid 8-bit color.
    max_value = max(brightened)
    spread = max_value * (1 - saturation_factor)
    saturated = tuple(
        max(0, min(int(c * saturation_factor + spread), 255))
        for c in brightened
    )

    return saturated

# Function to perform color clustering
def get_dominant_color(image, k=1):
    """Return the dominant color of an image patch as a tuple of ints.

    Patches smaller than 10 pixels in either dimension are summarized by
    their per-channel mean instead of being clustered. Larger patches are
    clustered with K-Means and the center of the most populated cluster is
    returned.

    Args:
        image: Array of shape (H, W, 3); channel order is preserved in the
            result (BGR when the patch comes from an OpenCV frame).
        k: Number of K-Means clusters. Capped at the number of pixels.

    Returns:
        A 3-tuple of ints, or ``(0, 0, 0)`` for an empty patch.
    """
    # An empty crop has no color information at all
    if image.size == 0:
        return (0, 0, 0)

    # Too few pixels to cluster meaningfully: use the mean color
    if image.shape[0] < 10 or image.shape[1] < 10:
        return tuple(map(int, image.mean(axis=(0, 1))))

    # Reshape the image from (H, W, 3) to (H*W, 3)
    pixels = image.reshape(-1, 3)

    # KMeans rejects more clusters than samples, so cap k. A fixed seed makes
    # the chosen color reproducible from run to run.
    kmeans = KMeans(n_clusters=min(k, len(pixels)), random_state=0)
    kmeans.fit(pixels)

    # The dominant color is the center of the cluster with the most pixels
    largest_label, _ = Counter(kmeans.labels_).most_common(1)[0]
    dominant_color = kmeans.cluster_centers_[largest_label]

    # Truncate the float center to ints, keeping the input channel order
    return tuple(map(int, dominant_color))

# Function to define a region of interest (ROI) from a bounding box
def get_roi(frame, x1, y1, x2, y2, scale=0.5):
    """Crop a centered sub-region of a bounding box from a frame.

    The region shares the bounding box's center and spans ``scale`` of its
    width and height. It is clipped to the frame, so a box that lies outside
    the frame yields an empty crop.

    Args:
        frame: Image array indexed as ``frame[row, column]``.
        x1, y1: Top-left corner of the bounding box (integer pixels).
        x2, y2: Bottom-right corner of the bounding box (integer pixels).
        scale: Fraction of the box width/height covered by the region.

    Returns:
        The slice ``frame[roi_y1:roi_y2, roi_x1:roi_x2]``.
    """
    frame_h, frame_w = frame.shape[:2]

    roi_width = int((x2 - x1) * scale)
    roi_height = int((y2 - y1) * scale)
    center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2

    # Clamp BOTH ends of each axis into [0, frame size]. A negative end index
    # would otherwise be interpreted by slicing as "count from the far edge"
    # and return an unrelated part of the frame.
    roi_x1 = min(max(center_x - roi_width // 2, 0), frame_w)
    roi_x2 = min(max(center_x + roi_width // 2, 0), frame_w)
    roi_y1 = min(max(center_y - roi_height // 2, 0), frame_h)
    roi_y2 = min(max(center_y + roi_height // 2, 0), frame_h)

    # Crop the ROI from the frame
    return frame[roi_y1:roi_y2, roi_x1:roi_x2]

# Function to determine the average dominant color from multiple ROIs
def get_average_dominant_color(frame, x1, y1, x2, y2):
    """Average the dominant colors sampled from four halves of a bounding box.

    The box is split into top, bottom, left and right halves. A central ROI
    is cropped from each half with ``get_roi``, its dominant color is found
    with ``get_dominant_color``, and the four colors are averaged per channel.

    Returns:
        Tuple of ints holding the per-channel average.
    """
    # Midpoints that divide the box into halves
    mid_x = x1 + (x2 - x1) // 2
    mid_y = y1 + (y2 - y1) // 2

    half_boxes = (
        (x1, y1, x2, mid_y),  # Top half
        (x1, mid_y, x2, y2),  # Bottom half
        (x1, y1, mid_x, y2),  # Left half
        (mid_x, y1, x2, y2),  # Right half
    )

    # Crop every region first, then estimate one color per region
    regions = [get_roi(frame, *corners) for corners in half_boxes]
    region_colors = [get_dominant_color(region) for region in regions]

    # Per-channel mean across the four regions, truncated to ints
    blended = np.mean(region_colors, axis=0)
    return tuple(int(channel) for channel in blended)

# Process the video frame by frame

# Match the output frame rate to the source so playback speed is preserved;
# fall back to 30 FPS when the capture reports no usable value (0 or NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Perform object detection using YOLOv8
        results = model(frame)

        # Convert person detections to the Deep SORT input format:
        # ([left, top, width, height], confidence, class_id)
        detections = []
        for r in results:
            for box in r.boxes:
                class_id = int(box.cls[0])

                # Only class 0 (person) is tracked
                if class_id != 0:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
                confidence = box.conf[0].item()
                detections.append(([x1, y1, x2 - x1, y2 - y1], confidence, class_id))

        # Track the objects using Deep SORT
        tracks = tracker.update_tracks(detections, frame=frame)

        # Draw tracked objects and add color information
        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())  # [left, top, right, bottom]

            # Clamp every coordinate into the frame. Predicted track boxes can
            # drift partly or completely outside the image.
            x1 = min(max(x1, 0), width)
            x2 = min(max(x2, 0), width)
            y1 = min(max(y1, 0), height)
            y2 = min(max(y2, 0), height)

            # Nothing of the box is visible: there is no color to sample
            # and nothing to draw.
            if x2 <= x1 or y2 <= y1:
                continue

            # Determine the average dominant color from multiple regions
            avg_dominant_color = get_average_dominant_color(frame, x1, y1, x2, y2)

            # Make the dominant color more vivid before drawing with it
            avg_dominant_color = adjust_brightness(avg_dominant_color)

            # Draw the bounding box and the ID label in the same (BGR) color
            cv2.rectangle(frame, (x1, y1), (x2, y2), avg_dominant_color, 2)
            cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, avg_dominant_color, 2)

        # Create the writer lazily, once the frame size is known
        if out is None:
            out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        out.write(frame)
finally:
    # Release the video source and output even if processing fails. The writer
    # is never created when the source yields no frames.
    cap.release()
    if out is not None:
        out.release()


0: 288x512 20 persons, 46.7ms
Speed: 2.5ms preprocess, 46.7ms inference, 1.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 39.9ms
Speed: 2.6ms preprocess, 39.9ms inference, 3.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 40.5ms
Speed: 1.0ms preprocess, 40.5ms inference, 1.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 39.9ms
Speed: 2.0ms preprocess, 39.9ms inference, 2.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 40.1ms
Speed: 2.0ms preprocess, 40.1ms inference, 2.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 39.8ms
Speed: 2.0ms preprocess, 39.8ms inference, 2.4ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 39.5ms
Speed: 2.1ms preprocess, 39.5ms inference, 1.8ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 40.8ms
Speed: 1.1ms preprocess, 40.8ms inference, 1.0ms postprocess per image at