In [1]:
%pip install -U ultralytics

Collecting ultralytics
  Obtaining dependency information for ultralytics from https://files.pythonhosted.org/packages/93/18/048166fe59600c8cec5857df75d7f9a45aef2c598d324782b05b1d6660c3/ultralytics-8.2.42-py3-none-any.whl.metadata
  Downloading ultralytics-8.2.42-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m91.2 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Downloading ultralytics-8.2.42-py3-none-any.whl (792 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m793.0/793.0 kB[0m [31m366.8 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: ultralytics
  Attempting uninstall: ultralytics
    Found existing installation: ultralytics 8.2.41
    Uninstalling ultralytics-8.2.41:
      Successfully uninstalled ultralytics-8.2.41
Successfully installed ultralytics-8.2.42
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import necessary libraries
import torch
from PIL import Image
from IPython.display import display
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Location of the custom YOLOv5 weights file (change this to point at your own checkpoint)
model_path = "best.pt"

In [4]:
# Load the 'custom' entry point of the ultralytics/yolov5 hub repo with the weights at model_path
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='custom',
    path=model_path,
)

Using cache found in /Users/arnaav/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-6-24 Python-3.11.5 torch-2.1.1 CPU

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


In [5]:
def process_frame(frame, object_detector, model, kernel, roi_coords):
    """Run the detector on one frame and describe its largest bounding box.

    The frame is converted from BGR to RGB, passed to ``model``, and the rows of
    ``results.xyxy[0]`` are scanned for the box with the greatest area. Each row
    must unpack into exactly six values
    (x_min, y_min, x_max, y_max, confidence, class_id).

    Args:
        frame: BGR image handed to ``cv2.cvtColor``.
        object_detector: Not used by this function.
        model: Callable whose result exposes ``.xyxy[0]``.
        kernel: Not used by this function.
        roi_coords: Not used by this function.

    Returns:
        ``((center_x, center_y), height, frame_rgb)`` for the largest box, where
        the corners are truncated to ints before the centre and height are
        computed; ``(None, None, frame_rgb)`` when no box has a positive area.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    detections = model(rgb)

    # Keep the first box whose area strictly exceeds everything seen so far.
    best_corners, best_area = None, 0
    for row in detections.xyxy[0]:
        left, top, right, bottom, _score, _label = row.tolist()
        box_area = (right - left) * (bottom - top)
        if box_area > best_area:
            best_corners, best_area = (left, top, right, bottom), box_area

    if not best_corners:
        return None, None, rgb

    left, top, right, bottom = map(int, best_corners)
    midpoint = ((left + right) / 2, (top + bottom) / 2)
    return midpoint, bottom - top, rgb


In [6]:
def apply_bg_subtraction(frame, object_detector, kernel, roi_coords):
    """Count foreground-mask pixels that fall inside a rectangular region.

    Args:
        frame: Image passed to ``cv2.GaussianBlur``.
        object_detector: Object whose ``apply(image)`` returns a mask.
        kernel: Structuring element used for the morphological open and close.
        roi_coords: ``(x, y, width, height)`` of the region of interest.

    Returns:
        The value of ``cv2.countNonZero`` over the region's slice of the mask.
    """
    left, top, width, height = roi_coords

    # Smooth with a 5x5 Gaussian before updating the background model.
    smoothed = cv2.GaussianBlur(frame, (5, 5), 0)
    foreground = object_detector.apply(smoothed)

    # Opening followed by closing, both with the same kernel, to clean the mask.
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        foreground = cv2.morphologyEx(foreground, morph_op, kernel)

    roi_rows = slice(top, top + height)
    roi_cols = slice(left, left + width)
    return cv2.countNonZero(foreground[roi_rows, roi_cols])

In [7]:
def determine_state(height_diff, movement_detected, threshold1, threshold2):
    """Map a height change and a movement flag to an activity label.

    Args:
        height_diff: Height change to compare against the thresholds.
        movement_detected: Truthy when movement was detected.
        threshold1: Value ``height_diff`` must exceed for ``'moving'``.
        threshold2: Value ``height_diff`` must exceed for ``'working'``.

    Returns:
        ``'working'``, ``'moving'`` or ``'idle'``. ``threshold2`` is tested
        first; without movement the result is always ``'idle'``.
    """
    # Checked in priority order: the first label whose limit is exceeded wins.
    for label, limit in (('working', threshold2), ('moving', threshold1)):
        if height_diff > limit and movement_detected:
            return label
    return 'idle'

In [8]:
def annotate_frame(frame, center_point, height, state, movement_detected, text_x):
    """Overlay movement, state, centre point and height on a frame.

    Args:
        frame: Image passed to ``cv2.putText`` and ``cv2.circle``.
        center_point: ``(x, y)`` pair; both values are truncated to int for drawing.
        height: Value shown in the 'Height' label, truncated to int.
        state: Value shown in the 'State' label.
        movement_detected: Value shown in the 'Movement' label.
        text_x: X coordinate at which the two status lines start.

    Returns:
        The ``frame`` object that was passed in.
    """
    cx, cy = center_point
    font, scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2

    # Two status lines, stacked at y=30 and y=50.
    for label, value, baseline_y in (('Movement', movement_detected, 30),
                                     ('State', state, 50)):
        cv2.putText(frame, f'{label}: {value}', (text_x, baseline_y),
                    font, scale, (255, 0, 0), thickness, cv2.LINE_AA)

    # Filled dot on the centre, with the height label 10 px above it.
    cv2.circle(frame, (int(cx), int(cy)), 5, (0, 0, 255), -1)
    cv2.putText(frame, f'Height: {int(height)}', (int(cx), int(cy - 10)),
                font, scale, (0, 255, 0), thickness, cv2.LINE_AA)

    return frame


In [9]:
def plot_metrics(center_points, heights, height_diffs):
    """Show three side-by-side plots summarising the tracked object.

    Panels, left to right:
      1. Centre-point trajectory: x coordinate against y coordinate.
      2. ``heights`` plotted against its index.
      3. ``height_diffs`` plotted against its index.

    Nothing is drawn when ``center_points`` is empty.

    Args:
        center_points: Sequence of ``(x, y)`` pairs.
        heights: Sequence of heights, one per recorded detection.
        height_diffs: Sequence of height differences, one per recorded detection.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if not center_points:
        return

    xs = [pt[0] for pt in center_points]
    ys = [pt[1] for pt in center_points]

    fig, (ax_path, ax_height, ax_diff) = plt.subplots(1, 3, figsize=(18, 6))

    # This panel plots x against y, so its axes are the two coordinates;
    # it is not a per-frame series like the other two panels.
    ax_path.plot(xs, ys, marker='o')
    ax_path.set_title('Movement of Center Points')
    ax_path.set_xlabel('Center X')
    ax_path.set_ylabel('Center Y')

    ax_height.plot(heights, marker='o')
    ax_height.set_title('Heights of Bounding Boxes')
    ax_height.set_xlabel('Frame')
    ax_height.set_ylabel('Height')

    ax_diff.plot(height_diffs, marker='o')
    ax_diff.set_title('Height Differences')
    ax_diff.set_xlabel('Frame')
    ax_diff.set_ylabel('Height Difference')

    plt.show()

In [10]:
def test_model_on_video(video_path, output_video_path, threshold1, threshold2,model, movement_threshold=8000):
    # Open video
    cap = cv2.VideoCapture(video_path)

    # Get the width and height of the frames
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_video_path, fourcc, 20.0, (frame_width, frame_height))

    # List to store the center points and heights
    center_points = []
    heights = []
    height_diffs = []

    # Define the ROI coordinates (x, y, width, height)
    roi_coords = (250, 200, 500, frame_height - 200)

    # Create the background subtractor object
    object_detector = cv2.createBackgroundSubtractorMOG2()

    # Define kernel for morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

    initial_height = None  # Variable to store the initial height

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        center_point, height, frame_rgb = process_frame(frame, object_detector,model, kernel, roi_coords)

        if center_point:
            if initial_height is None:
                initial_height = height

            center_points.append(center_point)
            heights.append(height)

            height_diff = abs(height - initial_height)
            height_diffs.append(height_diff)

            movement = apply_bg_subtraction(frame, object_detector, kernel, roi_coords)
            movement_detected = movement > movement_threshold

            state = determine_state(height_diff, movement_detected, threshold1, threshold2)
            max_height_diff = max(height_diffs) if height_diffs else 1

            text_x = frame_width - 200  # Adjust as needed for positioning
            frame = annotate_frame(frame, center_point, height, state, movement_detected, text_x)

            # Draw the ROI rectangle on the frame
            cv2.rectangle(frame, (roi_coords[0], roi_coords[1]),
                          (roi_coords[0] + roi_coords[2], roi_coords[1] + roi_coords[3]),
                          (255, 0, 0), 2)

        # Write the frame into the output video
        out.write(frame)

        key = cv2.waitKey(30)
        if key == 27:  # ESC key to exit
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    plot_metrics(center_points, heights, height_diffs)


In [11]:
# Process the sample clip and write the annotated copy next to it
test_model_on_video(
    video_path='input_video/video2.mp4',
    output_video_path='output_video/processed-video2.mp4',
    threshold1=2,
    threshold2=10,
    model=model,
    movement_threshold=7500,
)

OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
