In [1]:
# Install YOLOv8 library and other required packages
!pip install ultralytics opencv-python-headless matplotlib


Collecting ultralytics
  Downloading ultralytics-8.3.38-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.38-py3-none-any.whl (896 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m896.3/896.3 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.38 ultralytics-thop-2.0.12


In [2]:
from ultralytics import YOLO

# Load the pretrained YOLOv8 model
model = YOLO('yolov8n.pt')  # Use 'yolov8s.pt', 'yolov8m.pt' for larger models if needed


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 319MB/s]


In [3]:
from google.colab import files

# Upload the video
uploaded = files.upload()
video_path = list(uploaded.keys())[0]  # Get the uploaded file path
print(f"Uploaded video: {video_path}")


Saving dataset_video.mp4 to dataset_video.mp4
Uploaded video: dataset_video.mp4


In [4]:
import cv2
import numpy as np

# Function to calculate the distance between bounding box centers
def calculate_distance(box1, box2):
    center1 = ((box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2)
    center2 = ((box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2)
    return np.linalg.norm(np.array(center1) - np.array(center2))

# Function to detect crowds
def detect_crowd(boxes, threshold=40):
    crowd_boxes = []
    used = [False] * len(boxes)  # Track which boxes are already grouped

    for i, box1 in enumerate(boxes):
        if used[i]:
            continue
        group = [box1]
        used[i] = True

        for j, box2 in enumerate(boxes):
            if i != j and not used[j] and calculate_distance(box1, box2) < threshold:
                group.append(box2)
                used[j] = True

        if len(group) > 2:  # Group is classified as a "crowd" if it has more than 2 people
            x_min = min([b[0] for b in group])
            y_min = min([b[1] for b in group])
            x_max = max([b[2] for b in group])
            y_max = max([b[3] for b in group])
            crowd_boxes.append([x_min, y_min, x_max, y_max])

    return crowd_boxes

# Process the video
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
output_video_path = 'output_detected_video.mp4'
out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 on the frame
    results = model(frame)

    # Extract bounding boxes for "person"
    person_boxes = [
        box.xyxy[0].tolist() for box in results[0].boxes if box.cls == 0  # 0 is the class ID for "person"
    ]

    # Detect crowds
    crowd_boxes = detect_crowd(person_boxes)

    # Annotate the frame
    for box in person_boxes:
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green for "person"
        cv2.putText(frame, "Person", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    for box in crowd_boxes:
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red for "crowd"
        cv2.putText(frame, "Crowd", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # Write the annotated frame to the output video
    out.write(frame)

cap.release()
out.release()
print(f"Processed video saved as {output_video_path}")



0: 384x640 37 persons, 2 birds, 93.5ms
Speed: 15.2ms preprocess, 93.5ms inference, 771.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 3 birds, 10.4ms
Speed: 3.0ms preprocess, 10.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 3 birds, 9.4ms
Speed: 6.1ms preprocess, 9.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 3 birds, 10.8ms
Speed: 3.0ms preprocess, 10.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 2 birds, 7.5ms
Speed: 3.8ms preprocess, 7.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 2 birds, 8.0ms
Speed: 3.3ms preprocess, 8.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 2 birds, 10.0ms
Speed: 3.2ms preprocess, 10.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 2 birds, 9.8ms
Speed

In [5]:
from google.colab import files

# Download the processed video
files.download(output_video_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>