In [None]:
# Kaggle Notebook: People Detection in Security Camera Footage using YOLOv5 (GPU Support)

import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using device: {device}')

# Fetch the 'yolov5s' entry point through torch.hub, then move it to the chosen device
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model = model.to(device)

# Define a function to process video frames
def process_frame(frame, model):
    """Run the detector on a single image and return its raw detections.

    Args:
        frame: Image handed to ``model`` unchanged.
        model: Callable detector whose result exposes ``xyxy`` (indexable,
            entries convertible via ``.cpu().numpy()``) and ``names``.

    Returns:
        A ``(boxes, labels)`` tuple. ``boxes`` is the first entry of the
        result's ``xyxy`` converted to a NumPy array on the CPU; ``labels``
        is the result's ``names`` attribute, returned as-is.
    """
    detections = model(frame)

    # Only one image is passed in, so only the first result entry is used
    first_image_boxes = detections.xyxy[0]

    return first_image_boxes.cpu().numpy(), detections.names

# Define a function to annotate frames with detected bounding boxes
def annotate_frame(frame, boxes, labels):
    for box in boxes:
        x1, y1, x2, y2, conf, cls = box
        if labels[int(cls)] == 'person':
            # Draw bounding box
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            # Add label
            cv2.putText(frame, f'{labels[int(cls)]} {conf:.2f}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    return frame

# Load the video file
video_path = '/kaggle/input/exp-01-jun-2024-1145-cam1-4-mp4/Exp_01_Jun_2024_1145_cam1-4.mp4'
output_path = '/kaggle/working/output_video.mp4'
cap = cv2.VideoCapture(video_path)

# Check if video loaded successfully BEFORE creating the writer, so a bad input
# path does not leave an empty output file behind or report success.
if not cap.isOpened():
    print("Error opening video stream or file")
    cap.release()
else:
    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        # Process the video frame by frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read failure): stop processing
                break

            # Split the frame into four quadrants; the right/bottom slices run
            # to the full width/height so odd dimensions lose no pixels.
            height, width, _ = frame.shape
            half_height, half_width = height // 2, width // 2

            quadrants = [
                frame[0:half_height, 0:half_width],
                frame[0:half_height, half_width:width],
                frame[half_height:height, 0:half_width],
                frame[half_height:height, half_width:width],
            ]

            # Process each quadrant
            for i, quadrant in enumerate(quadrants):
                # The detector is given an RGB copy; the annotations are drawn
                # on the original BGR quadrant that goes into the output video.
                quadrant_rgb = cv2.cvtColor(quadrant, cv2.COLOR_BGR2RGB)
                boxes, labels = process_frame(quadrant_rgb, model)
                quadrants[i] = annotate_frame(quadrant, boxes, labels)

            # Combine the quadrants back into a single frame
            top_row = np.hstack((quadrants[0], quadrants[1]))
            bottom_row = np.hstack((quadrants[2], quadrants[3]))
            combined_frame = np.vstack((top_row, bottom_row))

            # Write the frame to the output video file
            out.write(combined_frame)
    finally:
        # Release video capture and writer objects even if a frame fails, so
        # the output container is finalised and the handles are not leaked.
        cap.release()
        out.release()

    print(f"Video processing complete. The output video is saved to {output_path}")


Using device: cuda


Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-6-6 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
