In [1]:
# Kaggle Notebook: Dolphin Detection in Camera Footage using YOLOv5 (GPU Support)

import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Clone the YOLOv5 repository
!git clone https://github.com/ultralytics/yolov5  # Cloning the repository
%cd yolov5
%pip install -r requirements.txt  # Installing requirements

from models.common import DetectMultiBackend
from utils.general import non_max_suppression

# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Load the YOLOv5 weights onto that device and switch to inference mode.
weights_path = '/kaggle/input/yolo-v5-dolphins/pytorch/v1/1/best.pt'
model = DetectMultiBackend(weights_path, device=device)
model.eval()

# Define a function to process video frames
def process_frame(frame, model):
    """Run the detector on a single frame and return its detections.

    Args:
        frame: Image array of shape (H, W, 3) in OpenCV's BGR channel order;
            it is converted to RGB here, so callers must NOT pre-convert it.
            Pixel values are assumed to be in the 0-255 range (they are
            divided by 255 before inference).
        model: Loaded YOLOv5 model exposing ``names`` and, optionally,
            ``stride`` (falls back to 32 when the attribute is missing).

    Returns:
        Tuple ``(boxes, labels)``. ``boxes`` is a NumPy array with one row per
        detection kept by ``non_max_suppression``; the caller unpacks each row
        as ``x1, y1, x2, y2, conf, cls``. Coordinates are clipped to the
        bounds of ``frame``. ``labels`` is ``model.names``.

    Note:
        Uses the module-level ``device`` to place the input tensor.
    """
    frame_height, frame_width = frame.shape[:2]

    # Preprocess the frame for YOLOv5: BGR -> RGB.
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # The network downsamples by `stride`; pad the bottom/right edges up to the
    # next multiple so arbitrary frame sizes do not break the feature-map
    # concatenations. Padding only on the bottom/right keeps the origin fixed,
    # so predicted coordinates stay in the original frame's pixel space.
    stride = int(getattr(model, 'stride', 32))
    pad_bottom = -frame_height % stride
    pad_right = -frame_width % stride
    if pad_bottom or pad_right:
        # 114 is the gray fill value YOLOv5's own letterboxing uses.
        img = np.pad(
            img,
            ((0, pad_bottom), (0, pad_right), (0, 0)),
            mode='constant',
            constant_values=114,
        )

    # Upload first, then scale on the device: avoids building a float64 copy
    # of the whole frame on the CPU.
    img = torch.from_numpy(img).to(device).permute(2, 0, 1).unsqueeze(0).float() / 255.0

    # Run YOLOv5 on the frame
    with torch.no_grad():
        pred = model(img)

    # Apply NMS; index 0 selects the detections of the only image in the batch.
    pred = non_max_suppression(pred)[0]

    # Extract bounding boxes and labels
    boxes = pred.cpu().numpy()
    # Keep boxes inside the original (unpadded) frame.
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, frame_width)
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, frame_height)
    labels = model.names

    return boxes, labels

# Define a function to annotate frames with detected bounding boxes
def annotate_frame(frame, boxes, labels, target_label='dolphin'):
    """Draw a green box and a "<label> <confidence>" caption for matching detections.

    Args:
        frame: Image array OpenCV can draw on; it is modified in place.
        boxes: Iterable of detection rows, each unpacked as
            ``x1, y1, x2, y2, conf, cls``.
        labels: Mapping or sequence indexed by the integer class id that
            yields the class name.
        target_label: Only detections whose class name equals this value are
            drawn. Pass ``None`` to draw every detection. Defaults to
            ``'dolphin'``, matching the previous hard-coded filter.

    Returns:
        The same ``frame`` object that was passed in.
    """
    color = (0, 255, 0)

    for x1, y1, x2, y2, conf, cls in boxes:
        name = labels[int(cls)]
        if target_label is not None and name != target_label:
            continue

        left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)

        # Draw bounding box
        cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

        # Add label: normally just above the box, but drop it inside the box
        # when the box touches the top of the image so the text stays visible.
        text_y = top - 10 if top - 10 >= 20 else top + 25
        cv2.putText(frame, f'{name} {conf:.2f}', (left, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    return frame

# Load the video file
video_path = '/kaggle/input/exp-01-jun-2024-1145-cam1-4-mp4/Exp_01_Jun_2024_1145_cam1-4.mp4'
cap = cv2.VideoCapture(video_path)

# Check if video loaded successfully *before* creating the writer: otherwise a
# bad path silently produces an empty output file followed by a misleading
# "complete" message.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error opening video stream or file")

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
output_path = '/kaggle/working/output_video.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Process the video frame by frame. try/finally guarantees that the capture and
# the writer are released (and the output file finalized) even if inference
# raises part-way through.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Split the frame into four quadrants; each one is run through the
        # detector independently.
        # NOTE(review): presumably one camera view per quadrant, given the
        # "cam1-4" file name - confirm.
        frame_height, frame_width = frame.shape[:2]
        half_height, half_width = frame_height // 2, frame_width // 2

        quadrants = [
            frame[0:half_height, 0:half_width],
            frame[0:half_height, half_width:frame_width],
            frame[half_height:frame_height, 0:half_width],
            frame[half_height:frame_height, half_width:frame_width],
        ]

        # Process each quadrant
        for i, quadrant in enumerate(quadrants):
            # Pass the BGR quadrant as-is: process_frame performs the
            # BGR -> RGB conversion itself, so converting here as well would
            # swap the channels back and feed BGR data to the model.
            boxes, labels = process_frame(quadrant, model)
            quadrants[i] = annotate_frame(quadrant, boxes, labels)

        # Combine the quadrants back into a single frame
        top_row = np.hstack((quadrants[0], quadrants[1]))
        bottom_row = np.hstack((quadrants[2], quadrants[3]))
        combined_frame = np.vstack((top_row, bottom_row))

        # Write the frame to the output video file
        out.write(combined_frame)
finally:
    # Release video capture and writer objects
    cap.release()
    out.release()

print(f"Video processing complete. The output video is saved to {output_path}")

Cloning into 'yolov5'...
remote: Enumerating objects: 16656, done.
remote: Counting objects: 100% (7/7), done.
remote: Compressing objects: 100% (7/7), done.
remote: Total 16656 (delta 1), reused 5 (delta 0), pack-reused 16649
Receiving objects: 100% (16656/16656), 15.12 MiB | 32.88 MiB/s, done.
Resolving deltas: 100% (11438/11438), done.
/kaggle/working/yolov5
Collecting pillow>=10.3.0 (from -r requirements.txt (line 9))
  Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.0.232 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.2.28-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m656.0 kB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=0.2.5 (from ultralytics>=8.0.232->-r requir

Fusing layers... 
YOLOv5s summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs


Video processing complete. The output video is saved to /kaggle/working/output_video.mp4
