<a href="https://colab.research.google.com/github/ShuvamAich/Temporal-Driver-Activity-Detection/blob/Human-detection-YOLOv5/Driver-detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install yolov5

Collecting yolov5
  Downloading yolov5-7.0.14-py37.py38.py39.py310-none-any.whl.metadata (10 kB)
Collecting thop>=0.1.1 (from yolov5)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.0.100 (from yolov5)
  Downloading ultralytics-8.3.51-py3-none-any.whl.metadata (35 kB)
Collecting fire (from yolov5)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting boto3>=1.19.1 (from yolov5)
  Downloading boto3-1.35.84-py3-none-any.whl.metadata (6.7 kB)
Collecting sahi>=0.11.10 (from yolov5)
  Downloading sahi-0.11.20-py3-none-any.whl.metadata (17 kB)
Collecting huggingface-hub<0.25.0,>=0.12.0 (from yolov5)
  Downloading huggingface_hub-0.24.7-py3-none-any.whl.metadata (13 kB)
Collecting roboflow>=0.2.29 (from yolov5)
  Downloading roboflow-1.1.50-py3-none-any.whl.metadata (

In [None]:
pip install opencv-python



In [None]:
import cv2
import torch
from pathlib import Path
import warnings

# Keep FutureWarning messages out of the cell output
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Where the source clip is read from and where the annotated result is written
input_video_path = '/content/run1b_2018-05-29-14-02-47.ids_2 - Trim.mp4'
output_video_path = '/content/human_detection_output.mp4'

# Load the 'yolov5s' model from the ultralytics/yolov5 repository via PyTorch Hub
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-12-19 Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [None]:
def _annotate_and_write(frames, writer):
    """Detect people in ``frames``, draw their boxes, and write every frame.

    Args:
        frames: Non-empty list of frames as returned by ``cv2.resize`` in the
            loop below (OpenCV channel order). Frames are annotated in place.
        writer: An opened ``cv2.VideoWriter`` that receives the annotated frames.

    Returns:
        The number of frames written.
    """
    # OpenCV frames are BGR; the YOLOv5 hub examples pass OpenCV images as RGB
    # (``img[..., ::-1]``), so flip the channel order for inference only.
    results = model([frame[..., ::-1] for frame in frames])

    for frame, result in zip(frames, results.xyxy):
        detections = result.cpu().numpy()  # [x1, y1, x2, y2, confidence, class]
        for *box, confidence, cls in detections:
            if int(cls) == 0:  # Class 0 corresponds to 'person'
                x1, y1, x2, y2 = map(int, box)
                label = f"Person {confidence:.2f}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Write processed frame to output video
        writer.write(frame)

    return len(frames)


# Open the input video
cap = cv2.VideoCapture(input_video_path)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): exit() would shut the notebook kernel down
    # and discard the already-loaded model.
    raise RuntimeError(f"Error: Unable to open video file {input_video_path}")

# Get video properties
original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # kept as float so rates like 29.97 are not truncated
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(f"Video properties: {original_width}x{original_height} @ {fps:g} FPS, {total_frames} total frames")

# Resize settings for faster processing
# (fixed output size; the source aspect ratio is not preserved)
resize_width = 640
resize_height = 360

# Frame skipping for speedup
frame_skip = 2  # Process every 2nd frame
frame_count = 0
processed_count = 0

batch = []
batch_size = 4  # Batch size for YOLO inference

# Define the codec and create a VideoWriter object.
# Only every `frame_skip`-th frame is written, so the output frame rate is
# reduced by the same factor to keep playback at the original speed.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps / frame_skip, (resize_width, resize_height))

try:
    if not out.isOpened():
        raise RuntimeError(f"Error: Unable to open video writer for {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading frame.")
            break

        frame_count += 1
        if frame_count % frame_skip != 0:
            continue

        # Resize frame for faster processing
        frame = cv2.resize(frame, (resize_width, resize_height))
        batch.append(frame)

        print(f"Processing frame {frame_count}/{total_frames}")  # Display the current frame being processed

        if len(batch) == batch_size:
            # Perform batch inference
            processed_count += _annotate_and_write(batch, out)
            batch = []  # Clear batch

    # Flush the frames left over when the number of kept frames is not a
    # multiple of batch_size; otherwise the end of the video would be dropped.
    if batch:
        processed_count += _annotate_and_write(batch, out)
        batch = []
finally:
    # Release resources even if inference or writing fails part-way through
    cap.release()
    out.release()

print(f"Detection complete. Processed {processed_count} frames out of {total_frames}. Output saved at {output_video_path}")

Video properties: 1280x1024 @ 30 FPS, 1828 total frames
Processing frame 2/1828
Processing frame 4/1828
Processing frame 6/1828
Processing frame 8/1828
Processing frame 10/1828
Processing frame 12/1828
Processing frame 14/1828
Processing frame 16/1828
Processing frame 18/1828
Processing frame 20/1828
Processing frame 22/1828
Processing frame 24/1828
Processing frame 26/1828
Processing frame 28/1828
Processing frame 30/1828
Processing frame 32/1828
Processing frame 34/1828
Processing frame 36/1828
Processing frame 38/1828
Processing frame 40/1828
Processing frame 42/1828
Processing frame 44/1828
Processing frame 46/1828
Processing frame 48/1828
Processing frame 50/1828
Processing frame 52/1828
Processing frame 54/1828
Processing frame 56/1828
Processing frame 58/1828
Processing frame 60/1828
Processing frame 62/1828
Processing frame 64/1828
Processing frame 66/1828
Processing frame 68/1828
Processing frame 70/1828
Processing frame 72/1828
Processing frame 74/1828
Processing frame 76/182

In [None]:
from IPython.display import Video, display

# Announce the preview, then render the annotated video inline (embedded in the
# notebook output) at the same 640x360 size it was written with.
print("Evaluating the processed output video:")
display(
    Video(
        data=output_video_path,
        embed=True,
        width=640,
        height=360,
    )
)



Evaluating the processed output video:
