In [1]:
import cv2
import torch
import numpy as np
import random
import time
import psutil
from collections import defaultdict
from google.colab.patches import cv2_imshow

In [2]:
# Person detector: the 'yolov5n' model fetched through PyTorch Hub.
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

# Face detector: a Caffe SSD network run through OpenCV's DNN module.
face_net = cv2.dnn.readNetFromCaffe('/content/deploy.prototxt', '/content/res10_300x300_ssd_iter_140000.caffemodel')

# Profile-face detector: Haar cascade shipped with the OpenCV package.
profile_face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_profileface.xml')

# CascadeClassifier does not raise when the XML file cannot be loaded; it
# just yields an empty classifier that fails later inside detectMultiScale.
# Fail here instead, where the cause is obvious.
if profile_face_cascade.empty():
    raise IOError(
        "Could not load 'haarcascade_profileface.xml' from "
        f"{cv2.data.haarcascades!r}"
    )

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


YOLOv5 🚀 2025-1-26 Python-3.11.11 torch-2.5.1+cu121 CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt to yolov5n.pt...
100%|██████████| 3.87M/3.87M [00:00<00:00, 85.8MB/s]

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients, 4.5 GFLOPs
Adding AutoShape... 


In [3]:
# Path of the video file that the capture cell opens for reading.
input_video_path = '/content/facenew.mp4'
# Path the annotated video is written to (also printed in the benchmark report).
output_video_path = 'output_faces_and_bodies.mp4'

In [4]:
# Open the input video and fail fast if it cannot be read; otherwise the
# processing loop would silently do nothing and the report cell would crash.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Keep the frame rate as a float: truncating with int() turns e.g. 29.97 into
# 29, which makes the written video play at a slightly different speed.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some sources report 0 or NaN; fall back to a sane default so the
    # writer is not created with an invalid rate.
    fps = 30.0

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Writer for the annotated frames, same resolution and rate as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Could not open output video for writing: {output_video_path}")

In [5]:
# Fixed seed so the palette is identical on every run.
random.seed(42)

# 100 random 3-channel colours; the loop picks one per track ID by modulo.
colors = [
    tuple(random.randint(0, 255) for _ in range(3))
    for _ in range(100)
]

In [6]:
# Counters: frame counter and the next unused person ID (IDs start at 1).
frame_count, next_person_id = 0, 1

# Maps a person ID to its most recent bounding box; missing keys read as None.
person_tracker = defaultdict(lambda: None)

# Per-frame benchmark samples, appended to by the processing loop.
fps_values, cpu_usage, memory_usage = [], [], []

In [7]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: First box as ``(x1, y1, x2, y2)``.
        box2: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        ``intersection / union`` of the two boxes, or ``0`` when the union
        area is not positive.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap_area = overlap_w * overlap_h

    total_area = (
        (right_a - left_a) * (bottom_a - top_a)
        + (right_b - left_b) * (bottom_b - top_b)
        - overlap_area
    )

    if total_area > 0:
        return overlap_area / total_area
    return 0

In [8]:
# Thresholds used by the per-frame pipeline below.
PERSON_CLASS_ID = 0        # detector class index that is treated as "person"
IOU_MATCH_THRESHOLD = 0.5  # minimum IoU to re-use an existing person ID
FACE_CONF_THRESHOLD = 0.5  # minimum confidence for an SSD face detection

# Main loop: read a frame, detect persons and faces, assign stable person IDs
# by IoU against the previous frame, draw the annotations, write the frame and
# record timing / resource samples.
while cap.isOpened():
    start_time = time.perf_counter()
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1

    # OpenCV decodes frames as BGR, but the YOLOv5 hub model expects RGB for
    # ndarray input, so convert before inference (drawing still uses `frame`).
    results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detections = results.xyxy[0].cpu().numpy()
    persons = [det for det in detections if int(det[-1]) == PERSON_CLASS_ID]

    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    face_net.setInput(blob)
    face_detections = face_net.forward()

    # Decode the SSD output once per frame; it does not depend on the person
    # being processed, so there is no reason to redo it inside the person loop.
    scale = np.array([w, h, w, h])
    ssd_faces = []
    for i in range(face_detections.shape[2]):
        confidence = face_detections[0, 0, i, 2]
        if confidence > FACE_CONF_THRESHOLD:
            fx1, fy1, fx2, fy2 = face_detections[0, 0, i, 3:7] * scale
            ssd_faces.append((int(fx1), int(fy1), int(fx2), int(fy2)))

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    profile_faces = profile_face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Tracks confirmed in this frame; replaces person_tracker at the end so
    # IDs that were not seen in this frame are dropped.
    updated_tracks = {}

    for person in persons:
        x1, y1, x2, y2 = (int(v) for v in person[:4])
        person_box = (x1, y1, x2, y2)

        # Pick the previous-frame track with the highest IoU above the
        # threshold. Tracks already claimed in this frame are skipped so two
        # detections can never end up with the same ID.
        matched_id = None
        best_iou = IOU_MATCH_THRESHOLD
        for tracked_id, tracked_box in person_tracker.items():
            if tracked_box is None or tracked_id in updated_tracks:
                continue
            iou = calculate_iou(person_box, tracked_box)
            if iou > best_iou:
                matched_id, best_iou = tracked_id, iou

        if matched_id is None:
            matched_id = next_person_id
            next_person_id += 1

        updated_tracks[matched_id] = person_box

        color = colors[matched_id % len(colors)]

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, f"Person {matched_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Draw every SSD face that lies entirely inside this person's box.
        for (fx1, fy1, fx2, fy2) in ssd_faces:
            if fx1 >= x1 and fy1 >= y1 and fx2 <= x2 and fy2 <= y2:
                cv2.rectangle(frame, (fx1, fy1), (fx2, fy2), color, 2)
                cv2.putText(frame, "Face", (fx1, fy1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Same containment rule for the Haar profile-face detections (x, y, w, h).
        for (px, py, pw, ph) in profile_faces:
            if px >= x1 and py >= y1 and px + pw <= x2 and py + ph <= y2:
                cv2.rectangle(frame, (px, py), (px + pw, py + ph), color, 2)
                cv2.putText(frame, "Face", (px, py - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    person_tracker = updated_tracks

    out.write(frame)

    end_time = time.perf_counter()
    elapsed = end_time - start_time
    # Guard against a zero interval on coarse clocks instead of dividing by zero.
    frame_fps = 1 / elapsed if elapsed > 0 else float('inf')
    fps_values.append(frame_fps)
    cpu_usage.append(psutil.cpu_percent())
    memory_usage.append(psutil.virtual_memory().percent)

  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with a

In [9]:
# Release the capture first, then the writer.
for video_handle in (cap, out):
    video_handle.release()

In [10]:
# Benchmark Metrics
# Summarise the per-frame samples collected by the processing loop.
if len(fps_values) == 0:
    # np.min / np.max raise ValueError on an empty sequence (and np.mean
    # yields NaN with a warning), so report the situation explicitly instead.
    print("Benchmark Report: no frames were processed, so there are no metrics to report.")
else:
    average_fps = np.mean(fps_values)
    min_fps = np.min(fps_values)
    max_fps = np.max(fps_values)
    std_dev_fps = np.std(fps_values)
    average_cpu = np.mean(cpu_usage)
    average_memory = np.mean(memory_usage)

    print("Benchmark Report:")
    print(f"Average FPS: {average_fps:.2f}")
    print(f"Minimum FPS: {min_fps:.2f}")
    print(f"Maximum FPS: {max_fps:.2f}")
    print(f"Standard Deviation of FPS: {std_dev_fps:.2f}")
    print(f"Average CPU Usage: {average_cpu:.2f}%")
    print(f"Average Memory Usage: {average_memory:.2f}%")
    print(f"Output video saved as {output_video_path}")

Benchmark Report:
Average FPS: 1.54
Minimum FPS: 0.44
Maximum FPS: 2.26
Standard Deviation of FPS: 0.24
Average CPU Usage: 64.81%
Average Memory Usage: 14.21%
Output video saved as output_faces_and_bodies.mp4
