In [None]:
import cv2
import torch
import numpy as np
import random
from google.colab.patches import cv2_imshow
from collections import defaultdict

In [None]:
# Object detector: the 'yolov5n' entry point of the ultralytics/yolov5 hub repo.
# torch.hub fetches the repository and weights when they are not cached locally.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5n',
)

In [None]:
# Side-view faces: Haar cascade file shipped inside the OpenCV package data.
profile_face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_profileface.xml'
)

# Frontal faces: Caffe network (architecture file + trained weights) loaded
# through OpenCV's DNN module. Both files must already exist under /content.
face_net = cv2.dnn.readNetFromCaffe(
    '/content/deploy.prototxt',
    '/content/res10_300x300_ssd_iter_140000.caffemodel',
)

In [None]:
# Where the annotated video is written (relative to the working directory).
output_video_path = "output_faces_and_bodies.mp4"
# Clip that will be read frame by frame.
input_video_path = "/content/facenew.mp4"

In [None]:
# Open the source clip and fail fast when it cannot be read; otherwise the
# processing loop would silently do nothing and an empty output file would
# still be reported as saved.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Keep the frame rate as the float OpenCV reports: truncating e.g. 29.97 to 29
# makes the output play slower than the source. A value of 0 means the
# container did not expose a rate, so fall back to a conventional default.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The writer uses the source resolution so annotated frames can be written
# without resizing.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Could not open output video for writing: {output_video_path}")

In [None]:
# Fixed seed so the same 100-entry palette is produced on every run.
random.seed(42)

# Each entry is a 3-tuple of integers in [0, 255], drawn channel by channel.
colors = []
for _ in range(100):
    colors.append(tuple(random.randint(0, 255) for _channel in range(3)))

In [None]:
# Tracking state shared with the processing loop.
next_person_id = 1  # identifier handed to the next unmatched person
person_tracker = defaultdict(lambda: None)  # track id -> last known box (None if unknown)

# Number of frames processed so far.
frame_count = 0

In [None]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: ``(x1, y1, x2, y2)`` corner coordinates of the first box.
        box2: ``(x1, y1, x2, y2)`` corner coordinates of the second box.

    Returns:
        The overlap area divided by the area of the union, or ``0`` when the
        union is not positive (e.g. both boxes have zero area).
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Extent of the overlap along each axis; clamped to zero when the boxes
    # do not overlap on that axis.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap_area = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    combined_area = (
        (right_a - left_a) * (bottom_a - top_a)
        + (right_b - left_b) * (bottom_b - top_b)
        - overlap_area
    )

    if combined_area > 0:
        return overlap_area / combined_area
    return 0

In [None]:
# Tunables for the processing loop.
PERSON_CLASS_ID = 0              # detector class index kept as "person"
IOU_MATCH_THRESHOLD = 0.5        # IoU a detection must exceed to continue an existing track
FACE_CONFIDENCE_THRESHOLD = 0.5  # score a frontal-face detection must exceed
PREVIEW_EVERY_N_FRAMES = 30      # show one annotated frame inline every N frames

# Read the clip frame by frame, detect people and faces, assign each person a
# track id by overlap with the previous frame, draw the annotations and write
# the frame to the output video.
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # OpenCV decodes frames as BGR, but the YOLOv5 hub wrapper interprets NumPy
    # input as RGB, so convert before inference. Box coordinates are unaffected.
    results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detections = results.xyxy[0].cpu().numpy()
    persons = [d for d in detections if int(d[-1]) == PERSON_CLASS_ID]

    h, w = frame.shape[:2]

    # Frontal face detection
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    face_net.setInput(blob)
    face_detections = face_net.forward()

    # Decode the frontal-face output once per frame (it does not depend on the
    # person being drawn). Coordinates are scaled by the frame width/height.
    frame_scale = np.array([w, h, w, h])
    face_boxes = []
    for i in range(face_detections.shape[2]):
        if face_detections[0, 0, i, 2] > FACE_CONFIDENCE_THRESHOLD:
            fx1, fy1, fx2, fy2 = face_detections[0, 0, i, 3:7] * frame_scale
            face_boxes.append((int(fx1), int(fy1), int(fx2), int(fy2)))

    # Profile face detection
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    profile_faces = profile_face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Cascade results are (x, y, width, height); convert to corner form so both
    # detectors share one drawing path (frontal boxes first, then profile).
    face_boxes.extend(
        (int(px), int(py), int(px + pw), int(py + ph))
        for (px, py, pw, ph) in profile_faces
    )

    # Tracks confirmed in this frame. `person_tracker` still holds the previous
    # frame's boxes while matching, so detections are never compared against
    # boxes that were added earlier in the same frame.
    current_tracks = {}

    for person in persons:
        x1, y1, x2, y2 = (int(v) for v in person[:4])
        person_box = (x1, y1, x2, y2)

        # Greedy one-to-one matching: take the unclaimed previous track with the
        # highest IoU above the threshold, so two detections in one frame can
        # never share an id.
        matched_id = None
        best_iou = IOU_MATCH_THRESHOLD
        for tracked_id, tracked_box in person_tracker.items():
            if tracked_box is None or tracked_id in current_tracks:
                continue
            iou = calculate_iou(person_box, tracked_box)
            if iou > best_iou:
                matched_id, best_iou = tracked_id, iou

        if matched_id is None:
            matched_id = next_person_id
            next_person_id += 1

        current_tracks[matched_id] = person_box

        color = colors[matched_id % len(colors)]

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, f"Person {matched_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # A face belongs to this person when its box lies fully inside the
        # person box; it is drawn in the person's color and carries the same id.
        for fx1, fy1, fx2, fy2 in face_boxes:
            if fx1 >= x1 and fy1 >= y1 and fx2 <= x2 and fy2 <= y2:
                cv2.rectangle(frame, (fx1, fy1), (fx2, fy2), color, 2)
                cv2.putText(frame, f"Face {matched_id}", (fx1, fy1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Only people seen in this frame are carried over to the next one.
    person_tracker = current_tracks

    out.write(frame)

    if frame_count % PREVIEW_EVERY_N_FRAMES == 0:
        cv2_imshow(frame)

    frame_count += 1

In [None]:
# Close the reader and then the writer; releasing the writer finalizes the file.
for video_handle in (cap, out):
    video_handle.release()

print(f"Output video saved as {output_video_path}")