In [2]:
import cv2
import torch
from torchvision import transforms

# Load the small YOLOv5 model with pretrained weights from PyTorch Hub.
# NOTE(review): the stock 'yolov5s' checkpoint is a general-purpose object
# detector rather than a face-specific one — confirm these are the weights
# intended for face detection.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
# Put the model in evaluation (inference) mode.
model.eval()


def find_optimal_safezone(faces, frame_width, frame_height, safe_zone_margin):
    """Pick a full-width horizontal band that does not overlap any face.

    The band at the bottom of the frame is preferred. If any face reaches
    into it, the band at the top of the frame is tried instead.

    Args:
        faces: Iterable of detections indexed as (x1, y1, x2, y2, ...); only
            the vertical extents y1 (index 1) and y2 (index 3) are used.
        frame_width: Width of the frame in pixels.
        frame_height: Height of the frame in pixels.
        safe_zone_margin: Band height as a fraction of the frame height.

    Returns:
        [left, top, right, bottom] for the chosen band, or None when faces
        touch both the bottom and the top band.
    """
    band_height = frame_height * safe_zone_margin
    bottom_band_top = frame_height * (1 - safe_zone_margin)

    # Preferred placement: the bottom band, unless a face reaches into it.
    if not any(face[3] >= bottom_band_top for face in faces):
        return [0, bottom_band_top, frame_width, frame_height]

    # Fallback placement: the top band. This check must only run for the
    # fallback; comparing faces against the bottom band's lower edge (the
    # frame height) would reject every frame that contains a face.
    if not any(face[1] <= band_height for face in faces):
        return [0, 0, frame_width, band_height]

    # Faces occupy both candidate bands.
    return None

def adjust_safe_zone(faces, original_safe_zone, width, height):
    """Return a face-free safe zone, falling back to the original one.

    Args:
        faces: Detections forwarded to find_optimal_safezone.
        original_safe_zone: Zone returned when no face-free band is found.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        The zone produced by find_optimal_safezone when it is truthy,
        otherwise original_safe_zone.
    """
    # Height of the candidate band as a fraction of the frame height (15%).
    margin_fraction = 0.15

    candidate = find_optimal_safezone(faces, width, height, margin_fraction)
    if candidate:
        return candidate
    return original_safe_zone


def subtract_zones(safe, unsafe):
    """Placeholder for removing an unsafe zone from a safe zone.

    The intended behaviour is to return the parts of `safe` that do not
    overlap `unsafe`. That geometry is not implemented: `unsafe` is ignored
    and the safe zone is returned unchanged as the only list element.
    """
    remaining = []
    remaining.append(safe)  # Placeholder: nothing is subtracted yet
    return remaining

def area(zone):
    """Return the area of a zone given as [left, top, right, bottom]."""
    horizontal_extent = zone[2] - zone[0]
    vertical_extent = zone[3] - zone[1]
    return horizontal_extent * vertical_extent
    
def calculate_safe_zone(frame_width, frame_height):
    """Return the default safe zone as [left, top, right, bottom].

    The zone is inset by 5% of the frame width on the left and right, 5% of
    the frame height at the top and 15% of the frame height at the bottom.
    Each inset is truncated to an integer.
    """
    side_inset = int(frame_width * 0.05)
    top_inset = int(frame_height * 0.05)
    bottom_inset = int(frame_height * 0.15)  # Larger margin at the bottom

    return [
        side_inset,
        top_inset,
        frame_width - side_inset,
        frame_height - bottom_inset,
    ]



def process_video(video_path, model):
    """Annotate every frame of a video with its safe zone and save the result.

    Each frame is passed to process_frame for detection, the safe zone is
    adjusted around the detections, drawn as a green rectangle, and the
    frame is written to 'output.mp4' in the current working directory.

    Args:
        video_path: Path of the input video.
        model: Detection model forwarded to process_frame.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this check the loop below is skipped silently and an empty
        # output file is produced.
        cap.release()
        print(f"Error: could not open video '{video_path}'.")
        return

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the source frame rate so the output plays at the original
        # speed; fall back to 20 fps when the container does not report a
        # usable value (the comparison is also false for NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 20.0

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height))

        original_safe_zone = calculate_safe_zone(width, height)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect faces in the frame
            faces = process_frame(frame, model, width, height)

            # Dynamically adjust the safe zone based on detected faces; a copy
            # is passed so the default zone cannot be modified by the callee.
            safe_zone = adjust_safe_zone(faces, list(original_safe_zone), width, height)

            # cv2.rectangle needs integer pixel coordinates
            safe_zone = [int(coord) for coord in safe_zone]

            # Ensure safe_zone has four elements before drawing
            if len(safe_zone) == 4:
                cv2.rectangle(
                    frame,
                    (safe_zone[0], safe_zone[1]),
                    (safe_zone[2], safe_zone[3]),
                    (0, 255, 0),
                    2,
                )
            else:
                print("Error: safe_zone does not have the expected format or number of elements.")

            # Write the frame with annotations
            out.write(frame)
    finally:
        # Release the capture and the writer even if a frame raised.
        cap.release()
        if out is not None:
            out.release()

def scale_coords(img1_shape, coords, img0_shape):
    """Map boxes from a square, letterboxed model input back to the original image.

    Args:
        img1_shape: Side length (a single number) of the square model input.
        coords: Tensor whose first four columns are x1, y1, x2, y2 in model
            input pixels. It is modified in place.
        img0_shape: (height, width) of the original image.

    Returns:
        The same `coords` tensor, rescaled and clamped to the original image.
    """
    orig_height, orig_width = img0_shape[0], img0_shape[1]

    # Gain is the uniform scale that fits the original image inside the square.
    gain = min(img1_shape / orig_height, img1_shape / orig_width)
    # Padding added on each side during the resize, as (x, y).
    pad = (img1_shape - orig_width * gain) / 2, (img1_shape - orig_height * gain) / 2

    coords[:, [0, 2]] -= pad[0]  # Remove padding from x-coordinates
    coords[:, [1, 3]] -= pad[1]  # Remove padding from y-coordinates
    coords[:, :4] /= gain  # Scale bounding box back to original image size

    # Clamp x to the image width and y to the image height. Tensor.clamp is
    # used instead of apply_ with a Python lambda because apply_ is CPU-only.
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, orig_width - 1)
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, orig_height - 1)
    return coords
def process_frame(frame, model, width, height, input_size=640, conf_threshold=0.6):
    """Run the detector on one frame and return its confident detections.

    The frame is handed to the model as an image array rather than as a
    pre-resized tensor, so the model's own preprocessing, non-max suppression
    and rescaling to the original frame size are used.
    NOTE(review): this relies on `model` being a YOLOv5 PyTorch Hub model
    (AutoShape wrapper); a TypeError is raised for anything else.
    NOTE(review): the stock 'yolov5s' weights detect general object classes,
    not faces specifically — confirm the intended weights.

    Args:
        frame: Image as returned by cv2.VideoCapture.read (BGR channel order).
        model: Detection model called as model(image, size=...).
        width: Unused; kept for interface compatibility.
        height: Unused; kept for interface compatibility.
        input_size: Inference size passed to the model.
        conf_threshold: Detections at or below this confidence are dropped.

    Returns:
        List of (x1, y1, x2, y2, conf, cls) tuples in frame pixel
        coordinates, with cls converted to int.

    Raises:
        TypeError: If the model output has no `xyxy` attribute.
    """
    # OpenCV delivers BGR; reverse the channel axis to get RGB for the model.
    rgb_frame = frame[..., ::-1]

    with torch.no_grad():
        results = model(rgb_frame, size=input_size)

    if not hasattr(results, "xyxy"):
        raise TypeError(
            "process_frame expects a YOLOv5 hub model whose results expose `.xyxy`"
        )

    # One image was passed, so take the first (only) detection tensor; its
    # columns are x1, y1, x2, y2, confidence, class.
    detections = results.xyxy[0]

    # Filter out predictions with low confidence
    detections = detections[detections[:, 4] > conf_threshold]

    return [
        (x1, y1, x2, y2, conf, int(cls))
        for x1, y1, x2, y2, conf, cls in detections.cpu().tolist()
    ]



# Path of the input video; replace it with a path that exists on the machine
# running this script.
video_path = '/home/satish/Downloads/ss.mp4'  # Update this with the correct path
# Run the pipeline on the video using the model loaded above.
process_video(video_path, model)


Using cache found in /home/satish/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-2-22 Python-3.10.12 torch-2.2.0+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
