**Content-Aware Video Cropping**

Content-aware video cropping solution using the YOLOv8 object detection model. It processes landscape videos, crops them to a specified aspect ratio (e.g., portrait), and ensures that the key objects or actions remain in focus. The output video retains the original audio for seamless integration.

-- Also tried RCNN (currently not included in this code) for better accuracy, but it was too slow, which is why I proceeded with YOLOv8.

RCNN - better accuracy, but slower

YOLOv8 - lower accuracy compared to RCNN, but faster

In [None]:
!pip install ultralytics

In [None]:
import cv2
from ultralytics import YOLO
import numpy as np
import subprocess
import os

In [None]:
# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# video paths used by the script cell of this notebook can be read and written.
# This cell only works inside Google Colab (it needs the google.colab package).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Loads the YOLOv8 model for object detection.**

In [None]:
def load_yolov8_model(model_path):
    """
    Build a YOLOv8 detector from the given weights.

    Args:
        model_path: Value handed unchanged to the ``YOLO`` constructor
            (the script cell of this notebook passes ``"yolov8n.pt"``).

    Returns:
        The constructed ``YOLO`` instance.
    """
    return YOLO(model_path)

**Detects objects in a video frame using the YOLOv8 model.**

In [None]:
def detect_objects_yolov8(frame, model):
    """
    Run the detector on a single frame and flatten its boxes into dicts.

    Args:
        frame: Image passed unchanged to ``model``.
        model: Callable detector, invoked as ``model(frame, stream=False)``.

    Returns:
        A list with one dict per box of the first result. Each dict holds the
        integer corner coordinates ``x_min``, ``y_min``, ``x_max``, ``y_max``,
        the float ``confidence`` and the integer ``class_id``.
    """
    def to_record(box):
        # Corner coordinates are truncated to whole pixels.
        left, top, right, bottom = (int(value) for value in box.xyxy[0].tolist())
        return {
            'x_min': left, 'y_min': top,
            'x_max': right, 'y_max': bottom,
            'confidence': float(box.conf[0]),
            'class_id': int(box.cls[0]),
        }

    # Only the first result is inspected, as a single frame is submitted.
    first_result = model(frame, stream=False)[0]
    return [to_record(box) for box in first_result.boxes]

**Selects the most relevant object based on confidence, size, and proximity to the frame center.**

In [None]:
def prioritize_objects(detections, frame_width, frame_height):
    """
    Pick the most relevant detection by confidence, size and centrality.

    Every detection is scored as::

        confidence + 0.5 * area_fraction - 0.2 * center_offset_fraction

    where ``area_fraction`` is the box area divided by the frame area and
    ``center_offset_fraction`` is the distance between the box centre and the
    frame centre divided by the frame's half-diagonal. Normalising puts all
    three terms on a comparable 0..1 scale; with raw pixel units the area term
    (tens of thousands) swamps the confidence (at most 1), so the result would
    simply be the largest box.

    Args:
        detections: List of dicts with ``x_min``, ``y_min``, ``x_max``,
            ``y_max`` and ``confidence`` keys.
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        The highest-scoring detection dict (the first one on ties), or
        ``None`` when ``detections`` is empty.
    """
    if not detections:
        return None

    frame_cx = frame_width / 2
    frame_cy = frame_height / 2
    # Fall back to 1 for degenerate frame sizes so normalisation never
    # divides by zero.
    frame_area = max(frame_width * frame_height, 1)
    half_diagonal = (frame_cx ** 2 + frame_cy ** 2) ** 0.5 or 1.0

    def calculate_score(roi):
        box_cx = (roi['x_min'] + roi['x_max']) / 2
        box_cy = (roi['y_min'] + roi['y_max']) / 2
        box_area = (roi['x_max'] - roi['x_min']) * (roi['y_max'] - roi['y_min'])
        offset = ((box_cx - frame_cx) ** 2 + (box_cy - frame_cy) ** 2) ** 0.5
        return (
            roi['confidence']
            + 0.5 * (box_area / frame_area)
            - 0.2 * (offset / half_diagonal)
        )

    return max(detections, key=calculate_score)

**Calculates a cropping box that matches the desired aspect ratio.**

In [None]:
def adjust_to_aspect_ratio(x_center, y_center, target_aspect_ratio, frame_width, frame_height):
    """
    Compute the largest crop of the target aspect ratio around a point.

    The crop is as large as the frame allows: full height when the target is
    narrower than the frame, full width otherwise. It is centred on
    ``(x_center, y_center)`` and, when that would push it past a border, it is
    shifted back inside the frame instead of being cut off, so the returned
    box always has the same size and aspect ratio.

    Args:
        x_center: Desired horizontal centre of the crop, in pixels.
        y_center: Desired vertical centre of the crop, in pixels.
        target_aspect_ratio: Width divided by height of the crop (e.g. 9/16).
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` integer corners, usable as
        ``frame[y1:y2, x1:x2]``.

    Raises:
        ValueError: If ``target_aspect_ratio`` is not positive.
    """
    if target_aspect_ratio <= 0:
        raise ValueError("target_aspect_ratio must be positive")

    if frame_height * target_aspect_ratio <= frame_width:
        crop_height = frame_height
        crop_width = int(crop_height * target_aspect_ratio)
    else:
        crop_width = frame_width
        crop_height = int(crop_width / target_aspect_ratio)

    # Clamp the top-left corner so the whole window fits; clamping each edge
    # on its own would shrink the crop near borders and distort the output.
    x1 = min(max(0, int(x_center - crop_width / 2)), frame_width - crop_width)
    y1 = min(max(0, int(y_center - crop_height / 2)), frame_height - crop_height)

    return x1, y1, x1 + crop_width, y1 + crop_height

**Applies content-aware cropping to a video while maintaining temporal smoothing.**

In [None]:
def process_video(input_path, output_path, model, target_aspect_ratio=9/16, smoothing_factor=0.9):
    """
    Process a video to perform content-aware cropping with temporal smoothing.

    For every frame the detector is run, the best detection is chosen with
    ``prioritize_objects`` and a crop of the target aspect ratio is placed
    around it (or around the frame centre when nothing is detected). The crop
    corners are blended with the previous frame's corners to avoid jitter,
    then the crop is resized to the output size and written.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the cropped video to write (``mp4v`` codec,
            no audio track).
        model: Detector passed to ``detect_objects_yolov8``.
        target_aspect_ratio: Width divided by height of the output.
        smoothing_factor: Weight of the previous crop in the blend; 0 disables
            smoothing, values close to 1 make the crop move slowly.

    Returns:
        None. An error message is printed and the function returns early when
        the input cannot be opened or the output cannot be created.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print("Error: Unable to open video file.")
        return

    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        # Take the output size from the same helper that produces the
        # per-frame crops, so both agree even when the target ratio is wider
        # than the frame (crop limited by width instead of height).
        ref_x1, ref_y1, ref_x2, ref_y2 = adjust_to_aspect_ratio(
            frame_width / 2, frame_height / 2, target_aspect_ratio, frame_width, frame_height
        )
        output_width = ref_x2 - ref_x1
        output_height = ref_y2 - ref_y1

        out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height))
        if not out.isOpened():
            print("Error: Unable to open output video file for writing.")
            return

        # Smoothed crop corners, kept as floats between frames.
        prev_crop = None

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            detections = detect_objects_yolov8(frame, model)
            main_object = prioritize_objects(detections, frame_width, frame_height)

            if main_object:
                x_center = (main_object['x_min'] + main_object['x_max']) / 2
                y_center = (main_object['y_min'] + main_object['y_max']) / 2
            else:
                # Nothing detected: aim at the middle of the frame.
                x_center = frame_width // 2
                y_center = frame_height // 2

            target_crop = adjust_to_aspect_ratio(
                x_center, y_center, target_aspect_ratio, frame_width, frame_height
            )

            # Exponential smoothing of the corners. The state stays in floats
            # and is rounded only for slicing: truncating it every frame would
            # bias the crop towards the top-left and keep it from following
            # small moves to the right or downwards.
            if prev_crop is None:
                prev_crop = tuple(float(value) for value in target_crop)
            else:
                prev_crop = tuple(
                    smoothing_factor * previous + (1 - smoothing_factor) * target
                    for previous, target in zip(prev_crop, target_crop)
                )

            x1, y1, x2, y2 = (int(round(value)) for value in prev_crop)

            cropped_frame = frame[y1:y2, x1:x2]
            cropped_frame = cv2.resize(cropped_frame, (output_width, output_height))
            out.write(cropped_frame)
    finally:
        # Release the capture and writer even if detection or writing raised.
        # No window is opened by this function, so there is nothing else to
        # tear down.
        cap.release()
        if out is not None:
            out.release()

    print(f"Processed video saved to {output_path}")

**Combines audio from the original video with the processed video.**

In [None]:
def add_audio_to_video(input_video_path, processed_video_path, output_with_audio_path):
    """
    Add audio from the input video to the processed video using FFmpeg.

    The video stream of ``processed_video_path`` is copied without
    re-encoding and combined with the first audio stream of
    ``input_video_path``. The audio mapping is optional (trailing ``?``), so
    a source without an audio track yields a silent output instead of an
    FFmpeg failure. An existing output file is overwritten (``-y``).

    Args:
        input_video_path: Original video that provides the audio track.
        processed_video_path: Cropped video that provides the picture.
        output_with_audio_path: Destination file.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        "ffmpeg",
        "-i", processed_video_path,
        "-i", input_video_path,
        "-c:v", "copy",
        "-map", "0:v:0",
        # "?" marks the mapping as optional: ignored when input 1 has no audio.
        "-map", "1:a:0?",
        "-y", output_with_audio_path,
    ]

    subprocess.run(command, check=True)
    print(f"Video with audio saved to {output_with_audio_path}")

In [None]:
if __name__ == "__main__":

    # Locations on the mounted Google Drive and the YOLOv8 weights to use.
    input_video_path = "/content/drive/MyDrive/Demo/Input/test1.mp4"
    processed_video_path = "/content/drive/MyDrive/Demo/output/output1.mp4"
    final_output_path = "/content/drive/MyDrive/Demo/output/foutput1.mp4"
    yolo_model_path = "yolov8n.pt"

    # Make sure the output folders exist before anything tries to write there.
    for target_path in (processed_video_path, final_output_path):
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

    # Load YOLOv8 model
    model = load_yolov8_model(yolo_model_path)

    # Process the video
    process_video(input_video_path, processed_video_path, model)

    # Add audio to the processed video. process_video only reports failures
    # by printing, so check that it produced a file before calling FFmpeg.
    if os.path.isfile(processed_video_path):
        add_audio_to_video(input_video_path, processed_video_path, final_output_path)
    else:
        print("Error: Processed video was not created; skipping audio merge.")