In [8]:
import cv2
import mediapipe as mp
import numpy as np
import os
import subprocess
import json


In [None]:

# === CONFIGURATION ===
# Folder scanned for source videos and folder that receives the cropped results.
input_folder = "input_videos"
output_folder = "cropped_videos"
output_size = (128, 128)  # target frame size as (width, height)

# ffmpeg / ffprobe executables. The FFMPEG_BIN / FFPROBE_BIN environment
# variables take precedence so the notebook can run on another machine
# without editing this cell; the original Windows paths remain the defaults.
FFMPEG_BIN = os.environ.get(
    "FFMPEG_BIN", r"C:\ffmpeg-7.1.1-full_build\bin\ffmpeg.exe"
)
FFPROBE_BIN = os.environ.get(
    "FFPROBE_BIN", r"C:\ffmpeg-7.1.1-full_build\bin\ffprobe.exe"
)

os.makedirs(output_folder, exist_ok=True)

# One FaceMesh instance is shared by every frame of every video.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Face-mesh landmark indices whose bounding box defines the mouth crop.
# NOTE(review): despite the name, this list presumably mixes outer and inner
# lip contour indices - verify against the MediaPipe face-mesh landmark map.
LIPS_OUTER = [
    61, 146, 91, 181, 84, 17, 314, 405, 321,
    375, 291, 308, 324, 318, 402, 317, 14, 87,
    178, 88, 95, 185, 40, 39, 37, 0, 267, 269,
    270, 409, 415, 310, 311, 312, 13, 82, 81,
    42, 183, 78
]

# === UTILITY FUNCTIONS ===

def resize_with_padding(image, target_size):
    """Scale `image` to fit inside `target_size` and centre it on a black canvas.

    The aspect ratio of the input is preserved; the unused border of the
    canvas stays zero (black).

    Args:
        image: array whose first two axes are (height, width). Colour inputs
            are expected to carry 3 channels; a 2-D (grayscale) input is
            replicated across the 3 output channels.
        target_size: (width, height) of the returned canvas.

    Returns:
        A uint8 array of shape (target_height, target_width, 3).
    """
    target_w, target_h = target_size
    h, w = image.shape[:2]
    result = np.zeros((target_h, target_w, 3), dtype=np.uint8)

    # An empty input has nothing to draw and would divide by zero below.
    if h == 0 or w == 0:
        return result

    scale = min(target_w / w, target_h / h)
    # Very elongated inputs can truncate to 0 pixels on the short side, which
    # cv2.resize rejects; keep at least one pixel and never exceed the canvas.
    new_w = max(1, min(target_w, int(w * scale)))
    new_h = max(1, min(target_h, int(h * scale)))
    resized = cv2.resize(image, (new_w, new_h))
    if resized.ndim == 2:
        # Add a channel axis so the single plane broadcasts into all 3 channels.
        resized = resized[:, :, np.newaxis]

    pad_x = (target_w - new_w) // 2
    pad_y = (target_h - new_h) // 2
    result[pad_y:pad_y+new_h, pad_x:pad_x+new_w] = resized
    return result

def get_video_rotation(path):
    """Read the `rotate` tag of the first video stream using ffprobe.

    Args:
        path: path of the video file to inspect.

    Returns:
        The rotation in degrees normalized to the range [0, 360), or 0 when
        ffprobe cannot be run, fails, or reports no usable `rotate` tag.
    """
    cmd = [
        FFPROBE_BIN, "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream_tags=rotate",
        "-of", "json", path
    ]
    try:
        # Keep stderr out of the captured stream: diagnostics mixed into
        # stdout would make the JSON payload unparseable.
        output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        data = json.loads(output)
        # NOTE(review): some FFmpeg builds may expose rotation only through
        # display-matrix side data rather than this tag - confirm for the
        # ffprobe version in use before relying on the result.
        return int(data["streams"][0]["tags"]["rotate"]) % 360
    except (OSError, subprocess.SubprocessError, ValueError,
            KeyError, IndexError, TypeError):
        # Best effort: a missing binary, a failing probe, malformed output or
        # an absent tag all mean "no rotation information".
        return 0

def correct_rotation(frame, rotation):
    """Rotate `frame` according to a rotation angle given in degrees.

    Args:
        frame: image to rotate.
        rotation: 90 rotates clockwise, 180 turns the frame upside down and
            270 rotates counter-clockwise; any other value is a no-op.

    Returns:
        The rotated image, or `frame` itself when no rotation applies.
    """
    # Guard first: every angle other than the three handled ones is a no-op.
    if rotation not in (90, 180, 270):
        return frame

    rotate_codes = {
        90: cv2.ROTATE_90_CLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_COUNTERCLOCKWISE,
    }
    return cv2.rotate(frame, rotate_codes[rotation])

def merge_audio(original_video, processed_video, output_video):
    """Mux the processed video stream with the original file's audio via ffmpeg.

    The video stream of `processed_video` is copied without re-encoding and
    the first audio stream of `original_video`, if any, is encoded as AAC.

    Args:
        original_video: path of the source file that supplies the audio.
        processed_video: path of the file that supplies the video stream.
        output_video: path of the file to write; overwritten if it exists.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    cmd = [
        FFMPEG_BIN,
        "-y",  # overwrite the output file without prompting
        "-i", processed_video,
        "-i", original_video,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        # The trailing "?" makes the audio mapping optional, so a source
        # without an audio track yields a silent output instead of an error.
        "-map", "1:a:0?",
        output_video
    ]
    subprocess.run(cmd, check=True)

# === PROCESS VIDEOS ===
VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")
LIP_MARGIN_PX = 10  # pixels of context kept around the lip bounding box


def _crop_lip_region(frame, face_landmarks):
    """Cut the lip area, widened by LIP_MARGIN_PX, out of a frame.

    Args:
        frame: image array indexed as [row, column, channel].
        face_landmarks: landmark sequence of one detected face; the `x` / `y`
            attributes of each item are scaled by the frame width / height.

    Returns:
        The cropped sub-image, or None when no non-empty crop can be formed.
    """
    frame_h, frame_w = frame.shape[:2]
    landmark_count = len(face_landmarks)
    # Look the lip landmarks up by index instead of scanning the index list
    # once per landmark; indices beyond the detected set are skipped.
    pixel_points = [
        (int(face_landmarks[i].x * frame_w), int(face_landmarks[i].y * frame_h))
        for i in LIPS_OUTER
        if i < landmark_count
    ]
    if not pixel_points:
        return None

    # cv2.boundingRect only accepts 32-bit point sets; NumPy's default integer
    # type is platform/version dependent, so pin the dtype explicitly.
    points = np.array(pixel_points, dtype=np.int32)
    x, y, box_w, box_h = cv2.boundingRect(points)
    x1, y1 = max(0, x - LIP_MARGIN_PX), max(0, y - LIP_MARGIN_PX)
    x2 = min(frame_w, x + box_w + LIP_MARGIN_PX)
    y2 = min(frame_h, y + box_h + LIP_MARGIN_PX)
    if x2 <= x1 or y2 <= y1:
        return None

    crop = frame[y1:y2, x1:x2]
    return crop if crop.size > 0 else None


# Frame written whenever no face / lip crop is available.
blank_frame = np.zeros((output_size[1], output_size[0], 3), dtype=np.uint8)
failed_files = []

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(VIDEO_EXTENSIONS):
        continue

    input_path = os.path.join(input_folder, filename)
    temp_output_path = os.path.join(output_folder, "temp_" + filename)
    final_output_path = os.path.join(output_folder, filename)

    print(f"Processing: {filename}")

    # NOTE(review): some OpenCV backends may already apply rotation metadata
    # while decoding (CAP_PROP_ORIENTATION_AUTO); confirm the manual
    # correction below does not rotate such frames a second time.
    rotation = get_video_rotation(input_path)
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        print(f"WARNING: could not open video, skipped: {filename}")
        failed_files.append(filename)
        continue

    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0 or np.isnan(fps):
        fps = 25  # fall back to a default when the container reports no rate

    out = cv2.VideoWriter(
        temp_output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        output_size
    )
    if not out.isOpened():
        cap.release()
        out.release()
        print(f"WARNING: could not create output video, skipped: {filename}")
        failed_files.append(filename)
        continue

    # Release the capture and writer even if a frame fails to process.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = correct_rotation(frame, rotation)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb)

            final_frame = blank_frame
            if results.multi_face_landmarks:
                crop = _crop_lip_region(
                    frame, results.multi_face_landmarks[0].landmark
                )
                if crop is not None:
                    final_frame = resize_with_padding(crop, output_size)

            out.write(final_frame)
    finally:
        cap.release()
        out.release()

    # Merge audio from original video
    try:
        merge_audio(input_path, temp_output_path, final_output_path)
    except (subprocess.CalledProcessError, OSError) as exc:
        # Keep the silent result rather than aborting the whole batch.
        os.replace(temp_output_path, final_output_path)
        print(
            f"WARNING: audio merge failed for {filename} ({exc}); "
            f"saved video without audio: {final_output_path}"
        )
        failed_files.append(filename)
        continue

    os.remove(temp_output_path)
    print(f"✅ Saved video with audio: {final_output_path}")

if failed_files:
    print(f"Finished with problems in {len(failed_files)} file(s): {', '.join(failed_files)}")
else:
    print("All videos processed successfully with correct rotation, aspect ratio, and audio.")


Processing: eedf8aff-a19a-4c5d-9f8b-14f5352b0266.mp4
✅ Saved video with audio: cropped_videos\eedf8aff-a19a-4c5d-9f8b-14f5352b0266.mp4
Processing: WIN_20251014_15_19_00_Pro.mp4
✅ Saved video with audio: cropped_videos\WIN_20251014_15_19_00_Pro.mp4
🎬 All videos processed successfully with correct rotation, aspect ratio, and audio.
