## Video Homogenizer

In [1]:
#!pip install moviepy

In [2]:
# Importing useful dependencies
import io
import os
import boto3
import warnings
import tempfile
from moviepy import VideoFileClip

# Silence UserWarning messages raised from MoviePy modules (for example its
# ffmpeg frame-read warnings) so they do not clutter the notebook output.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="moviepy",
)

In [3]:
# Setup S3 client for MinIO (MinIO implements the Amazon S3 API).
# Endpoint and credentials are taken from the environment when the variables
# are set, so real secrets do not have to be stored in the notebook; the
# fallbacks are the local development values used previously.
s3 = boto3.client(
    "s3",
    endpoint_url=os.environ.get("MINIO_ENDPOINT", "http://127.0.0.1:9000"),  # MinIO API endpoint
    aws_access_key_id=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),  # User name
    aws_secret_access_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),  # Password
)

In [4]:
def _remove_temp_file(path):
    """Delete a temporary file on a best-effort basis.

    A leftover temp file is not worth aborting the whole run for, so failures
    are reported with a message instead of being raised.
    """
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # nothing to clean up
    except OSError as e:
        print(f"Could not remove temporary file {path}: {e}")


def _transcode_to_mp4(src_path, dst_path):
    """Re-encode the video at src_path into dst_path as H.264 video / AAC audio."""
    clip = VideoFileClip(src_path)
    try:
        # logger=None suppresses MoviePy's progress output
        clip.write_videofile(dst_path, codec="libx264", audio_codec="aac", logger=None)
    finally:
        # Close the clip before the caller uploads and deletes the temp files.
        # A failure to close must not discard a successfully written output.
        try:
            clip.close()
        except Exception as e:
            print(f"Could not close clip for {src_path}: {e}")


def convert_videos_to_mp4(bucket, prefix=""):
    """Convert every non-MP4 object under a bucket prefix to MP4, in place.

    For each listed object the function downloads it to a temporary file,
    re-encodes it with MoviePy, uploads the result under the same key with a
    ``.mp4`` extension, and only then deletes the original object. Progress
    and failures are reported with ``print``; a failure on one object does not
    stop the processing of the others.

    Skipped objects:
      * zero-byte keys ending in ``/`` (folder placeholders);
      * keys whose extension is already ``.mp4`` (case-insensitive).

    Every other object is attempted regardless of its extension; objects that
    cannot be converted are reported and left untouched in the bucket.

    NOTE: the upload does not check whether the target ``.mp4`` key already
    exists.

    Args:
        bucket: Name of the bucket to scan (uses the module-level ``s3`` client).
        prefix: Key prefix that limits which objects are listed.

    Returns:
        None.
    """
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):

            key = obj["Key"]

            if obj["Size"] == 0 and key.endswith("/"):  # skip the folder itself
                continue

            stem, ext = os.path.splitext(key)
            ext = ext.lower()

            if ext == ".mp4":
                continue  # already MP4

            # New key with .mp4 extension
            new_key = stem + ".mp4"

            tmp_in_path = tmp_out_path = None
            try:
                # Stream the object straight to a temp file instead of reading
                # the whole video into memory. The path is recorded before the
                # download starts so the file is removed even if it fails.
                with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_in:
                    tmp_in_path = tmp_in.name
                    s3.download_fileobj(Bucket=bucket, Key=key, Fileobj=tmp_in)

                # Reserve a temp output path; the file is closed so that
                # MoviePy can write to it by name.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_out:
                    tmp_out_path = tmp_out.name

                _transcode_to_mp4(tmp_in_path, tmp_out_path)

                # Upload MP4 back to bucket
                with open(tmp_out_path, "rb") as f:
                    s3.upload_fileobj(f, Bucket=bucket, Key=new_key, ExtraArgs={"ContentType": "video/mp4"})

                # Delete the original only after the MP4 has been uploaded
                s3.delete_object(Bucket=bucket, Key=key)

                print(f"Replaced: {key} -> {new_key}")

            except Exception as e:
                # Per-object boundary: report and continue with the next object
                print(f"Failed converting {key}: {e}")

            finally:
                _remove_temp_file(tmp_in_path)
                _remove_temp_file(tmp_out_path)

In [5]:
# Convert the objects stored under "videos/" in the "formatted-zone" bucket to MP4
convert_videos_to_mp4(bucket="formatted-zone", prefix="videos/")