## Video Homogenizer

In [1]:
# Importing useful dependencies
import io
import os
import boto3

In [3]:
import os
import tempfile
import subprocess
import boto3

In [2]:
# Setup S3 client for MinIO (MinIO implements Amazon S3 API)
s3 = boto3.client(
    "s3",
    endpoint_url="http://127.0.0.1:9000", # MinIO API endpoint
    aws_access_key_id="minioadmin", # User name
    aws_secret_access_key="minioadmin", # Password
)

In [None]:
all common video formats

In [10]:
def convert_videos_to_mp4(bucket, prefix=""):
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):

            if obj['Size'] == 0: # skip the folder itself
                continue

            key = obj["Key"]

            ext = os.path.splitext(key)[1].lower() # define extension

            if ext == ".mp4":
                continue # already MP4

            # New key with .mp4 extension
            new_key = os.path.splitext(key)[0] + ".mp4"

            try:
                # Download video to temporary file
                with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_in:
                    resp = s3.get_object(Bucket=bucket, Key=key)
                    tmp_in.write(resp['Body'].read())
                    tmp_in_path = tmp_in.name

                # Temporary file for output MP4
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_out:
                    tmp_out_path = tmp_out.name

                # Convert using FFmpeg
                cmd = [
                    "ffmpeg", "-y",
                    "-i", tmp_in_path,
                    "-c:v", "libx264",
                    "-c:a", "aac",
                    "-strict", "experimental",
                    tmp_out_path
                ]
                subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

                # Upload MP4 back
                with open(tmp_out_path, "rb") as f:
                    s3.upload_fileobj(f, Bucket=bucket, Key=new_key, ExtraArgs={"ContentType": "video/mp4"})

                # Delete original video
                s3.delete_object(Bucket=bucket, Key=key)

                print(f"Replaced: {key} -> {new_key}")

            except Exception as e:
                print(f"Failed converting {key}: {e}")

            finally:
                
                # Clean up temp files
                if os.path.exists(tmp_in_path):
                    os.remove(tmp_in_path)
                if os.path.exists(tmp_out_path):
                    os.remove(tmp_out_path)

In [11]:
# Change the format of videos to MP4
convert_videos_to_mp4(bucket = "formatted-zone", prefix = "videos-test/")

Failed converting videos-test/file_example_AVI_1920_2_3MG.avi: [WinError 2] El sistema no puede encontrar el archivo especificado
Failed converting videos-test/file_example_MOV_1920_2_2MB.mov: [WinError 2] El sistema no puede encontrar el archivo especificado
Failed converting videos-test/file_example_WEBM_1920_3_7MB.webm: [WinError 2] El sistema no puede encontrar el archivo especificado
Failed converting videos-test/file_example_WMV_1920_9_3MB.wmv: [WinError 2] El sistema no puede encontrar el archivo especificado
Failed converting videos-test/sample_1280x720.mkv: [WinError 2] El sistema no puede encontrar el archivo especificado
Failed converting videos-test/sample_960x400_ocean_with_audio.mkv: [WinError 2] El sistema no puede encontrar el archivo especificado
