# Film Format Check and Conversion

Installing ffmpeg and ffprobe

In [None]:
import json
import math
import os
import shutil
import subprocess
from fractions import Fraction

# Adapted from Coursera exercise 19.
# Make sure an ffmpeg executable is reachable; download a static build when it is not.
if not shutil.which("ffmpeg"):
    # Download the static release tarball, unpack it into the working directory,
    # then delete the archive.
    !curl https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -o ffmpeg.tar.xz \
        && tar -xf ffmpeg.tar.xz && rm ffmpeg.tar.xz
    # Locate the unpacked ffmpeg-*-static directory; the `!` capture yields a list of
    # matching paths, reported relative to the current working directory.
    ffmdir = !find . -iname ffmpeg-*-static
    # Append the first match to the PATH environment variable via the %env magic.
    path = %env PATH
    path = path + ':' + ffmdir[0]
    %env PATH $path

# Print the version banner to confirm that ffmpeg can now be executed.
!ffmpeg -version

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-l

The format of the films specified by the festival organisation is:  
* Video format (container): mp4
* Video codec: h.264
* Audio codec: aac
* Frame rate: 25 FPS
* Aspect ratio: 16:9
* Resolution: 640 x 360
* Video bit rate: 2 – 5 Mb/s
* Audio bit rate: up to 256 kb/s
* Audio channels: stereo

In [None]:
# Delivery specification required by the film festival.
# Key order mirrors the specification list; the suffixes give the units.
EXPECTED_FORMAT = dict(
    container="mp4",            # file container
    video_codec="h264",         # H.264 video
    audio_codec="aac",          # AAC audio
    frame_rate=25,              # frames per second
    aspect_ratio="16:9",        # width:height
    resolution_width=640,       # pixels
    resolution_height=360,      # pixels
    video_br_min_mbs=2,         # lower video bit-rate bound, Mb/s
    video_br_max_mbs=5,         # upper video bit-rate bound, Mb/s
    audio_br_max_kbs=256,       # maximum audio bit rate, kb/s
    audio_channels=2,           # stereo
)

To check the film properties, two functions are defined:
- `extract_metadata` extracts the film metadata using ffprobe
- `compare_format` compares the extracted metadata with the expected format

In [None]:
def _parse_frame_rate(rate_text):
    """Convert an ffprobe rational such as "30000/1001" into a float FPS value."""
    # Fraction only accepts numeric "num/den" text, so nothing read from the
    # probed file is ever executed (eval() would run arbitrary expressions).
    return float(Fraction(rate_text))


def _reduced_aspect_ratio(width, height):
    """Return width:height reduced to lowest terms, e.g. 640x360 -> "16:9"."""
    divisor = math.gcd(width, height) or 1  # gcd(0, 0) == 0; avoid dividing by it
    return f"{width // divisor}:{height // divisor}"


# Extract metadata from a video file using ffprobe
def extract_metadata(file_path):
    """Probe a media file with ffprobe and return the properties the checker needs.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        A dict with the keys ``container``, ``video_codec``, ``audio_codec``,
        ``frame_rate`` (float FPS), ``aspect_ratio`` (reduced "W:H" string built
        from the pixel dimensions), ``resolution_width``, ``resolution_height``,
        ``video_br_mbs``, ``audio_br_kbs`` and ``audio_channels``; or ``None``
        when the file cannot be probed or lacks a video or audio stream. The
        failure reason is printed rather than raised.
    """
    try:
        # ffprobe command to get stream and container metadata in JSON format
        cmd = [
            "ffprobe", "-v", "error", "-show_streams", "-show_format",
            "-of", "json", file_path
        ]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:
            # Report ffprobe's own diagnostic instead of a JSON/KeyError side effect.
            raise RuntimeError(f"ffprobe failed: {result.stderr.strip()}")
        metadata = json.loads(result.stdout)

        # The first video stream and the first audio stream describe the film.
        streams = metadata.get("streams", [])
        video_stream = next((stream for stream in streams if stream.get("codec_type") == "video"), None)
        audio_stream = next((stream for stream in streams if stream.get("codec_type") == "audio"), None)

        if not video_stream or not audio_stream:
            raise ValueError("Invalid video file. Missing video or audio stream.")

        width = video_stream["width"]
        height = video_stream["height"]

        extracted_data = {
            "container": metadata["format"]["format_name"],
            "video_codec": video_stream["codec_name"].lower(),
            "audio_codec": audio_stream["codec_name"].lower(),
            "frame_rate": _parse_frame_rate(video_stream["r_frame_rate"]),
            "aspect_ratio": _reduced_aspect_ratio(width, height),
            "resolution_width": width,
            "resolution_height": height,
            # NOTE(review): this reads the container-level format.bit_rate, which is
            # not a video-only figure; kept as-is so existing results do not shift.
            "video_br_mbs": int(metadata["format"]["bit_rate"]) / 1e6,  # bits/s -> Mb/s
            # A missing audio bit rate is reported as 0 kb/s.
            "audio_br_kbs": int(audio_stream.get("bit_rate", 0)) / 1e3,  # bits/s -> kb/s
            "audio_channels": audio_stream["channels"]
        }

        return extracted_data

    except Exception as e:
        # Best effort: one unreadable file must not stop the whole batch.
        print(f"Error extracting metadata from {file_path}: {e}")
        return None


# Compare extracted metadata with expected format
def compare_format(metadata, expected_format):
    """Check extracted film metadata against the required delivery format.

    Args:
        metadata: Dict as returned by ``extract_metadata`` (or ``None``/empty
            when extraction failed).
        expected_format: Dict with the keys ``container``, ``video_codec``,
            ``audio_codec``, ``frame_rate``, ``resolution_width``,
            ``resolution_height``, ``video_br_min_mbs``, ``video_br_max_mbs``,
            ``audio_br_max_kbs`` and ``audio_channels``. An optional
            ``aspect_ratio`` ("W:H") enables the aspect-ratio check.

    Returns:
        A ``(status, lines)`` tuple. ``status`` is "Format OK",
        "Format Incorrect" or "Error: Metadata extraction failed" (with an
        empty list). Each line reads "<Name>: detected (<value>) → Correct" or
        "<Name>: detected (<value>) → Mismatch (expected <requirement>)".
    """
    if not metadata:
        return "Error: Metadata extraction failed", []

    problems = []
    comparison_results = []

    def check_property(name, detected, required, condition):
        # Record one report line; failing properties also state the requirement.
        if condition:
            comparison_results.append(f"{name}: detected ({detected}) → Correct")
        else:
            comparison_results.append(f"{name}: detected ({detected}) → Mismatch (expected {required})")
            problems.append(name)

    # Container: the detected value may be a comma-separated list of format names,
    # so match the required name against the individual entries.
    # NOTE(review): ffprobe reports one shared name list for the MOV/MP4 family,
    # so this check cannot tell a .mov file from a .mp4 file - confirm if that matters.
    detected_container = metadata["container"]
    expected_container = expected_format["container"]
    container_names = [name.strip().lower() for name in detected_container.split(",")]
    check_property("Container", detected_container, expected_container, expected_container.lower() in container_names)

    check_property("Video Codec", metadata["video_codec"], expected_format["video_codec"], metadata["video_codec"] == expected_format["video_codec"])
    check_property("Audio Codec", metadata["audio_codec"], expected_format["audio_codec"], metadata["audio_codec"] == expected_format["audio_codec"])
    # Frame rates within 0.1 FPS of the target are accepted.
    check_property("Frame Rate", f"{metadata['frame_rate']} FPS", f"{expected_format['frame_rate']} FPS", abs(metadata["frame_rate"] - expected_format["frame_rate"]) <= 0.1)

    width = metadata["resolution_width"]
    height = metadata["resolution_height"]

    # Aspect ratio, derived from the pixel dimensions so the result does not
    # depend on how the metadata's own "aspect_ratio" string was formatted.
    expected_ratio = expected_format.get("aspect_ratio")
    if expected_ratio:
        ratio_w, ratio_h = (int(part) for part in expected_ratio.split(":"))
        divisor = math.gcd(width, height) or 1
        detected_ratio = f"{width // divisor}:{height // divisor}"
        # Cross-multiplication compares the ratios without floating-point error.
        check_property("Aspect Ratio", detected_ratio, expected_ratio, width * ratio_h == height * ratio_w)

    check_property("Resolution", f"{width}x{height}", f"{expected_format['resolution_width']}x{expected_format['resolution_height']}", width == expected_format["resolution_width"] and height == expected_format["resolution_height"])

    video_br_mbs = metadata["video_br_mbs"]
    audio_br_kbs = metadata["audio_br_kbs"]
    check_property("Video Bitrate", f"{video_br_mbs:.2f} Mbps", f"{expected_format['video_br_min_mbs']} - {expected_format['video_br_max_mbs']} Mbps", expected_format["video_br_min_mbs"] <= video_br_mbs <= expected_format["video_br_max_mbs"])
    # Allow slight variation (10 kbps) above the audio bit-rate ceiling
    tolerance = 10.0
    check_property("Audio Bitrate", f"{audio_br_kbs:.2f} kbps", f"≤ {expected_format['audio_br_max_kbs']} kbps", audio_br_kbs <= expected_format["audio_br_max_kbs"] + tolerance)
    check_property("Audio Channels", metadata["audio_channels"], expected_format["audio_channels"], metadata["audio_channels"] == expected_format["audio_channels"])

    # Determine overall status
    overall_status = "Format OK" if not problems else "Format Incorrect"

    return overall_status, comparison_results


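The next function automates the film format conversion: it remuxes the file when only the container is wrong and re-encodes it otherwise.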

In [None]:
# Converts a video file to the required format
def convert_video(input_file, issues):
    """Convert a film to the festival format with ffmpeg.

    Args:
        input_file: Path of the film to convert.
        issues: Report lines describing the failed checks (strings containing
            "Mismatch"; the container line starts with "Container:"). ``None``
            is treated as an empty list.

    Returns:
        The path of the converted file ("<input without extension>_formatOK.mp4"),
        or ``None`` when ffmpeg fails or an unexpected error occurs. Progress and
        error messages are printed.
    """
    try:
        issues = issues or []

        # Generate output filename with '_formatOK' and in MP4 format
        file_name, _ = os.path.splitext(input_file)
        output_file = f"{file_name}_formatOK.mp4"

        # If only the container is incorrect, remux without re-encoding.
        # If codec, frame rate, resolution, or bitrate are incorrect, perform re-encoding.
        container_issue = any("Container:" in issue for issue in issues)
        other_issues = any("Mismatch" in issue and "Container:" not in issue for issue in issues)

        # -nostdin: never wait for keyboard input inside the notebook kernel.
        # -y: overwrite an output left by a previous run, so re-running the
        #     notebook does not stop at ffmpeg's "file exists" prompt.
        base_cmd = ["ffmpeg", "-nostdin", "-y", "-i", input_file]

        if container_issue and not other_issues:
            print(f"Remuxing {input_file} → {output_file} (container change only)...")
            cmd = base_cmd + ["-c", "copy", output_file]
        else:
            print(f"Re-encoding {input_file} → {output_file} (format correction)...")
            cmd = base_cmd + [
                "-c:v", "libx264",      # Convert video to H.264
                "-b:v", "2M",           # Set video bitrate to 2 Mbps
                "-r", "25",             # Set frame rate to 25 FPS
                "-s", "640x360",        # Set resolution to 640x360
                "-c:a", "aac",          # Convert audio to AAC
                "-b:a", "256k",         # Set audio bitrate to 256 kbps
                "-ac", "2",             # Set stereo audio channels
                output_file
            ]

        # Execute FFmpeg; its log goes to stderr and is only shown on failure.
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.returncode == 0:
            print(f"Successfully converted: {input_file} → {output_file}")
            return output_file

        print(f"Error converting {input_file}: {result.stderr}")
        return None

    except Exception as e:
        # Best effort: report the problem and let the caller continue with other files.
        print(f"Unexpected error during conversion: {e}")
        return None


Combine the whole process: check each file's format, convert the files that do not comply, and generate a TXT report indicating which films do not respect the digital format specified by the festival and which fields are ‘problematic’.

In [None]:
# Generates a TXT report to summarize the video format verification results
def film_format_checker(video_files, report_filename):
    """Check every film, convert the non-compliant ones and write a TXT report.

    For each path in ``video_files`` the metadata is extracted and compared with
    ``EXPECTED_FORMAT``. Films reported as "Format Incorrect" are passed to
    ``convert_video`` together with their mismatching report lines. Films whose
    metadata could not be read are reported as skipped, not as compliant.

    Args:
        video_files: Iterable of film paths.
        report_filename: Path of the text report to write (UTF-8).

    Returns:
        ``report_filename``. The report is also printed.
    """
    separator = "=" * 40 + "\n"
    report_content = ["Video Format Verification Report\n", separator]

    for video in video_files:
        report_content.append(f"File: {os.path.basename(video)}\n")

        # Extract metadata and compare it with the required format
        metadata = extract_metadata(video)
        status, results = compare_format(metadata, EXPECTED_FORMAT)

        # Append format check results
        report_content.append(f"Status: {status}\n")
        report_content.extend(f"   - {result}" for result in results)

        if status == "Format OK":
            report_content.append("No conversion needed.\n")
        elif status == "Format Incorrect":
            # Only the failed checks decide between remuxing and re-encoding.
            issues = [res for res in results if "Mismatch" in res]
            converted_file = convert_video(video, issues)
            if converted_file:
                report_content.append(f"Convert to expected format: {os.path.basename(converted_file)}\n")
            else:
                report_content.append("Conversion Failed!\n")
        else:
            # Any other status means the file could not be analysed, so it must
            # not be reported as compliant.
            report_content.append("Conversion skipped: the file could not be analysed.\n")

        report_content.append(separator)

    report_text = "\n".join(report_content)

    # The report contains non-ASCII characters ("→"), so fix the encoding
    # instead of relying on the platform default.
    with open(report_filename, "w", encoding="utf-8") as report_file:
        report_file.write(report_text)

    print(f" Report saved as: {report_filename}")
    print(report_text)  # Display report content

    return report_filename


In [None]:
# Films submitted for checking, given as absolute paths under /content.
video_files = [
    "/content/The_Gun_and_the_Pulpit.avi",
    "/content/The_Hill_Gang_Rides_Again.mp4",
    "/content/Cosmos_War_of_the_Planets.mp4",
    "/content/Last_man_on_earth_1964.mov",
    "/content/Voyage_to_the_Planet_of_Prehistoric_Women.mp4"
]

# Check every film, convert the non-compliant ones and write the TXT report.
# The call is the cell's last expression, so the report filename it returns is displayed.
film_format_checker(video_files, "video_format_check.txt")

Re-encoding /content/The_Gun_and_the_Pulpit.avi → /content/The_Gun_and_the_Pulpit_formatOK.mp4 (format correction)...
Successfully converted: /content/The_Gun_and_the_Pulpit.avi → /content/The_Gun_and_the_Pulpit_formatOK.mp4
Re-encoding /content/The_Hill_Gang_Rides_Again.mp4 → /content/The_Hill_Gang_Rides_Again_formatOK.mp4 (format correction)...
Successfully converted: /content/The_Hill_Gang_Rides_Again.mp4 → /content/The_Hill_Gang_Rides_Again_formatOK.mp4
Re-encoding /content/Cosmos_War_of_the_Planets.mp4 → /content/Cosmos_War_of_the_Planets_formatOK.mp4 (format correction)...
Successfully converted: /content/Cosmos_War_of_the_Planets.mp4 → /content/Cosmos_War_of_the_Planets_formatOK.mp4
Re-encoding /content/Last_man_on_earth_1964.mov → /content/Last_man_on_earth_1964_formatOK.mp4 (format correction)...
Successfully converted: /content/Last_man_on_earth_1964.mov → /content/Last_man_on_earth_1964_formatOK.mp4
Re-encoding /content/Voyage_to_the_Planet_of_Prehistoric_Women.mp4 → /conten

'video_format_check.txt'

Verify converted files to confirm they align with the expected format

In [None]:
# Re-run the format check on converted films and print one verdict per file.
def verify_converted_files(converted_files):
    """Print a verification summary for every converted film.

    Each path is probed again and compared with ``EXPECTED_FORMAT``. A file that
    passes gets a single confirmation line; otherwise every check line returned
    by the comparison is listed. Nothing is returned.
    """
    divider = "=" * 40

    print("Verifying Converted Video:")
    print(divider)

    for film_path in converted_files:
        print(f"Checking: {os.path.basename(film_path)}")

        # Probe the file again and compare it with the required format.
        verdict, check_lines = compare_format(extract_metadata(film_path), EXPECTED_FORMAT)
        print(f"Status: {verdict}")

        if verdict != "Format OK":
            print("Issues still detected after conversion!")
            for line in check_lines:
                print(f"   - {line}")
        else:
            print("File meets all format requirements.")

        print(divider)

In [None]:
# Paths of the converted films: each original name with the "_formatOK.mp4" suffix
# that convert_video appends, under /content.
converted_video_files = [
    "/content/The_Gun_and_the_Pulpit_formatOK.mp4",
    "/content/The_Hill_Gang_Rides_Again_formatOK.mp4",
    "/content/Cosmos_War_of_the_Planets_formatOK.mp4",
    "/content/Last_man_on_earth_1964_formatOK.mp4",
    "/content/Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4"
]

# Re-check the converted files against EXPECTED_FORMAT and print the verdicts.
verify_converted_files(converted_video_files)

Verifying Converted Video:
Checking: The_Gun_and_the_Pulpit_formatOK.mp4
Status: Format OK
File meets all format requirements.
Checking: The_Hill_Gang_Rides_Again_formatOK.mp4
Status: Format OK
File meets all format requirements.
Checking: Cosmos_War_of_the_Planets_formatOK.mp4
Status: Format OK
File meets all format requirements.
Checking: Last_man_on_earth_1964_formatOK.mp4
Status: Format OK
File meets all format requirements.
Checking: Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4
Status: Format OK
File meets all format requirements.
