## Install required packages

In [1]:
!pip install ffprobe-python ffmpeg-python



In [2]:
import subprocess

# Confirm that the ffprobe executable can be launched before any analysis is attempted.
try:
    subprocess.run(["ffprobe", "-version"], check=True)
    print("ffprobe is working correctly!")
except FileNotFoundError:
    # Raised when no executable named "ffprobe" can be found on PATH.
    print("Error: ffprobe not found. Make sure it is installed and in your system's PATH.")
except subprocess.CalledProcessError as exc:
    # check=True raises this when ffprobe starts but exits with a non-zero status.
    print(f"Error: ffprobe was found but exited with status {exc.returncode}.")


Error: ffprobe not found. Make sure it is installed and in your system's PATH.


In [3]:
import os
# Make the Homebrew bin directory visible to this kernel so the ffprobe/ffmpeg
# executables can be found. The directory is only appended when it is not already
# listed, so re-running this cell does not keep growing PATH.
HOMEBREW_BIN = "/opt/homebrew/bin"
if HOMEBREW_BIN not in os.environ.get("PATH", "").split(os.pathsep):
    os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + HOMEBREW_BIN

In [4]:
# Run the ffprobe version check again after PATH was extended; with check=True a
# non-zero exit status raises subprocess.CalledProcessError.
subprocess.run(["ffprobe", "-version"], check=True)


ffprobe version 7.1.1 Copyright (c) 2007-2025 the FFmpeg developers
built with Apple clang version 16.0.0 (clang-1600.0.26.6)
configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --

CompletedProcess(args=['ffprobe', '-version'], returncode=0)

enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon
libavutil      59. 39.100 / 59. 39.100
libavcodec     61. 19.101 / 61. 19.101
libavformat    61.  7.100 / 61.  7.100
libavdevice    61.  3.100 / 61.  3.100
libavfilter    10.  4.100 / 10.  4.100
libswscale      8.  3.100 /  8.  3.100
libswresample   5.  3.100 /  5.  3.100
libpostproc    58.  3.100 / 58.  3.100


In [5]:
from ffprobe import FFProbe
from os import listdir
from os.path import isfile, join
import ffmpeg
import pathlib

# Absolute, symlink-resolved path of the directory the notebook is running in
curr_path = pathlib.Path.cwd().resolve()

# Format to follow:
- Video format (container): mp4
- Video codec: h.264
- Audio codec: aac
- Frame rate: 25 FPS
- Aspect ratio: 16:9 
- Resolution: 640 x 360
- Video bit rate: 2 – 5 Mb/s
- Audio bit rate: up to 256 kb/s
- Audio channels: stereo

### Analyze Video and Audio

In [6]:
def find_problematic_fields(video_stream, audio_stream):
    '''
    Compare the probed properties of the current file with the target format and
    attach the ffmpeg filters needed to correct frame rate and resolution.

    The probed values are NOT passed as arguments. They are read from the
    notebook-level variables file_format, v_codec_name, a_codec_name,
    v_frame_rate, v_aspect_ratio, v_resolution, v_bitrate (compared against the
    2-5 Mb/s range), a_bitrate (compared against the 0-256 kb/s range),
    a_channel_layout and a_channels, which must describe the file being checked
    before this function is called.

    Only the frame rate ('fps' filter) and the resolution ('scale' filter) are
    corrected here. Every other field is only reported; it is expected to be fixed
    by the options used when the ffmpeg output file is created.

    Parameters:
        video_stream: ffmpeg video stream the filters are applied to.
        audio_stream: ffmpeg audio stream; returned unchanged.

    Returns:
        (problematic_fields, video_stream, audio_stream), where problematic_fields
        is a string of field names, each followed by a space (empty string when
        nothing is wrong).
    '''
    def _as_number(value):
        # Probed values may be numbers, numeric strings, None or placeholders such
        # as "Unknown"; anything that is not numeric is reported as None.
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    problematic_fields = ""
    # file format setting will be set along with output file
    if file_format != "mp4":
        problematic_fields += "file_format "

    # video codec setting will be set along with output file
    if v_codec_name != "h264":
        problematic_fields += "video_codec "

    # audio codec setting will be set along with output file
    if a_codec_name != "aac":
        problematic_fields += "audio_codec "

    # The frame rate is compared numerically (with a small tolerance, because it
    # is typically derived from a division). An unknown frame rate is treated as
    # problematic so the fps filter is still applied.
    frame_rate = _as_number(v_frame_rate)
    if frame_rate is None or not (abs(frame_rate - 25) <= 0.01):
        problematic_fields += "video_frame_rate "
        video_stream = ffmpeg.filter(video_stream, 'fps', fps=25, round='near')

    # aspect ratio setting will be set along with output file
    if v_aspect_ratio != "16:9":
        problematic_fields += "aspect_ratio "

    if v_resolution != "640 x 360":
        problematic_fields += "resolution "
        video_stream = ffmpeg.filter(video_stream, 'scale', w='640', h='360')

    # video bitrate setting will be set along with output file;
    # a missing (None) bitrate is reported instead of raising a TypeError
    video_rate = _as_number(v_bitrate)
    if video_rate is None or not (2 <= video_rate <= 5):
        problematic_fields += "video_bitrate "

    # audio bitrate setting will be set along with output file;
    # out of range means below 0 OR above 256
    audio_rate = _as_number(a_bitrate)
    if audio_rate is None or not (0 <= audio_rate <= 256):
        problematic_fields += "audio_bitrate "

    # channels will be set to stereo by default as later on, we will be using the video and audio stream
    # to generate the output file.
    # The channel count is normalised to a number first so that both 2 and "2" are accepted.
    if a_channel_layout != "stereo" or _as_number(a_channels) != 2:
        problematic_fields += "channels_layout "

    return problematic_fields, video_stream, audio_stream

In [7]:
# Analyse every film in Exercise3_Films, write the findings to
# file_analysis_report.txt and re-encode each non-conforming file into
# OutputFiles/<name>_formatOK.mp4.

# Ensure curr_path is defined before using it
if 'curr_path' not in locals():
    raise NameError("curr_path is not defined. Please set it before running this cell.")

# Fail early, before any file is touched, if the helper cell has not been run
if "find_problematic_fields" not in globals():
    raise NameError("find_problematic_fields function is not defined. Please define it before running this cell.")

# Ensure the directory exists
import os
input_dir = f"{curr_path}/Exercise3_Films"
if not os.path.exists(input_dir):
    raise FileNotFoundError(f"Directory {curr_path}/Exercise3_Films does not exist.")

# ffmpeg does not create missing directories for its output file
output_dir = f"{curr_path}/OutputFiles"
os.makedirs(output_dir, exist_ok=True)


def _to_float(value):
    """Return value as a float, or None when it is missing or not numeric (e.g. "N/A")."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


# Get all files in the Exercise3_Films folder, skipping system files
files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f)) and not f.startswith(".")]

# The context manager closes the report even if an unexpected error stops the loop
with open("file_analysis_report.txt", "w") as result_file:
    for file in files:
        print(f"\nProcessing file: {file}")
        try:
            metadata = FFProbe(f"{curr_path}/Exercise3_Films/{file}")

            # Select the streams by type instead of assuming video is stream 0
            # and audio is stream 1
            probe_video = next((s for s in metadata.streams if s.is_video()), None)
            probe_audio = next((s for s in metadata.streams if s.is_audio()), None)

            # Assign values safely; these notebook-level names are the ones
            # find_problematic_fields reads
            file_format = file.split('.')[-1]
            v_codec_name = getattr(probe_video, "codec_name", "Unknown")
            a_codec_name = getattr(probe_audio, "codec_name", "Unknown")
            frame_count = _to_float(getattr(probe_video, "nb_frames", None))
            duration = _to_float(getattr(probe_video, "duration", None))
            # Average frame rate = number of frames / duration
            v_frame_rate = (frame_count / duration) if (frame_count and duration) else "Unknown"
            v_aspect_ratio = getattr(probe_video, "display_aspect_ratio", "Unknown")
            v_resolution = f"{probe_video.width} x {probe_video.height}" if probe_video else "Unknown"
            video_bps = _to_float(getattr(probe_video, "bit_rate", None))
            audio_bps = _to_float(getattr(probe_audio, "bit_rate", None))
            v_bitrate = (video_bps / 1000000) if video_bps is not None else None  # Mb/s
            a_bitrate = (audio_bps / 1000) if audio_bps is not None else None  # kb/s
            a_channel_layout = getattr(probe_audio, "channel_layout", "Unknown")
            a_channels = getattr(probe_audio, "channels", "Unknown")

            # Display extracted information
            print(f"Video format: {file_format}")
            print(f"Video codec: {v_codec_name}")
            print(f"Audio codec: {a_codec_name}")
            print(f"Frame rate: {v_frame_rate} FPS")
            print(f"Aspect ratio: {v_aspect_ratio}")
            print(f"Resolution: {v_resolution}")
            print(f"Video bitrate: {v_bitrate if v_bitrate is not None else 'Unknown'} Mb/s")
            print(f"Audio bitrate: {a_bitrate if a_bitrate is not None else 'Unknown'} kb/s")
            print(f"Audio channel layout: {a_channel_layout}")
            print(f"Number of channels: {a_channels}")

            # Check and fix problematic fields
            stream = ffmpeg.input(f"{curr_path}/Exercise3_Films/{file}")
            video_stream = stream.video if hasattr(stream, 'video') else None
            audio_stream = stream.audio if hasattr(stream, 'audio') else None

            # find_problematic_fields compares the values assigned above with the
            # target format and adds the fps/scale filters where needed
            problematic_fields, video_stream, audio_stream = find_problematic_fields(video_stream, audio_stream)

            # Write findings to file
            if not problematic_fields:
                result_file.write(f"{file} - no issues found\n")
            else:
                result_file.write(f"{file} - {problematic_fields}\n")
                # splitext keeps dots inside the base name, so "a.b.mov" and
                # "a.c.mov" do not both map to "a_formatOK.mp4"
                output_filename = f"{curr_path}/OutputFiles/{os.path.splitext(file)[0]}_formatOK.mp4"
                stream = ffmpeg.output(video_stream, audio_stream, output_filename, format='mp4', vcodec='h264', acodec='aac', video_bitrate='2.5M', audio_bitrate='256k', aspect='16:9')
                # overwrite_output lets the cell be re-run when the output already exists
                ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)

        except ffmpeg.Error as e:
            print("FFmpeg Error:")
            print("stdout:", e.stdout.decode('utf8'))
            print("stderr:", e.stderr.decode('utf8'))



Processing file: Last_man_on_earth_1964.mov
Video format: mov
Video codec: prores
Audio codec: pcm_s16le
Frame rate: 23.976023976023978 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
Video bitrate: 9.285191 Mb/s
Audio bitrate: 1536.0 kb/s
Audio channel layout: stereo
Number of channels: 2

Processing file: Voyage_to_the_Planet_of_Prehistoric_Women.mp4
Video format: mp4
Video codec: hevc
Audio codec: mp3
Frame rate: 29.97002997002997 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
Video bitrate: 8.038857 Mb/s
Audio bitrate: 320.0 kb/s
Audio channel layout: stereo
Number of channels: 2

Processing file: The_Gun_and_the_Pulpit.avi
Video format: avi
Video codec: rawvideo
Audio codec: pcm_s16le
Frame rate: 25.0 FPS
Aspect ratio: N/A
Resolution: 720 x 404
Video bitrate: 87.438878 Mb/s
Audio bitrate: 1536.0 kb/s
Audio channel layout: unknown
Number of channels: 2

Processing file: Cosmos_War_of_the_Planets.mp4
Video format: mp4
Video codec: h264
Audio codec: aac
Frame rate: 29.97002997002997 FPS


## Check format of converted videos

In [8]:
# Probe every converted file in OutputFiles and print its properties so they can
# be compared with the target format.
converted_dir = f"{curr_path}/OutputFiles/"
# Hidden/system files (names starting with ".") are skipped, as in the analysis step
files = [f for f in listdir(converted_dir) if isfile(join(converted_dir, f)) and not f.startswith(".")]

for file in files:
    print(f"file: {file}")
    metadata=FFProbe(f"{curr_path}/OutputFiles/{file}")
    # retrieve stream information
    # NOTE(review): assumes stream 0 is video and stream 1 is audio - confirm for
    # files that were not produced by the conversion step
    video_stream = metadata.streams[0]
    audio_stream = metadata.streams[1]

    # assign stream fields to variables
    # the extension is the LAST dot-separated part, so dotted base names still work
    file_format = file.split('.')[-1]
    v_codec_name = video_stream.codec_name
    a_codec_name = audio_stream.codec_name
    # average frame rate = number of frames / duration
    v_frame_rate = float(video_stream.nb_frames) / float(video_stream.duration)
    v_aspect_ratio = video_stream.display_aspect_ratio
    v_resolution = f"{video_stream.width} x {video_stream.height}"
    v_bitrate = int(video_stream.bit_rate) / 1000000  # Mb/s
    a_bitrate = int(audio_stream.bit_rate) / 1000  # kb/s
    a_channel_layout = audio_stream.channel_layout
    a_channels = audio_stream.channels
    print(f"Video format (container): {file_format}")
    print(f"Video codec: {v_codec_name}")
    print(f"Audio codec: {a_codec_name}")
    print(f"Frame rate: {format(v_frame_rate, '.2f')} FPS")
    print(f"Aspect ratio: {v_aspect_ratio}")
    print(f"Resolution: {v_resolution}")
    print(f"v_bitrate: {format(v_bitrate, '.2f')}Mb/s")
    print(f"a_bitrate: {format(a_bitrate, '.2f')}kb/s")
    print(f"channel layout: {a_channel_layout}")
    print(f"channels: {a_channels}\n")

file: Last_man_on_earth_1964_formatOK.mp4
Video format (container): mp4
Video codec: h264
Audio codec: aac
Frame rate: 25.00 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
v_bitrate: 2.57Mb/s
a_bitrate: 240.95kb/s
channel layout: stereo
channels: 2

file: Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4
Video format (container): mp4
Video codec: h264
Audio codec: aac
Frame rate: 25.00 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
v_bitrate: 2.38Mb/s
a_bitrate: 246.17kb/s
channel layout: stereo
channels: 2

file: Cosmos_War_of_the_Planets_formatOK.mp4
Video format (container): mp4
Video codec: h264
Audio codec: aac
Frame rate: 25.00 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
v_bitrate: 2.47Mb/s
a_bitrate: 245.59kb/s
channel layout: stereo
channels: 2

file: The_Hill_Gang_Rides_Again_formatOK.mp4
Video format (container): mp4
Video codec: h264
Audio codec: aac
Frame rate: 25.00 FPS
Aspect ratio: 16:9
Resolution: 640 x 360
v_bitrate: 2.48Mb/s
a_bitrate: 214.11kb/s
channel layout: ste