# Requirements for video and its audio

- Video format (container): mp4
- Video codec: h.264
- Audio codec: aac
- Frame rate: 25 FPS
- Aspect ratio: 16:9
- Resolution: 640 x 360
- Video bit rate: 2 – 5 Mb/s
- Audio bit rate: up to 256 kb/s
- Audio channels: stereo

In [1]:
#source films to inspect, one path per entry
file_directories = [
    'Cosmos_War_of_the_Planets.mp4',
    'Last_man_on_earth_1964.mov',
    'The_Gun_and_the_Pulpit.avi',
    'The_Hill_Gang_Rides_Again.mp4',
    'Voyage_to_the_Planet_of_Prehistoric_Women.mp4',
]

In [2]:
#libraries required to generate report
import re

In [3]:
#examine given video files and generate report

#captures the stream index ffprobe prints, e.g. the "0" in "Stream #0:0(und): Video: ..."
#(a plain substring test for 'Stream #0:1' would also match streams 10-19)
_STREAM_INDEX = re.compile(r'Stream #0:(\d+)')

#(substring expected in the video stream line, report entry written when it is missing)
_VIDEO_SPECS = (
    ('h264', 'File does not meet the video codec: h.264\n'),
    ('640x360', 'File does not meet the resolution: 640x360\n'),
    ('16:9', 'File does not meet the aspect ratio: 16:9\n'),
    ('25 fps', 'File does not meet the framerate: 25 fps\n'),
)


def _run_ffprobe(file):
    '''return the lines ffprobe prints for file, stdout and stderr merged'''
    import subprocess

    #an argument list (no shell) keeps file names with spaces or quotes intact;
    #stderr is merged into stdout because ffprobe logs the stream summary on stderr
    completed = subprocess.run(
        ['ffprobe', '-hide_banner', '-i', file],
        stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        encoding='utf-8', errors='replace')
    return completed.stdout.splitlines()


def _bitrate_kbps(line):
    '''return the first "<digits> kb/s" value found in line as an int, or None'''
    match = re.search(r'(\d+) kb/s', line)
    return int(match[1]) if match else None


def _inspect_metadata(file, metadata):
    '''return the report entries (header first, then problems) for one file

    file is the path that was probed and metadata the ffprobe output lines.
    Stream 0 is assumed to be the video stream and stream 1 the audio stream.
    '''
    import os

    problems = []
    probed = False
    #a "2 channels" audio description counts as stereo even without the word 'stereo'
    is_stereo = False

    for line in metadata:
        #ffprobe prints this line once it has opened the input
        if 'Input #0' in line:
            probed = True

        stream = _STREAM_INDEX.search(line)
        if stream is None:
            continue

        #check video requirements
        if stream[1] == '0':
            if 'kb/s' in line:
                bitrate = _bitrate_kbps(line)
                if bitrate is not None and not 2000 <= bitrate <= 5000:
                    problems.append('Video bit rate NOT in range 2-5 mb/s\n')
            problems.extend(message for spec, message in _VIDEO_SPECS if spec not in line)

        #check audio requirements
        elif stream[1] == '1':
            if 'kb/s' in line:
                audio_bitrate = _bitrate_kbps(line)
                if audio_bitrate is not None and audio_bitrate > 256:
                    problems.append('Audio bit rate is ABOVE 256 kbs\n')

            #some files (e.g. the avi) describe the layout as "2 channels" instead of "stereo"
            for field in line.split(','):
                if 'channel' in field and field.split()[0] == '2':
                    is_stereo = True

            if 'aac' not in line:
                problems.append('File does not meet the audio codec: aac\n')
            if 'stereo' not in line and not is_stereo:
                problems.append('File does not meet the audio channel: stereo\n')

    if not probed:
        #ffprobe never opened the file: say so instead of silently leaving it out
        header = [file, 'could not be read by ffprobe\n']
    elif os.path.splitext(file)[1].lower() != '.mp4':
        #the container is judged by the file extension, so dots elsewhere in the path do not matter
        header = [file, 'is not in mp4 format\n']
    else:
        header = [file + '\n']
    return header + problems


def generate_report(file_directory:list, *, report_path:str='report.txt', probe=None):
    '''check every video against the requirements and write the findings to a text report

    file_directory: list of paths of the video files to examine
    report_path: where the report is written (default 'report.txt')
    probe: optional callable taking a path and returning the ffprobe output lines;
           defaults to running the ffprobe executable

    For each file the report holds the file name, one line per unmet requirement
    and a closing blank line. All entries are joined with two spaces.
    Raises AssertionError when file_directory is not a list.
    '''
    assert isinstance(file_directory,list), 'arguement passed in was not list of string'

    if probe is None:
        probe = _run_ffprobe

    #store files that dont respect the format
    disrespect = []
    for file in file_directory:
        disrespect.extend(_inspect_metadata(file, probe(file)))
        #to help with line spacing and readablity of text file
        disrespect.append('\n')

    #generate report
    with open(report_path, 'w') as f:
        f.write('  '.join(disrespect))

In [4]:
#write report.txt listing, for each source film, the requirements it does not meet
generate_report(file_directories)

In [5]:
#libraries required to convert films
import os
import glob

In [6]:
#modify the necessary files

def _read_report_sections(report):
    '''return one list of stripped, non-empty lines per file; a blank line ends a section'''
    sections = []
    current = []
    with open(report, 'r') as f:
        for line in f:
            text = line.strip()
            if text == '':
                sections.append(current)
                current = []
            else:
                current.append(text)
    #keep the last section even when the report lacks its closing blank line
    if current:
        sections.append(current)
    return sections


def _run_ffmpeg(arguments):
    '''run one ffmpeg argument list quietly; failures are not raised (best effort)'''
    import subprocess

    #stdin is closed so ffmpeg can never sit waiting for keyboard input
    subprocess.run(arguments, stdin=subprocess.DEVNULL,
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def reformat_files(report:str,file_directories:list,*,run_command=None):
    '''re-encode every video according to the problems listed for it in the report

    report: path of the text report written by generate_report
    file_directories: list of video paths, in the same order as the report entries
    run_command: optional callable receiving each ffmpeg argument list;
                 defaults to executing it with subprocess

    Intermediate files are written to audio/ and videos/, the finished copies to
    reformat_videos/<name>_formatOK.mp4. The three folders are created when missing.
    Raises AssertionError on wrong argument types and IndexError when the report
    holds fewer entries than there are files.
    '''
    import os

    assert isinstance(report,str), 'arguement passed in was not directory string of report'
    assert isinstance(file_directories,list), 'arguement passed in was not list of string'

    execute = _run_ffmpeg if run_command is None else run_command

    #read report
    read_report = _read_report_sections(report)
    if len(read_report) < len(file_directories):
        raise IndexError('report holds fewer entries than there are files to reformat')

    #ffmpeg does not create missing output folders
    for folder in ('audio', 'videos', 'reformat_videos'):
        os.makedirs(folder, exist_ok=True)

    ffmpeg = ['ffmpeg', '-hide_banner', '-y']
    target_bitrate = ['-b:v', '2500k', '-minrate', '2000k', '-maxrate', '5000k']

    #loop through the videos and start reformatting
    for files, section in zip(file_directories, read_report):
        report_lines = list(section)
        #drop the file name from the header so words inside the name cannot be
        #mistaken for a reported problem
        if report_lines and report_lines[0].startswith(files):
            report_lines[0] = report_lines[0][len(files):]
        problems = ''.join(report_lines)

        fix_audio = any(e in problems for e in ('Audio bit rate', 'audio codec', 'channel'))
        fix_picture = any(e in problems for e in ('resolution', 'aspect', 'framerate'))
        fix_bitrate = 'Video bit rate' in problems
        fix_codec = 'video codec' in problems

        #splitext copes with extensions of any length; basename keeps outputs flat
        title, extension = os.path.splitext(os.path.basename(files))

        audio_edited_path = None
        #every video step reads the newest version of the video, starting with the source
        video_edited_path = files

        #modify AUDIO first: change the audio codec, bit rate, sample rate and channel
        if fix_audio:
            extracted_audio = f"audio/{title}.mp3"
            audio_edited_path = f"audio/{title}.aac"
            #extract audio
            execute(ffmpeg + ['-i', files, '-vn', extracted_audio])
            execute(ffmpeg + ['-i', extracted_audio, '-ab', '128000', '-ar', '22050',
                              '-ac', '2', '-codec:a', 'aac', audio_edited_path])
            #delete the extracted audio, only the edited audio is used henceforth
            #(it is absent when the extraction failed)
            if os.path.exists(extracted_audio):
                os.remove(extracted_audio)

        #modify VIDEO specs (resolution or aspect ratio or fps)
        if fix_picture:
            resized = f"videos/{title}_1{extension}"
            execute(ffmpeg + ['-i', video_edited_path, '-vf', 'scale=640:360,setsar=1:1']
                    + target_bitrate + ['-r', '25', resized])
            video_edited_path = resized

        #video bit rate: suffix _2 when the picture was changed first, _3 otherwise
        if fix_bitrate:
            suffix = '_2' if fix_picture else '_3'
            rated = f"videos/{title}{suffix}{extension}"
            execute(ffmpeg + ['-i', video_edited_path] + target_bitrate + [rated])
            video_edited_path = rated

        #video codec
        if fix_codec:
            encoded = f"videos/{title}_4{extension}"
            execute(ffmpeg + ['-i', video_edited_path] + target_bitrate
                    + ['-c:v', 'libx264', '-filter:v', 'fps=fps=25', encoded])
            video_edited_path = encoded

        #video format: always remux so every film ends up in an mp4 container
        remuxed = f"videos/{title}.mp4"
        execute(ffmpeg + ['-i', video_edited_path, '-vcodec', 'copy', '-acodec', 'copy', remuxed])
        video_edited_path = remuxed

        final_path = f"reformat_videos/{title}_formatOK.mp4"
        if audio_edited_path is not None:
            #audio was edited: take the picture from the video and the sound from the new audio
            execute(ffmpeg + ['-i', video_edited_path, '-i', audio_edited_path, '-shortest',
                              '-c:v', 'copy', '-map', '0:v:0', '-map', '1:a:0', final_path])
        else:
            #audio untouched: the remuxed video already carries it
            execute(ffmpeg + ['-i', video_edited_path, '-vcodec', 'copy', final_path])

In [7]:
#convert the source films according to the problems recorded in report.txt
reformat_files('report.txt',file_directories)

Remove all the intermediate files that ffmpeg created during the process, to free up space on the local system.

In [8]:
#delete every intermediate file left in the working folders
for folder in ('videos', 'audio'):
    for leftover in glob.glob(f'{folder}/*'):
        os.remove(leftover)

To check if the program created the new copies of the films in the correct format, we can simply call the `generate_report` function on the new copies, and read the report.

In [9]:
#converted copies to verify, one path per entry
check_formatOK = [
    'reformat_videos/Cosmos_War_of_the_Planets_formatOK.mp4',
    'reformat_videos/Last_man_on_earth_1964_formatOK.mp4',
    'reformat_videos/The_Gun_and_the_Pulpit_formatOK.mp4',
    'reformat_videos/The_Hill_Gang_Rides_Again_formatOK.mp4',
    'reformat_videos/Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4',
]

#rerun the report on the converted copies
generate_report(check_formatOK)