# Requirements for video and its audio

- Video format (container): mp4
- Video codec: h.264
- Audio codec: aac
- Frame rate: 25 FPS
- Aspect ratio: 16:9
- Resolution: 640 x 360
- Video bit rate: 2 – 5 Mb/s
- Audio bit rate: up to 256 kb/s
- Audio channels: stereo

Given a series of video files, the application should generate a brief TXT report indicating which films do not respect the digital format specified by the festival and which fields are ‘problematic’.

# ffprobe is used to examine files

# ffmpeg is used to modify files

In [1]:
#video files to check; bare names, so they are looked up in the current working directory
file_directories = [
    'Cosmos_War_of_the_Planets.mp4',
    'Last_man_on_earth_1964.mov',
    'The_Gun_and_the_Pulpit.avi',
    'The_Hill_Gang_Rides_Again.mp4',
    'Voyage_to_the_Planet_of_Prehistoric_Women.mp4',
]

#uncomment to run on a single file instead
#file_directories = ['The_Gun_and_the_Pulpit.avi']

In [2]:
import os
import re
import subprocess
#examine the given video files and generate the report

#substring expected in ffprobe's video stream line -> report entry written when it is missing
_VIDEO_SPECS = (
    ('h264', 'File does not meet the video codec: h.264\n'),
    ('640x360', 'File does not meet the resolution: 640x360\n'),
    ('16:9', 'File does not meet the aspect ratio: 16:9\n'),
    ('25 fps', 'File does not meet the framerate: 25 fps\n'),
)
#first "<digits> kb/s" on a stream line is that stream's bit rate
_BITRATE = re.compile(r'(\d+) kb/s')
#the video is expected on stream 0 and the audio on stream 1;
#the look-ahead stops '#0:1' from also matching streams 10, 11, ...
_VIDEO_STREAM = re.compile(r'Stream #0:0(?!\d)')
_AUDIO_STREAM = re.compile(r'Stream #0:1(?!\d)')


def _probe_lines(file):
    '''Run ffprobe on file and return everything it prints as a list of lines.'''
    #an argument list (no shell) keeps names with spaces or shell characters intact;
    #ffprobe writes the stream description to stderr, so it is merged into stdout
    result = subprocess.run(['ffprobe', '-hide_banner', file],
                            stdin=subprocess.DEVNULL,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            encoding='utf-8', errors='replace')
    return result.stdout.splitlines()


def _first_bitrate(line):
    '''Return the first "<n> kb/s" value on line as an int, or None when there is none.'''
    match = _BITRATE.search(line)
    return int(match[1]) if match else None


def _find_format_problems(file, metadata):
    '''Return the report entries for one file, given the lines ffprobe printed for it.'''
    problems = []

    for line in metadata:
        #container: ffprobe lists the same demuxer names for mov and mp4,
        #so the file extension is what tells them apart
        if 'Input #0' in line:
            if os.path.splitext(file)[1].lower() != '.mp4':
                problems.append(file)
                problems.append('is not in mp4 format\n')
            else:
                problems.append(file+'\n')

        #video requirements
        if _VIDEO_STREAM.search(line):
            bitrate = _first_bitrate(line)
            #a stream line without a bit rate cannot be judged, so it is not flagged
            if bitrate is not None and not 2000 <= bitrate <= 5000:
                problems.append('Video bit rate NOT in range 2-5 mb/s\n')
            problems.extend(message for spec, message in _VIDEO_SPECS
                            if spec not in line)

        #audio requirements
        if _AUDIO_STREAM.search(line):
            bitrate = _first_bitrate(line)
            if bitrate is not None and bitrate > 256:
                problems.append('Audio bit rate is ABOVE 256 kbs\n')
            if 'aac' not in line:
                problems.append('File does not meet the audio codec: aac\n')
            #some files (e.g. the avi) report '2 channels' instead of 'stereo'
            two_channels = any('channel' in part and part.split()[0] == '2'
                               for part in line.split(','))
            if 'stereo' not in line and not two_channels:
                problems.append('File does not meet the audio channel: stereo\n')

    return problems


def generate_report(file_directory:list):
    '''Check every video against the festival format and write report.txt.

    Each file is probed with ffprobe and checked for: mp4 container, h264
    video, 640x360, 16:9, 25 fps, video bit rate 2000-5000 kb/s, aac audio,
    audio bit rate of at most 256 kb/s and stereo (or 2 channel) audio.

    The report is written to 'report.txt' in the current working directory.
    Every file gets one section: its name (followed by 'is not in mp4 format'
    when applicable), one indented line per unmet requirement, then a blank
    line. reformat_files() reads the sections back in the same order, so the
    layout must not change.

    Args:
        file_directory: list of paths of the video files to check.

    Raises:
        AssertionError: if file_directory is not a list.
        FileNotFoundError: if the ffprobe executable cannot be found.
    '''
    assert isinstance(file_directory,list), 'arguement passed in was not list of string'

    #entries for all files, in input order
    disrespect = []

    for file in file_directory:
        disrespect.extend(_find_format_problems(file, _probe_lines(file)))
        #blank entry closes the section of this file
        disrespect.append('\n')

    #generate report
    with open('report.txt', 'w') as f:
        f.write('  '.join(disrespect))

In [3]:
#check every file listed in file_directories and write the findings to report.txt
generate_report(file_directories)

## Things left to do
- extend the if statements to cover the remaining parameters that need reformatting (resolution, aspect ratio, frame rate, video bit rate, audio channels)
- combine the reformatted audio and video into a single file

In [4]:
#modify the necessary files
def _read_report_sections(report):
    '''Return the report as one list of stripped, non-empty lines per section.

    Sections are separated by blank (whitespace-only) lines.
    '''
    sections = []
    current = []
    with open(report, 'r') as f:
        for line in f:
            text = line.strip()
            if text:
                current.append(text)
            else:
                sections.append(current)
                current = []
    #keep a last section that is not followed by a blank line
    if current:
        sections.append(current)
    return sections


def _run_ffmpeg(options, output):
    '''Run "ffmpeg -hide_banner -y <options> <output>" and return True on exit code 0.'''
    #ffmpeg does not create missing output folders itself
    folder = os.path.dirname(output)
    if folder:
        os.makedirs(folder, exist_ok=True)
    #an argument list (no shell) keeps names with spaces or shell characters intact;
    #-y overwrites existing outputs, stdin is closed so ffmpeg never waits for a key press
    result = subprocess.run(['ffmpeg', '-hide_banner', '-y'] + options + [output],
                            stdin=subprocess.DEVNULL,
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL)
    return result.returncode == 0


def reformat_files(report:str,file_directories:list):
    '''Re-encode the files that the report flags.

    The n-th section of the report is taken to describe the n-th entry of
    file_directories, so the list must be the one the report was generated
    from, in the same order. Files beyond the last section are left alone.

    For a file with an audio bit rate or audio codec problem the audio is
    extracted to audio/<name>.mp3 and re-encoded to audio/<name>_1.aac
    (aac, 128 kb/s, 22050 Hz). For a file that is not mp4 or has a video
    codec problem the whole file is converted to videos/<name>.mp4.
    Resolution, aspect ratio, frame rate, video bit rate and channel
    problems are not handled yet. ffmpeg failures are not reported.

    Args:
        report: path of the report written by generate_report().
        file_directories: list of paths of the video files, in report order.

    Raises:
        AssertionError: if report is not a str or file_directories not a list.
        FileNotFoundError: if the report or the ffmpeg executable is missing.
    '''
    assert isinstance(report,str), 'arguement passed in was not directory string of report'
    assert isinstance(file_directories,list), 'arguement passed in was not list of string'

    read_report = _read_report_sections(report)

    #loop through the videos and start reformatting
    for i,files in enumerate(file_directories):
        video_problems_string = ''.join(read_report[i]) if i < len(read_report) else ''

        #name without extension, computed for every file because both branches need it
        raw_title_no_extension = os.path.splitext(files)[0]

        #modify audio first: change the audio codec, bit rate and sample rate
        if 'Audio bit rate' in video_problems_string or 'audio codec' in video_problems_string:
            audio_file_path = f'audio/{raw_title_no_extension}.mp3'
            #extract audio, then re-encode it; skip the second step if the first failed
            if _run_ffmpeg(['-i', files, '-vn'], audio_file_path):
                _run_ffmpeg(['-i', audio_file_path, '-ab', '128000', '-ar', '22050',
                             '-codec:a', 'aac'],
                            f'audio/{raw_title_no_extension}_1.aac')

        #change the container to mp4 (ffmpeg picks its default codecs for mp4)
        if 'not in mp4' in video_problems_string or 'video codec' in video_problems_string:
            _run_ffmpeg(['-i', files], f'videos/{raw_title_no_extension}.mp4')


In [5]:
#re-encode the files that report.txt flags for audio or container/video codec problems
reformat_files('report.txt',file_directories)

In [6]:
#inspect one re-encoded audio file to see its codec, sample rate and bit rate
!ffprobe -hide_banner audio/The_Gun_and_the_Pulpit_1.aac

[aac @ 000001f1c11571c0] Estimating duration from bitrate, this may be inaccurate
Input #0, aac, from 'audio/The_Gun_and_the_Pulpit_1.aac':
  Duration: 00:00:20.17, bitrate: 128 kb/s
  Stream #0:0: Audio: aac (LC), 22050 Hz, stereo, fltp, 128 kb/s
