# Video pre-processing

This notebook uses ffmpeg to convert the raw videos into a reliably seekable format that works best with SLEAP, and optionally crops the videos. Enter a value for each variable in the first code cell below according to your setup and requirements. Each variable and its value should look like the following:

`variable_name = value`

After entering values for all variables, click Cell > Run All in the top menu bar to execute the notebook and process the videos.

Note that the 'r' before the opening quotation mark for the `ffmpeg_path`, `input_folder`, and `output_folder` variables is required. It marks the text as a "raw" string, so that backslashes in the folder path are kept as they are instead of being treated as the start of an "escaped" character (for example, `\n` would otherwise be read as a new line).

## Parameters

In [None]:
# Enter the folder path for the extracted ffmpeg folder; the folder and its
# subfolders are searched for ffmpeg.exe. A full path ending in ffmpeg.exe is
# also accepted and is used directly.
# e.g. r"D:\Downloads\ffmpeg-6.1.1-essentials_build"
ffmpeg_path = r""

# Enter the folder path containing the raw video files (only .mp4 files
# directly inside this folder are processed)
# e.g. r"D:\Downloads\Ant videos"
input_folder = r""

# Enter the folder path for the output folder where the processed videos should be saved
# (the folder must already exist)
# e.g. r"D:\Downloads\Ant videos\processed"
output_folder = r""

# Enter a suffix that should be appended to the end of the filenames of the processed videos if required (e.g. "_proc")
# or leave as a blank string (i.e. "") if no suffix is required
output_suffix = "_proc"

# Set to True (without quotation marks) if the processed videos should be cropped to the middle-left of the frame
# (e.g. for weaver ant pulling chain videos) or False otherwise
crop_videos = True

## Code execution

In [None]:
# Imports the Python modules required for the notebook to run
import os
import subprocess

# Recursively searches the input path for the ffmpeg.exe executable
def find_exe(path):
    """Return the path of the first ffmpeg.exe found under ``path``.

    The folder is searched depth-first in the order reported by
    ``os.scandir``: each subfolder is fully searched as soon as it is
    encountered.

    Args:
        path: Folder to search.

    Returns:
        The path to a file named exactly ``ffmpeg.exe`` (built by joining
        ``path`` with the relative location of the file), or ``None`` if no
        such file exists anywhere under ``path``.

    Raises:
        OSError: If ``path`` (or a subfolder) cannot be listed, e.g.
            ``FileNotFoundError`` when ``path`` does not exist.
    """
    # The with block closes the directory handle even when the function
    # returns early on a match; otherwise the unfinished iterator is left
    # open until garbage collection and triggers a ResourceWarning.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file() and entry.name == 'ffmpeg.exe':
                return entry.path
            elif entry.is_dir():
                found_path = find_exe(entry.path)

                if found_path is not None:
                    return found_path

    return None

# Work out the location of ffmpeg.exe (stored in ffmpeg_loc) from ffmpeg_path
# If the ffmpeg_path leads directly to ffmpeg.exe, check that the path is valid
if ffmpeg_path.endswith("ffmpeg.exe"):
    # isfile rather than exists, so that a folder that happens to be called
    # "ffmpeg.exe" is not mistaken for the executable
    if not os.path.isfile(ffmpeg_path):
        raise FileNotFoundError(f"ffmpeg.exe not found in {ffmpeg_path}, check that the path is correct")

    ffmpeg_loc = ffmpeg_path
else:
    # A blank or mistyped ffmpeg_path would otherwise fail inside find_exe
    # with a low-level error that does not mention the ffmpeg_path variable
    if not os.path.isdir(ffmpeg_path):
        raise FileNotFoundError(f"ffmpeg folder {ffmpeg_path!r} does not exist, check that the ffmpeg_path variable has been set correctly")

    # Search the path for the ffmpeg.exe file
    ffmpeg_loc = find_exe(ffmpeg_path)

    if ffmpeg_loc is None:
        raise FileNotFoundError(f"ffmpeg.exe not found in {ffmpeg_path}, check that the correct folder has been set")

In [None]:
# Check that the input and output folders exist
# isdir is used instead of exists so that a path pointing at a file (rather
# than a folder) is rejected here with a clear message, instead of failing
# later when the folder contents are listed or when ffmpeg writes its output
if not os.path.isdir(input_folder):
    raise FileNotFoundError("Input folder not found, check that the input_folder path is correct")

if not os.path.isdir(output_folder):
    raise FileNotFoundError("Output folder does not exist, check that the output_folder path is correct")

In [None]:
# Process each video in the input folder and save it to the output folder

# Crop filter, only added to the ffmpeg command when crop_videos is True.
# crop=w:h:x:y keeps a region half the input width and half the input height,
# starting at the left edge (x=0) and a quarter of the way down (y=in_h/4),
# i.e. the middle-left of the frame
crop_options = ['-vf', 'crop=in_w/2:in_h/2:0:in_h/4'] if crop_videos else []

# Encoding options shared by every video
encode_options = ['-c:v', 'libx264',
                  '-pix_fmt', 'yuv420p',
                  '-preset', 'superfast',
                  '-crf', '23']

# List the videos before processing starts, so that files written during
# processing are never picked up as new inputs when the output folder is the
# same as the input folder. The extension is matched case-insensitively so
# that files ending in .MP4 are processed too.
with os.scandir(input_folder) as entries:
    videos = sorted((entry for entry in entries
                     if entry.is_file() and entry.name.lower().endswith('.mp4')),
                    key=lambda entry: entry.name)

# Names of the videos that ffmpeg failed to process
failed_videos = []

for entry in videos:
    # splitext removes only the final extension, so dots elsewhere in the
    # filename are kept (e.g. "trial.1.mp4" becomes "trial.1_proc.mp4")
    output_filename = os.path.join(output_folder, os.path.splitext(entry.name)[0] + output_suffix + ".mp4")

    # '-y' overwrites an existing output file without asking
    process = subprocess.run([ffmpeg_loc,
                              '-y',
                              '-i', entry.path,
                              *crop_options,
                              *encode_options,
                              output_filename
                             ])

    # ffmpeg reports failure through a non-zero exit code; keep going with
    # the remaining videos and report all failures together at the end
    if process.returncode != 0:
        failed_videos.append(entry.name)

if not videos:
    print("No .mp4 videos found in the input folder")
elif failed_videos:
    print(f"ffmpeg failed to process {len(failed_videos)} of {len(videos)} videos: " + ", ".join(failed_videos))
else:
    print(f"Finished processing {len(videos)} videos")