# Searching for files

In [9]:
import logging, os, json

def locate_files(arg):
    """Print every video file found under the configured search roots.

    ``arg`` is accepted but never read. Each record yielded by ``find_files``
    is a JSON string; it is decoded, logged at DEBUG level and printed twice
    (pretty-printed JSON first, then the plain dict repr). A record that fails
    to decode is logged as an error and skipped.
    """
    search_roots = ["C:\\Users\\cwest\\Videos\\BoilestDev"]
    wanted_suffixes = ['.mp4', '.mkv', '.avi']

    print(f'Searching directories: {search_roots}')
    print(f'File extensions: {wanted_suffixes}')

    for raw_record in find_files(search_roots, wanted_suffixes):
        logging.debug('File located, sending to ffprobe function')
        try:
            record = json.loads(raw_record)
            pretty_record = json.dumps(record, indent=3, sort_keys=True)
            logging.debug(pretty_record)
            print(pretty_record)
            print(record)
        except json.JSONDecodeError as decode_error:
            # Nothing follows in the loop body, so falling through moves on to the next record.
            logging.error(f'Failed to decode JSON: {decode_error}')

def find_files(directories, extensions):
    """Yield one JSON record per file whose name ends with a wanted extension.

    Args:
        directories: iterable of directory paths to walk recursively.
        extensions: iterable of suffixes (e.g. ``'.mkv'``); matching is
            case-insensitive.

    Yields:
        A JSON string with the keys ``directory`` (the search root), ``root``
        (the folder containing the file), ``file``, ``file_path`` and
        ``extension`` (the first suffix from ``extensions`` that matched).
    """
    for directory in directories:
        logging.info ('Scanning: ' + directory)
        for root, _dirs, files in os.walk(directory):
            for file in files:
                lowered_name = file.lower()
                for ext in extensions:
                    if lowered_name.endswith(ext.lower()):
                        file_path = os.path.join(root, file)
                        result_dict = {
                            'directory': directory,
                            'root': root,
                            'file': file,
                            'file_path': file_path,
                            'extension': ext
                        }
                        yield json.dumps(result_dict)
                        # Stop at the first match so overlapping suffixes
                        # (e.g. '.mkv' and 'mkv') do not report the file twice.
                        break

In [10]:
# Smoke-test call: the argument is a throwaway value because locate_files never reads `arg`.
locate_files('farts')

Searching directories: ['C:\\Users\\cwest\\Videos\\BoilestDev']
File extensions: ['.mp4', '.mkv', '.avi']
{
   "directory": "C:\\Users\\cwest\\Videos\\BoilestDev",
   "extension": ".mkv",
   "file": "test2.mkv",
   "file_path": "C:\\Users\\cwest\\Videos\\BoilestDev\\test2.mkv",
   "root": "C:\\Users\\cwest\\Videos\\BoilestDev"
}
{'directory': 'C:\\Users\\cwest\\Videos\\BoilestDev', 'root': 'C:\\Users\\cwest\\Videos\\BoilestDev', 'file': 'test2.mkv', 'file_path': 'C:\\Users\\cwest\\Videos\\BoilestDev\\test2.mkv', 'extension': '.mkv'}
{
   "directory": "C:\\Users\\cwest\\Videos\\BoilestDev",
   "extension": ".mkv",
   "file": "test22.mkv",
   "file_path": "C:\\Users\\cwest\\Videos\\BoilestDev\\test22.mkv",
   "root": "C:\\Users\\cwest\\Videos\\BoilestDev"
}
{'directory': 'C:\\Users\\cwest\\Videos\\BoilestDev', 'root': 'C:\\Users\\cwest\\Videos\\BoilestDev', 'file': 'test22.mkv', 'file_path': 'C:\\Users\\cwest\\Videos\\BoilestDev\\test22.mkv', 'extension': '.mkv'}
{
   "directory": "C:\\U

# Running FFprobe

In [3]:
import subprocess, json, os


def check_video_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append the ffmpeg mapping for video stream ``i`` and flag re-encodes.

    Args:
        encoding_decision: running flag; set to True when this stream must be
            re-encoded, otherwise passed through unchanged.
        i: index of the stream inside ``stream_info['streams']``.
        stream_info: parsed ffprobe output; each stream needs a ``codec_name``.
        ffmpeg_command: argument string built so far.

    Returns:
        ``(encoding_decision, ffmpeg_command)`` with this stream's ``-map`` and
        ``-c:v`` arguments appended.
    """
    codec_name = stream_info['streams'][i]['codec_name']
    desired_video_codec = 'av1'
    print('Steam ' + str(i) + ' codec is: ' + codec_name)
    stream_map = ' -map 0:' + str(i)
    # NOTE(review): mjpeg is copied rather than re-encoded - presumably embedded cover art; confirm.
    if codec_name in (desired_video_codec, 'mjpeg'):
        ffmpeg_command = ffmpeg_command + stream_map + ' -c:v copy'
    else:
        # Every other codec is transcoded to AV1 with the project's SVT-AV1 settings.
        encoding_decision = True
        svt_av1_string = "libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15"
        ffmpeg_command = ffmpeg_command + stream_map + ' -c:v ' + svt_av1_string
    return encoding_decision, ffmpeg_command

def check_audio_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append a copy mapping for audio stream ``i`` to the ffmpeg arguments.

    Audio is always stream-copied for now, so ``encoding_decision`` is returned
    exactly as received. Returns ``(encoding_decision, ffmpeg_command)``.
    """
    # TODO: compare against a desired audio codec once one is chosen.
    stream = stream_info['streams'][i]
    codec_name = stream['codec_name']
    print('Steam {} codec is: '.format(i) + codec_name)
    extended_command = ffmpeg_command + ' -map 0:{} -c:a copy'.format(i)
    return encoding_decision, extended_command
    
def check_subtitle_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append a copy mapping for subtitle stream ``i`` to the ffmpeg arguments.

    Subtitles are always stream-copied for now, so ``encoding_decision`` is
    returned exactly as received. Returns ``(encoding_decision, ffmpeg_command)``.
    """
    # TODO: compare against a desired subtitle codec once one is chosen.
    stream = stream_info['streams'][i]
    codec_name = stream['codec_name']
    print('Steam {} codec is: '.format(i) + codec_name)
    extended_command = ffmpeg_command + ' -map 0:{} -c:s copy'.format(i)
    return encoding_decision, extended_command

def check_attachmeent_stream(encoding_decision, i, stream_info, ffmpeg_command):
    """Append a copy mapping for attachment stream ``i`` to the ffmpeg arguments.

    Attachments are always stream-copied for now, so ``encoding_decision`` is
    returned exactly as received. Returns ``(encoding_decision, ffmpeg_command)``.
    """
    # TODO: compare against a desired attachment codec once one is chosen.
    stream = stream_info['streams'][i]
    codec_name = stream['codec_name']
    print('Steam {} codec is: '.format(i) + codec_name)
    extended_command = ffmpeg_command + ' -map 0:{} -c:t copy'.format(i)
    return encoding_decision, extended_command

def check_codecs(stream_info, encoding_decision):
    """Walk every stream reported by ffprobe and build the ffmpeg mapping arguments.

    The stream count comes from ``stream_info['format']['nb_streams']``. Each
    stream is handed to the checker for its ``codec_type`` (video, audio,
    subtitle or attachment); streams of any other type add nothing to the
    command. Returns ``(encoding_decision, ffmpeg_command)``.
    """
    total_streams = stream_info['format']['nb_streams']
    ffmpeg_command = ''
    print ('There are : ' + str(total_streams) + ' streams')
    for index in range(total_streams):
        kind = stream_info['streams'][index]['codec_type']
        if kind == 'video':
            handler = check_video_stream
        elif kind == 'audio':
            handler = check_audio_stream
        elif kind == 'subtitle':
            handler = check_subtitle_stream
        elif kind == 'attachment':
            handler = check_attachmeent_stream
        else:
            # Unrecognised stream types are not mapped into the output.
            continue
        encoding_decision, ffmpeg_command = handler(encoding_decision, index, stream_info, ffmpeg_command)
    return encoding_decision, ffmpeg_command


def check_container_extension(file, encoding_decision):
    """Work out the output file name and flag files that are not ``.mkv``.

    A name whose extension is not ``.mkv`` (case-insensitive) has it swapped
    for ``.mkv`` and forces ``encoding_decision`` to True. The output path is
    always the resulting name prefixed with ``/boil_hold/``.
    Returns ``(encoding_decision, output_path)``.
    """
    stem, suffix = os.path.splitext(file)
    already_mkv = suffix.lower() == '.mkv'
    if not already_mkv:
        encoding_decision = True
    output_name = file if already_mkv else stem + '.mkv'
    return encoding_decision, '/boil_hold/' + output_name

def check_container_type(stream_info, encoding_decision, file):
    """Flag files whose container is not Matroska and derive the output path.

    The ffprobe ``format_name`` must equal ``'matroska,webm'``; anything else
    forces the decision to True. The file name is then passed through
    ``check_container_extension``, which may also force it.
    Returns ``(encoding_decision, output_path)``.
    """
    container = stream_info['format'].get('format_name')
    print ('format is: ' + container)
    is_matroska = container == 'matroska,webm'
    decision_after_container = encoding_decision if is_matroska else True
    final_decision, output_path = check_container_extension(file, decision_after_container)
    print ('>>>check_container_type<<<  Container is: ' + container + ' so, encoding_decision is: ' + str(final_decision))
    return final_decision, output_path
    

def ffprobe_function(file_path):
    """Run ffprobe on ``file_path`` and return its JSON report as a dict.

    The command is passed as an argument list instead of a shell string, so
    file names containing quotes, spaces or shell metacharacters reach ffprobe
    unmodified and cannot inject extra commands.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        json.JSONDecodeError: ffprobe's stdout was not valid JSON.
    """
    ffprobe_command = [
        'ffprobe',
        '-loglevel', 'quiet',
        '-show_entries', 'format:stream=index,stream,codec_type,codec_name,channel_layout,format=nb_streams',
        '-of', 'json',
        # Explicit -i so a path starting with '-' is not parsed as an option.
        '-i', file_path,
    ]
    result = subprocess.run(ffprobe_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stream_info = json.loads(result.stdout)
    return stream_info

def file_size_kb(file_path):
    """Return the size of ``file_path`` in kilobytes, rounded to a whole number.

    Returns ``0.0`` when the path is not an existing regular file.
    """
    if not os.path.isfile(file_path):
        return 0.0
    return round(os.path.getsize(file_path) / 1024)
    

def requires_encoding(file_located_data):
    """Probe a located file and record how it should be encoded, if at all.

    Runs ffprobe on ``file_located_data['file_path']``, then applies the
    container and codec checks. When either asks for an encode, the dict is
    updated in place with ``ffmpeg_command``, ``ffmepg_output_file_name`` and
    ``file_hash`` (the size in KB from ``file_size_kb``) and printed as JSON.
    The decision and the command are always printed. Returns None.
    """
    stream_info = ffprobe_function(file_located_data['file_path'])

    decision, output_name = check_container_type(stream_info, False, file_located_data['file'])
    decision, ffmpeg_command = check_codecs(stream_info, decision)

    if not decision:
        print ('file does not need encoding')
    else:
        print ('file needs encoding')
        file_located_data['ffmpeg_command'] = ffmpeg_command
        file_located_data['ffmepg_output_file_name'] = output_name
        file_located_data['file_hash'] = file_size_kb(file_located_data['file_path'])
        print(json.dumps(file_located_data, indent=4))
    print (decision)
    print (ffmpeg_command)


In [4]:
# Hand-built record for exercising requires_encoding without running the file search.
# NOTE(review): "file"/"extension" say test22.mkv/.mkv while "file_path" points at test2.mp4 -
# presumably deliberate so the container check fires on an MP4 source; confirm.
file_located_data = {
   "directory": "C:\\Users\\cwest\\Videos\\BoilestDev",
   "extension": ".mkv",
   "file": "test22.mkv",
   "file_path": "C:\\Users\\cwest\\Videos\\BoilestDev\\test2.mp4",
   "root": "C:\\Users\\cwest\\Videos\\BoilestDev"
}
requires_encoding(file_located_data)

format is: mov,mp4,m4a,3gp,3g2,mj2
>>>check_container_type<<<  Container is: mov,mp4,m4a,3gp,3g2,mj2 so, encoding_decision is: True
There are : 1 streams
Steam 0 codec is: h264
file needs encoding
{
    "directory": "C:\\Users\\cwest\\Videos\\BoilestDev",
    "extension": ".mkv",
    "file": "test22.mkv",
    "file_path": "C:\\Users\\cwest\\Videos\\BoilestDev\\test2.mp4",
    "root": "C:\\Users\\cwest\\Videos\\BoilestDev",
    "ffmpeg_command": " -map 0:0 -c:v libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15",
    "ffmepg_output_file_name": "/boil_hold/test22.mkv",
    "file_hash": 757
}
True
 -map 0:0 -c:v libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15


# Running FFMpeg

In [41]:
import subprocess, json, os, shutil

def process_ffmpeg(file_located_data):
    """Run the whole encode-and-replace pipeline for one file.

    Prints a placeholder message when ``delete_old_move_new`` reports success;
    otherwise does nothing further. Returns None.
    """
    if not delete_old_move_new(file_located_data):
        return
    print ('stuff')

def delete_old_move_new(file_located_data):
    """Encode and verify the file, then swap the result in for the original.

    Returns:
        True when the post-launch checks pass and the processed file was moved
        into place; False in every other case. A failed check now returns an
        explicit False instead of falling off the end and returning None.
    """
    if not ffmpeg_postlaunch_checks(file_located_data):
        # ffmpeg_postlaunch_checks already printed why it failed.
        return False
    if move_processed_file(file_located_data) == True:
        print ('all done!')
        return True
    print ('delete_old_move_new failed')
    return False

def ffmpeg_postlaunch_checks(file_located_data):
    """Encode the file and verify the output before it replaces the original.

    The steps run in order and stop at the first failure: encode, output file
    exists, output size is non-zero, output passes validation.
    Returns True only when every step succeeds.
    """
    all_passed = False
    if encode_video(file_located_data):
        output_path = file_located_data['ffmepg_output_file_name']
        if file_exists(output_path):
            if check_encode_output_size(output_path):
                all_passed = bool(validate_video(output_path))

    if all_passed:
        print ('ffmpeg_postlaunch_checks succeeded')
        return True
    print ('ffmpeg_postlaunch_checks failed')
    return False


def encode_video(file_located_data):
    """Run ffmpeg for one file, but only if the pre-launch checks pass.

    Reads ``file_path``, ``ffmpeg_command`` and ``ffmepg_output_file_name``
    from ``file_located_data`` to build the command line.

    Returns:
        The result of ``run_ffmpeg`` when the file was processed, or False when
        the pre-launch checks rejected it (previously an implicit None).
    """
    if not ffmpeg_prelaunch_checks(file_located_data):
        print('File failed to meet conditions, will not process')
        return False
    ffmpeg_command = build_ffmpeg_command(
        file_located_data['file_path'],
        file_located_data['ffmpeg_command'],
        file_located_data['ffmepg_output_file_name'],
    )
    return run_ffmpeg(ffmpeg_command)


def ffmpeg_prelaunch_checks(file_located_data):
    """Confirm the source file is still present and unchanged before encoding.

    The size comparison in ``check_file_hash`` only runs when the file exists.
    Returns True when both checks pass, otherwise False.
    """
    source_present = file_exists(file_located_data['file_path'])
    checks_ok = bool(source_present and check_file_hash(file_located_data))
    if not checks_ok:
        print('file failed ffmpeg_prelaunch_checks')
        return False
    print('file passed ffmpeg_prelaunch_checks')
    return True


def file_exists(file_path):
    """Report whether ``file_path`` is an existing regular file.

    Prints ``<path> : <True|False>`` and returns the same boolean.
    """
    found = os.path.isfile(file_path)
    print (' : '.join((file_path, str(found))))
    return found


def check_file_hash(file_located_data):
    """Check that the source file still has the size recorded earlier.

    Despite the name, the "hash" is the size in KB: the stored
    ``file_located_data['file_hash']`` is compared with a fresh
    ``file_size_kb`` reading of ``file_located_data['file_path']``.
    Returns True when they are equal, otherwise False.
    """
    recorded_size = file_located_data['file_hash']
    print(f'original_file_hash: {recorded_size}')
    current_size = file_size_kb(file_located_data['file_path'])
    print(f'current_file_hash: {current_size}')

    if recorded_size != current_size:
        print('Hashes do not match')
        return False
    print('Hashes match')
    return True


def check_encode_output_size(file_path):
    """Return True when the encoded output is larger than zero kilobytes.

    The size comes from ``get_file_size_kb``, which rounds to whole KB, so a
    file small enough to round down to 0 KB is reported as False.
    """
    output_size_kb = get_file_size_kb(file_path)
    return True if output_size_kb > 0.0 else False
    

def get_file_size_kb(file_path):
    """Return the on-disk size of ``file_path`` in kilobytes, rounded to a whole number.

    Unlike ``file_size_kb`` there is no existence check, so a missing path
    raises the ``OSError`` from ``os.path.getsize``.
    """
    return round(os.path.getsize(file_path) / 1024)


def validate_video(file_path):
    """Decode ``file_path`` with ffmpeg and report whether it is error-free.

    ffmpeg runs at ``-v error`` with a null output, so any text on stdout or
    stderr means a stream has problems.

    Returns:
        True when ffmpeg exits cleanly and prints nothing; False when it prints
        anything, exits non-zero, or cannot be run at all. The failure-to-run
        case used to return a non-empty "Error: ..." string, which callers
        testing truthiness treated as a pass.
    """
    # Argument list (no shell) so quotes or metacharacters in the path are passed through verbatim.
    command = ['ffmpeg', '-v', 'error', '-i', file_path, '-f', 'null', '-']
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except Exception as e:
        print(f"Error: {e}")
        print ('File failed validation')
        return False
    if result.stdout or result.stderr or result.returncode != 0:
        print ('File failed validation')
        return False
    print ('File passed validation')
    return True
    
def run_ffmpeg(ffmpeg_command):
    """Run an ffmpeg command line, echo its output, and report success.

    Args:
        ffmpeg_command: complete command string as produced by
            ``build_ffmpeg_command``.

    Returns:
        True only when the process could be started and exited with status 0;
        False otherwise. The original returned True as soon as the output
        stream closed, even when ffmpeg had failed.
    """
    print ('running ffmpeg now')
    try:
        # NOTE(review): shell=True is kept because the command arrives as one pre-quoted
        # string; the file names embedded in it are therefore interpreted by the shell.
        with subprocess.Popen(ffmpeg_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) as process:
            for line in process.stdout:
                # Lines already end with a newline; avoid printing a blank line after each one.
                print(line, end='')
            return_code = process.wait()
    except Exception as e:
        print(f"Error: {e}")
        return False
    if return_code != 0:
        print(f'ffmpeg exited with code {return_code}')
        return False
    return True
    
def move_processed_file (file_located_data):
    """Replace the original file with the processed one, without risking the original.

    The processed file (``ffmepg_output_file_name``) ends up in the original's
    directory under its own base name. It is first moved next to the original
    under a temporary name, then renamed over the final name, and only after
    that is the original deleted. If any step fails the original is still on
    disk; the old order (delete first, then move) lost it whenever the move
    failed.

    Returns:
        True when the swap completed, False when any step raised.
    """
    file_path = file_located_data['file_path']
    ffmepg_output_file_name = file_located_data['ffmepg_output_file_name']
    new_file_destination = os.path.join(os.path.dirname(file_path), os.path.basename(ffmepg_output_file_name))
    print('new_file_destination: ' + str(new_file_destination))
    # Staging name in the destination directory so the final rename stays on one filesystem.
    staging_path = new_file_destination + '.boil_tmp'
    try:
        print('moving: ' + str(ffmepg_output_file_name) + ' to: ' + str(new_file_destination))
        shutil.move(ffmepg_output_file_name, staging_path)
        # os.replace overwrites an existing destination on every platform.
        os.replace(staging_path, new_file_destination)
        # When the names are identical the original was just overwritten; do not delete the new file.
        if os.path.exists(file_path) and not os.path.samefile(file_path, new_file_destination):
            print('removing: ' + str(file_path))
            os.remove(file_path)
        return True
    except Exception as e:
        print(f"An error occurred: {e}")
        return False

def build_ffmpeg_command(file_path, ffmpeg_command, ffmepg_output_file_name):
    """Assemble the full ffmpeg command line for one file.

    The fixed global options come first, then ``-y -i`` with the quoted input
    path, the per-stream arguments in ``ffmpeg_command``, and finally the
    quoted output path. The result is printed and returned.
    """
    ffmpeg_settings = 'ffmpeg -hide_banner -loglevel 16 -stats -stats_period 10'
    pieces = (
        ffmpeg_settings,
        ' -y -i "', file_path, '" ',
        ffmpeg_command,
        ' "', ffmepg_output_file_name, '"',
    )
    full_command = ''.join(pieces)
    print ('ffmpeg_command is: ' + full_command)
    return full_command
    

In [43]:
# Hand-built record in the shape requires_encoding leaves behind, used to drive the
# encode-and-replace pipeline end to end for one file.
# "file_hash" is the source size in KB that check_file_hash compares against.
file_located_data= {
    "directory": "C:\\Users\\cwest\\Videos\\BoilestDev",
    "extension": ".mp4",
    "file": "test2.mp4",
    "file_path": "C:\\Users\\cwest\\Videos\\BoilestDev\\test2.mp4",
    "root": "C:\\Users\\cwest\\Videos\\BoilestDev",
    "ffmpeg_command": " -map 0:0 -c:v libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15",
    "ffmepg_output_file_name": "C:\\Users\\cwest\\Videos\\BoilestDev\\boil_hold\\test2.mkv",
    "file_hash": 757
}

# Alternative entry points kept for debugging individual stages; only process_ffmpeg runs.
#ffmpeg_prelaunch_checks(file_located_data)
#ffmpeg_postlaunch_checks(file_located_data)
process_ffmpeg(file_located_data)
#process_file(file_located_data)
#validate_video(file_located_data['file_path'])
#validate_video(file_located_data['ffmepg_output_file_name'])
print('done done done')

C:\Users\cwest\Videos\BoilestDev\test2.mp4 : True
original_file_hash: 757
file size is: 757
current_file_hash: 757
Hashes match
file passed ffmpeg_prelaunch_checks
ffmpeg_command is: ffmpeg -hide_banner -loglevel 16 -stats -stats_period 10 -y -i "C:\Users\cwest\Videos\BoilestDev\test2.mp4"  -map 0:0 -c:v libsvtav1 -crf 25 -preset 4 -g 240 -pix_fmt yuv420p10le -svtav1-params filmgrain=20:film-grain-denoise=0:tune=0:enable-qm=1:qm-min=0:qm-max=15 "C:\Users\cwest\Videos\BoilestDev\boil_hold\test2.mkv"
running ffmpeg now
Svt[info]: -------------------------------------------

Svt[info]: SVT [version]:	SVT-AV1 Encoder Lib v2.1.0-14-gced0d040

Svt[info]: SVT [build]  :	GCC 14.1.0	 64 bit

Svt[info]: LIB Build date: Jun  9 2024 11:58:57

Svt[info]: -------------------------------------------

Svt[info]: Number of logical cores available: 20

Svt[info]: Number of PPCS 140

Svt[info]: [asm level on system : up to avx2]

Svt[info]: [asm level selected : up to avx2]

Svt[info]: ------------------

# Writing to DB

In [None]:
from datetime import datetime
import json, os, sqlite3, logging

def encoding_results_db(file_located_data):
    """Insert one encoding result into ``ffencode_results`` and log the running total.

    Args:
        file_located_data: dict that must contain ``file``, ``file_path``,
            ``new_file_size``, ``new_file_size_difference``, ``old_file_size``,
            ``ffmpeg_command``, ``encode_outcome`` and ``original_string``.

    Database and lookup errors inside the transaction are logged, not raised.
    A missing ``file`` key raises KeyError because the primary key is built
    before the guarded section. Returns None.
    """
    recorded_date = datetime.now()

    logging.debug ("File encoding recorded: " + str(recorded_date))
    # NOTE(review): file name + microsecond is only unique by chance; kept so the
    # existing primary-key format does not change.
    unique_identifier = file_located_data["file"] + str(recorded_date.microsecond)
    logging.debug ('Primary key saved as: ' + unique_identifier)

    database = r"/Boilest/DB/Boilest.db"
    # Initialised up front so the finally clause is safe when connect() itself fails.
    conn = None
    try:
        conn = sqlite3.connect(database)
        c = conn.cursor()
        c.execute(
            "INSERT INTO ffencode_results"
            " VALUES (?,?,?,?,?,?,?,?,?,?)",
            (
                unique_identifier,
                # Same text the default datetime adapter stored, without relying on
                # that adapter (deprecated since Python 3.12).
                recorded_date.isoformat(" "),
                file_located_data["file"],
                file_located_data["file_path"],
                file_located_data["new_file_size"],
                file_located_data["new_file_size_difference"],
                file_located_data["old_file_size"],
                file_located_data["ffmpeg_command"],
                file_located_data["encode_outcome"],
                file_located_data["original_string"]
            )
        )
        conn.commit()

        c.execute("SELECT ROUND(SUM(new_file_size_difference)) FROM ffencode_results")
        result = c.fetchone()[0]
        if result is not None:
            logging.info(f'Total space saved: {result} MB')
        else:
            logging.warning('No records found in ffencode_results table.')

    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
    finally:
        if conn is not None:
            conn.close()