In [3]:
# Conversion matrix: source format (upper case) -> target formats (lower case).
# Each target maps to its own, initially empty, dict so that per-pair data can
# be attached to it later without the entries sharing state.
formats = {
    source: {target: {} for target in targets}
    for source, targets in (
        ('DOCX', ('pdf', 'odt', 'html', 'txt', 'rtf')),
        ('PDF', ('docx', 'xlsx', 'pptx', 'odt', 'ods', 'html', 'txt')),
        ('PNG', ('jpg', 'bmp', 'tiff')),
        ('JPG', ('png', 'bmp', 'tiff')),
    )
}

In [5]:
# test module for converting files to some formats (tested and new)
# input path: media/{format}_input_{number}.{format}
# output path: media/output/{format}_output_{number}.{dest_format}
from io import BytesIO
import os
import time
from file_converter import define_class_for_format
from file_converter.exceptions import ErrorConvertFile
from file_converter.types.document import Document


# Convert every supported file found directly inside `dir_path` to each target
# format listed for it in `formats`, write the result to `dir_path`/output and
# record, per (source, target) pair, the conversion time and the input size.
#
# NOTE(review): the saved output of this cell ends with "KeyError: 'docx'".
# Only ErrorConvertFile is handled below, so any other exception still stops
# the run - confirm where that KeyError originates.
dir_path = 'media'
# Derive the output folder from dir_path and create it up front, so the first
# write does not fail when the folder is missing.
output_dir = os.path.join(dir_path, 'output')
os.makedirs(output_dir, exist_ok=True)

listdir = os.listdir(dir_path)
listdir.sort()
for path in listdir:
    if not os.path.isfile(os.path.join(dir_path, path)):
        continue

    # splitext copes with names that contain no dot or several dots, where
    # unpacking path.split('.') would raise ValueError.
    input_name, input_format = os.path.splitext(path)
    input_format = input_format[1:].upper()
    if input_format not in formats:
        continue

    # Inputs are expected to be named {format}_input_{number}; the number is
    # reused in the output file name. Check it before doing any conversion
    # instead of failing with IndexError after the work is done.
    name_parts = input_name.split('_')
    if len(name_parts) < 3:
        print(f"{path} skipped: name does not follow {{format}}_input_{{number}}")
        continue
    input_number = name_parts[2]

    format_class = define_class_for_format(input_format)
    dest_formats = formats[input_format]
    input_obj = format_class(f'{dir_path}/{path}')

    success_formats = []
    failed_formats = []
    for dest_format, data in dest_formats.items():
        # Measurements accumulate across input files, so only create the
        # lists when they are not there yet.
        data.setdefault('test_time', [])
        data.setdefault('test_size', [])
        try:
            # perf_counter is the clock intended for measuring short intervals.
            start_time = time.perf_counter()
            output_obj = input_obj.convert_to(dest_format)
            end_time = time.perf_counter()
        except ErrorConvertFile as exc:
            # Best effort: keep going with the remaining formats, but report
            # the failure below instead of dropping it silently.
            failed_formats.append(f'{dest_format} ({exc})')
        else:
            output_path = os.path.join(
                output_dir, f'{input_format}_output_{input_number}.{dest_format}'
            )
            with open(output_path, 'wb') as outfile:
                outfile.write(output_obj.getbuffer())

            # Size of the *input*, in units of 1,000,000 bytes. Images are
            # re-encoded into memory in their own format to obtain a size.
            if issubclass(format_class, Document):
                input_size = input_obj.doc.getbuffer().nbytes / 1_000_000
            else:
                output = BytesIO()
                input_obj.img.save(output, format=input_obj.format)
                input_size = output.getbuffer().nbytes / 1_000_000

            # Append both values together so the two lists always stay
            # index-aligned, even if computing the size raises.
            data['test_time'].append(end_time - start_time)
            data['test_size'].append(input_size)

            success_formats.append(dest_format)

    print(f"{path} -> {', '.join(success_formats)}")
    if failed_formats:
        print(f"{path} failed: {', '.join(failed_formats)}")
    
# Summary: one line per (source, target) pair that has at least one recorded
# conversion, showing average / fastest / slowest time and the input size that
# belongs to the fastest and slowest run.
print()
for from_format, to_formats in formats.items():
    for to_format, stats in to_formats.items():
        durations = stats.get('test_time', [])
        input_sizes = stats.get('test_size', [])
        if not durations:
            continue  # nothing was recorded for this pair

        fastest, slowest = min(durations), max(durations)
        mean_duration = sum(durations) / len(durations)
        # Look up the size stored at the same position as the extreme timing
        # (first occurrence when a timing value repeats).
        fastest_size = input_sizes[durations.index(fastest)]
        slowest_size = input_sizes[durations.index(slowest)]
        print(
            f"{from_format} -> {to_format} - {mean_duration:.3f}sec AVG  |  "
            f"{fastest:.3f}sec MIN ({fastest_size:.2f}mb)  |  "
            f"{slowest:.3f}sec MAX ({slowest_size:.2f}mb)"
        )

KeyError: 'docx'

In [None]:
# rename the test files in a directory to {extension}_input_{number}.{extension}
import os

def rename_files(directory):
    """Rename every regular file in ``directory`` to ``{ext}_input_{n}.{ext}``.

    ``ext`` is the file's extension without the dot (empty for files that have
    none) and ``n`` counts from 1 separately for each extension.
    Sub-directories are left untouched.

    Numbering is deterministic: files that already follow the scheme come
    first, ordered by their current number, followed by all other files in
    name order. Running the function again on an already numbered directory
    therefore changes nothing.

    Renaming happens in two passes (current name -> temporary name -> final
    name) so that a file is never moved onto a name that another file in the
    directory still occupies; ``os.rename`` may silently replace an existing
    target.

    Args:
        directory: Path of the directory whose files are renamed in place.

    Errors raised by ``os.listdir`` / ``os.rename`` are not handled here.
    """
    def numbering_key(filename):
        # (0, current number, name) for files already named by this scheme,
        # (1, 0, name) for everything else.
        stem, dotted_extension = os.path.splitext(filename)
        prefix = f"{dotted_extension[1:]}_input_"
        number = stem[len(prefix):]
        if stem.startswith(prefix) and number.isdecimal():
            return (0, int(number), filename)
        return (1, 0, filename)

    filenames = [
        filename for filename in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, filename))
    ]
    filenames.sort(key=numbering_key)

    # Work out all final names first; files that already have theirs are skipped.
    file_count = {}
    pending = []
    for filename in filenames:
        file_extension = os.path.splitext(filename)[1][1:]
        file_count[file_extension] = file_count.get(file_extension, 0) + 1
        new_filename = f"{file_extension}_input_{file_count[file_extension]}.{file_extension}"
        if new_filename != filename:
            pending.append((filename, new_filename))

    # Pass 1: park every file that has to move under a name nobody else uses.
    staged = []
    for index, (filename, new_filename) in enumerate(pending):
        temp_filename = f"{new_filename}.renaming{index}"
        while os.path.lexists(os.path.join(directory, temp_filename)):
            temp_filename += "_"
        os.rename(os.path.join(directory, filename), os.path.join(directory, temp_filename))
        staged.append((temp_filename, new_filename))

    # Pass 2: all final names are free now, so move the files into place.
    for temp_filename, new_filename in staged:
        os.rename(os.path.join(directory, temp_filename), os.path.join(directory, new_filename))

# Apply the naming scheme to the files kept in the "media" folder.
directory_path = "media"
rename_files(directory=directory_path)


In [1]:
import file_converter

# Look up the class that file_converter returns for the name 'JPGs' and show it.
# The result is bound to `converter_class` rather than `format` so the built-in
# format() is not shadowed for the rest of the kernel session.
converter_class = file_converter.define_class_for_format('JPGs')
print(converter_class)

<class 'file_converter.jpg.JPGs'>
