In [None]:
import os
import subprocess
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Benchmark configuration: figure resolution, how many times each benchmark
# script is executed, and where input images and result files live.
IMAGE_DPI = 300
NUMBER_OF_ITERATIONS = 1
RESULTS_DIR = './results'
IMAGES_DIR = './images/input'

# Count the regular files located directly inside IMAGES_DIR
# (sub-directories are not counted). Summing booleans yields an int.
number_of_images = sum(
    os.path.isfile(os.path.join(IMAGES_DIR, entry))
    for entry in os.listdir(IMAGES_DIR)
)

In [None]:
# Delete every .txt file found in the results directory before the benchmark
# scripts are run. The directory comes from RESULTS_DIR (the configuration
# cell) rather than a second hard-coded path, and a missing directory is
# treated as "nothing to clean" instead of raising FileNotFoundError.
if os.path.isdir(RESULTS_DIR):
    for file in os.listdir(RESULTS_DIR):
        if file.endswith('.txt'):
            os.remove(os.path.join(RESULTS_DIR, file))

In [None]:
# Benchmark scripts to execute; each one is run NUMBER_OF_ITERATIONS times.
scripts = ['./ffmpeg.sh', './graphicsmagick.sh', './imagemagick.sh', './libvips.sh', './pillow.py', './opencv.py']

for script in scripts:
    # The launcher depends only on the script's extension, so build the
    # command once per script instead of once per iteration.
    ext = os.path.splitext(script)[1]
    if ext == '.py':
        # run python files with python3
        command = ['python3', script]
    elif ext == '.sh':
        # run shell scripts through bash
        command = ['bash', '-c', script]
    else:
        # Previously such entries were skipped silently; say so explicitly.
        print(f'Skipping {script}: unsupported extension {ext!r}')
        continue

    for iteration in range(NUMBER_OF_ITERATIONS):
        completed = subprocess.run(command)
        # Report failures instead of ignoring them, but keep going so that
        # one broken tool does not abort the whole benchmark run.
        if completed.returncode != 0:
            print(f'Warning: {script} exited with code {completed.returncode} '
                  f'(iteration {iteration + 1} of {NUMBER_OF_ITERATIONS})')

In [None]:
# Parsed benchmark results, keyed as tool name -> operation -> measurements.
# tool_scores holds NumPy arrays of costs, tool_durations holds the 'time'
# column (a pandas Series) for the same rows.
tool_scores = {}
tool_durations = {}

# os.listdir returns entries in arbitrary order; sorting keeps the tool order
# (and therefore the order and colours used by the plots) stable between runs.
for file in sorted(os.listdir(RESULTS_DIR)):
    if not file.endswith('.txt'):
        continue

    # Result files are header-less CSV with four columns per measurement.
    df = pd.read_csv(
        os.path.join(RESULTS_DIR, file),
        header=None,
        names=['operation', 'time', 'file_size', 'original_file_size'],
    )
    # '<tool>-results.txt' -> '<tool>'
    tool_name = file.replace('-results.txt', '')

    tool_scores[tool_name] = {}
    tool_durations[tool_name] = {}

    for operation in df['operation'].unique():
        selected_records = df[df['operation'] == operation]
        # Cost of one measurement: output size as a percentage of the
        # original size, multiplied by the time the operation took.
        tool_scores[tool_name][operation] = (100 * selected_records['file_size'] / selected_records['original_file_size'] * selected_records['time']).values
        tool_durations[tool_name][operation] = selected_records['time']

In [None]:
# One horizontal box plot per operation showing the distribution of the
# processing cost of every tool.
sns.set_theme()

operations = ['crop', 'compress', 'resize', 'convert']
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    # Materialise the keys so the labels are a plain list in the same order
    # as the data series built from them.
    tool_names = list(tool_scores)
    data = [tool_scores[tool_name][operation] for tool_name in tool_names]

    ax.boxplot(data, labels=tool_names, patch_artist=True, boxprops=dict(facecolor='lightgray'), vert=False)
    ax.set_xlabel('Cost (lower is better)')
    ax.set_title(f'{operation.capitalize()} operation - Processing cost')

    # Show each figure as soon as it is complete, like the other plotting cells.
    plt.show()

In [None]:
# One horizontal bar chart per operation showing the average processing cost
# of every tool; the tool with the lowest average gets a green tick label.
sns.set_theme()
colors = sns.color_palette('mako', len(tool_scores))

operations = ['crop', 'compress', 'resize', 'convert']
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)
    bar_width = 1
    tool_names = list(tool_scores)

    # Mean cost per tool. The divisor is the number of measurements recorded
    # for this operation; len(tool_scores[tool_name]) would be the number of
    # operations and would not produce an average. Using a local name also
    # avoids overwriting the notebook-level number_of_images.
    average_costs = {}
    for tool_name in tool_names:
        costs = tool_scores[tool_name][operation]
        average_costs[tool_name] = sum(costs) / len(costs)

    for i, tool_name in enumerate(tool_names):
        ax.barh(i, average_costs[tool_name], bar_width, label=tool_name, color=colors[i])

    ax.set_xlabel('Average cost (lower is better)')
    ax.set_title(f'{operation.capitalize()} operation - Average processing cost')
    ax.set_yticks(range(len(tool_names)))
    yticklabels = ax.set_yticklabels(tool_names)

    # Highlight the cheapest tool using the very same averages that were plotted.
    best_tool = min(average_costs, key=average_costs.get)
    for label in yticklabels:
        if label.get_text() == best_tool:
            label.set_color('green')

    plt.show()

In [None]:
# One horizontal box plot per operation showing the distribution of the
# processing durations of every tool.
sns.set_theme()

operations = ['crop', 'compress', 'resize', 'convert']
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)
    fig.set_size_inches(8, 6)

    # Materialise the keys so the labels are a plain list in the same order
    # as the data series built from them.
    tool_names = list(tool_durations)
    data = [tool_durations[tool_name][operation] for tool_name in tool_names]

    ax.boxplot(data, labels=tool_names, patch_artist=True, boxprops=dict(facecolor='lightgray'), vert=False)
    # This axis shows raw durations, not scores. The unit is presumably
    # seconds, because the average-duration chart multiplies the same values
    # by 1000 to report milliseconds - TODO confirm against the scripts.
    ax.set_xlabel('Seconds (lower is better)')
    ax.set_title(f'{operation.capitalize()} operation - Processing durations')

    plt.show()

In [None]:
# One horizontal bar chart per operation showing the average duration of
# every tool in milliseconds; the fastest tool gets a green tick label.
sns.set_theme()
colors = sns.color_palette('mako', len(tool_durations))

operations = ['crop', 'compress', 'resize', 'convert']
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)
    bar_width = 1
    tool_names = list(tool_durations)

    # Mean duration per tool, scaled by 1000 for display in milliseconds.
    # The divisor is the number of measurements recorded for this operation;
    # len(tool_durations[tool_name]) would be the number of operations and
    # would not produce an average. Using a local name also avoids
    # overwriting the notebook-level number_of_images.
    average_durations_in_ms = {}
    for tool_name in tool_names:
        durations = tool_durations[tool_name][operation]
        average_durations_in_ms[tool_name] = 1000 * sum(durations) / len(durations)

    for i, tool_name in enumerate(tool_names):
        ax.barh(i, average_durations_in_ms[tool_name], bar_width, label=tool_name, color=colors[i])

    ax.set_xlabel('ms (lower is better)')
    ax.set_title(f'Average duration for {operation.capitalize()} operation')
    ax.set_yticks(range(len(tool_names)))
    yticklabels = ax.set_yticklabels(tool_names)

    # Highlight the fastest tool using the very same averages that were plotted.
    fastest_tool = min(average_durations_in_ms, key=average_durations_in_ms.get)
    for label in yticklabels:
        if label.get_text() == fastest_tool:
            label.set_color('green')

    plt.show()