# Import libraries

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Clear results

In [None]:
# Delete the results file left over from a previous run, if there is one.
# The removal is attempted directly (EAFP) rather than checked first, so
# there is no window between an existence check and the delete.
try:
    os.remove('results/results.csv')
except FileNotFoundError:
    pass  # Nothing to clear.

# Clear generated images

In [None]:
# Delete every previously generated image (matched by file extension) from
# images/output. A missing output directory is treated as "already clean",
# mirroring how the results-clearing cell tolerates a missing results file.
try:
    output_files = os.listdir('images/output')
except FileNotFoundError:
    output_files = []

for file in output_files:
    if file.endswith(('.png', '.jpg', '.jpeg', '.webp', '.avif')):
        os.remove(f'images/output/{file}')

In [None]:
# Run ./simple.sh through IPython's `!` shell escape.
# NOTE(review): presumably this script regenerates results/results.csv and the
# files under images/output that the cells below read — confirm in simple.sh.
!./simple.sh

# Visualizations

In [None]:
# Load the benchmark results. The file is read without a header row
# (header=None), so the column names are supplied explicitly in file order.
df = pd.read_csv(
    'results/results.csv',
    header=None,
    names=[
        'tool',
        'file_name',
        'original_file_format',
        'original_file_size',
        'operation',
        'new_file_format',
        'duration_in_seconds',
        'new_file_size',
    ],
)

In [None]:
# Derived metrics, computed once from the raw columns and then attached to df.
size_ratio = df['new_file_size'] / df['original_file_size']
seconds = df['duration_in_seconds']

# Output size as a fraction of the input size.
df['relative_file_size'] = size_ratio
# Combined score: 100 x relative size x duration in seconds.
df['cost'] = 100 * size_ratio * seconds
# The same duration expressed in milliseconds.
df['duration_in_ms'] = seconds * 1000

In [None]:
# Resolution used for every figure in this notebook.
IMAGE_DPI = 300

# Distinct tools and operations found in the results.
tools = df['tool'].unique()
operations = df['operation'].unique()

# Apply seaborn's default theme and build a 'mako' palette with one colour
# per tool.
sns.set_theme()
colors = sns.color_palette('mako', n_colors=len(tools))

## Cost findings

In [None]:
# One horizontal box plot per operation: distribution of cost for each tool.
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    # Cost samples of the current operation, grouped per tool.
    is_current_operation = df['operation'] == operation
    cost_by_tool = df[is_current_operation].groupby('tool')['cost']

    # Rank the tools by descending median cost and collect each tool's
    # samples in that same order so boxes and labels line up.
    tool_order = cost_by_tool.median().sort_values(ascending=False).index
    samples = [cost_by_tool.get_group(name).values for name in tool_order]

    ax.boxplot(
        samples,
        labels=tool_order,
        patch_artist=True,
        boxprops={'facecolor': '#3e3e3e'},
        vert=False,
    )
    ax.set_title(f'{operation.capitalize()} operation - Cost')
    ax.set_xlabel('Cost (lower is better)')

plt.show()

In [None]:
# One horizontal bar chart per operation: mean cost of each tool.
for operation in operations:
    fig = plt.figure(dpi=IMAGE_DPI)

    # Mean cost per tool for this operation, largest first.
    operation_rows = df[df['operation'] == operation]
    mean_cost_by_tool = operation_rows.groupby('tool')['cost'].mean()
    average_costs = mean_cost_by_tool.sort_values(ascending=False)

    # One 'mako' colour per bar.
    bar_colors = sns.color_palette('mako', len(average_costs))

    plt.barh(average_costs.index, average_costs, color=bar_colors)
    plt.xlabel('Average cost')
    plt.title(f'{operation.capitalize()} operation - Average Cost')
    plt.show()

## Duration findings

In [None]:
# One horizontal box plot per operation: distribution of duration (ms) per tool.
box_style = dict(facecolor='#3e3e3e')

for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    # Millisecond durations of this operation, grouped per tool.
    operation_rows = df[df['operation'] == operation]
    durations_by_tool = operation_rows.groupby('tool')['duration_in_ms']

    # Tools ranked by descending median duration; the samples follow the
    # same order as the labels.
    ranking = durations_by_tool.median().sort_values(ascending=False)
    tool_names = ranking.index
    durations = [durations_by_tool.get_group(name).values for name in tool_names]

    ax.boxplot(durations, labels=tool_names, patch_artist=True, boxprops=box_style, vert=False)
    ax.set_title(f'{operation.capitalize()} operation - Duration')
    ax.set_xlabel('ms (lower is better)')

plt.show()

## File size findings

In [None]:
# One horizontal box plot per operation: distribution of the relative file
# size (new size / original size) for each tool.
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    matches_operation = df['operation'] == operation
    ratio_by_tool = df[matches_operation].groupby('tool')['relative_file_size']

    # Descending median relative size decides the order of the boxes.
    median_ratio = ratio_by_tool.median()
    ordered_tools = median_ratio.sort_values(ascending=False).index
    ratios = [ratio_by_tool.get_group(tool_name).values for tool_name in ordered_tools]

    ax.boxplot(
        ratios,
        vert=False,
        labels=ordered_tools,
        patch_artist=True,
        boxprops={'facecolor': '#3e3e3e'},
    )
    ax.set_title(f'{operation.capitalize()} operation - Relative file size')
    ax.set_xlabel('Relative file size (lower is better)')

plt.show()

# File format findings

Operation costs

In [None]:
# Grouped bar chart: mean cost per tool, one bar per output file format.
# errorbar=None turns off the error bars.
fig, ax = plt.subplots(dpi=IMAGE_DPI)
sns.barplot(
    data=df,
    x='tool',
    y='cost',
    hue='new_file_format',
    palette='mako',
    errorbar=None,
)
ax.set(title='Average cost per tool and file format', ylabel='Cost', xlabel='')
plt.legend(title='New file format')
plt.show()

Operation durations

In [None]:
# Grouped bar chart: mean duration (ms) per tool, one bar per output file
# format, drawn without error bars.
duration_bar_options = {
    'x': 'tool',
    'y': 'duration_in_ms',
    'hue': 'new_file_format',
    'palette': 'mako',
    'errorbar': None,
}

fig, ax = plt.subplots(dpi=IMAGE_DPI)
sns.barplot(data=df, **duration_bar_options)
ax.set_xlabel('')
ax.set_ylabel('Duration (ms)')
ax.set_title('Average duration per tool and file format')
plt.legend(title='New file format')
plt.show()

# Image artifacts

In [None]:
# Paths of all input images (selected by extension), in sorted order.
image_paths = sorted(
    f'images/input/{name}'
    for name in os.listdir('images/input')
    if name.endswith(('.png', '.jpg', '.jpeg', '.webp', '.avif'))
)

# Pick one input image and derive its base name without the extension; the
# output files are looked up by that name.
image_index = 23
source_path = image_paths[image_index]
image = plt.imread(source_path)
file_name = os.path.basename(source_path)
file_name_without_extension, _ = os.path.splitext(file_name)

# One row of panels: the original first, then one panel per tool.
fig, axs = plt.subplots(1, len(tools) + 1, dpi=IMAGE_DPI, figsize=(20, 5))

original_panel = axs[0]
original_panel.imshow(image)
original_panel.set_title('Original image')
original_panel.grid(False)

for panel, tool in zip(axs[1:], tools):
    compressed_path = f'images/output/{file_name_without_extension}-compress-{tool}.jpg'
    panel.imshow(plt.imread(compressed_path))
    panel.set_title(f'{tool} compressed image')
    panel.grid(False)

plt.show()

In [None]:
# Same side-by-side comparison as the compress figure, but for the crop
# operation's .webp outputs and at twice the usual DPI.
image_index = 25
chosen_path = image_paths[image_index]
file_name = os.path.basename(chosen_path)
file_name_without_extension = os.path.splitext(file_name)[0]
image = plt.imread(chosen_path)

panel_count = len(tools) + 1  # The original plus one panel per tool.
fig, axs = plt.subplots(1, panel_count, dpi=IMAGE_DPI * 2, figsize=(20, 5))

# Leftmost panel: the untouched input image.
axs[0].imshow(image)
axs[0].set_title('Original image')
axs[0].grid(False)

# Remaining panels: each tool's cropped output for the same input.
for position, tool in enumerate(tools, start=1):
    panel = axs[position]
    cropped = plt.imread(f'images/output/{file_name_without_extension}-crop-{tool}.webp')
    panel.imshow(cropped)
    panel.set_title(f'{tool} cropped image')
    panel.grid(False)

plt.show()