In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

Remove the previous results file

In [None]:
# Delete results/results.csv when it is present; otherwise do nothing.
results_csv = 'results/results.csv'
if os.path.exists(results_csv):
    os.remove(results_csv)

Clear output images

In [None]:
# Delete every .png/.jpg/.jpeg/.webp file directly inside images/output.
output_dir = 'images/output'
image_extensions = ('.png', '.jpg', '.jpeg', '.webp')

# os.listdir raises FileNotFoundError on a missing directory; when the
# directory is absent there is nothing to delete, so the cell is a no-op.
if os.path.isdir(output_dir):
    for file in os.listdir(output_dir):
        if file.endswith(image_extensions):
            os.remove(os.path.join(output_dir, file))

In [None]:
# Run the simple.sh script located in the current working directory.
# NOTE(review): presumably this regenerates results/results.csv, which the
# read_csv cell below loads — confirm against the script.
!./simple.sh

# Visualizations

In [None]:
# Load the results file into a DataFrame.
# header=None makes pandas treat the first line of the file as data, so the
# column labels come from `names` instead of from the file itself.
df = pd.read_csv(
    'results/results.csv',
    header=None,
    names=[
        'tool',
        'file_name',
        'original_file_format',
        'original_file_size',
        'operation',
        'new_file_format',
        'duration_in_seconds',
        'new_file_size',
    ],
)

In [None]:
# Derived metrics, added to df as new columns:
#   relative_file_size = new_file_size / original_file_size
#   cost               = 100 * relative_file_size * duration_in_seconds
#   duration_in_ms     = duration_in_seconds * 1000
size_ratio = df['new_file_size'] / df['original_file_size']
seconds = df['duration_in_seconds']

df['relative_file_size'] = size_ratio
df['cost'] = 100 * size_ratio * seconds
df['duration_in_ms'] = seconds * 1000

In [None]:
# DPI passed to every figure created in the plotting cells.
IMAGE_DPI = 200

sns.set_theme()

# Distinct tools and operations found in the results; the plotting cells
# iterate over `operations`.
tools, operations = (df[column].unique() for column in ('tool', 'operation'))

# One 'mako' palette entry per tool.
colors = sns.color_palette('mako', n_colors=len(tools))

## Cost findings

In [None]:
# One horizontal bar chart per operation showing the mean cost of each tool.
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    # Mean cost per tool for this operation, largest first.
    operation_rows = df.loc[df['operation'] == operation]
    mean_cost_by_tool = (
        operation_rows
        .groupby('tool')['cost']
        .mean()
        .sort_values(ascending=False)
    )

    # One 'mako' palette entry per bar.
    bar_colors = sns.color_palette('mako', n_colors=len(mean_cost_by_tool))

    ax.barh(mean_cost_by_tool.index, mean_cost_by_tool, color=bar_colors)
    ax.set_title(f'{operation.capitalize()} operation - Cost')
    ax.set_xlabel('Cost')
    plt.show()

## Duration findings

In [None]:
# One horizontal box plot per operation showing each tool's durations in ms.
for operation in operations:
    # Duration samples for this operation, grouped per tool.
    per_tool = df.loc[df['operation'] == operation].groupby('tool')['duration_in_ms']

    # Tools ranked from the highest median duration to the lowest; the sample
    # arrays are collected in that same order so boxes and labels line up.
    ranked = per_tool.median().sort_values(ascending=False)
    samples = [per_tool.get_group(name).values for name in ranked.index]

    fig, ax = plt.subplots(dpi=IMAGE_DPI)
    ax.boxplot(
        samples,
        labels=ranked.index,
        vert=False,
        patch_artist=True,
        boxprops={'facecolor': '#3e3e3e'},
    )
    ax.set_title(f'{operation.capitalize()} operation - Duration')
    ax.set_xlabel('ms (lower is better)')

plt.show()

## File size findings

In [None]:
# One horizontal box plot per operation showing each tool's relative file sizes.
for operation in operations:
    # Relative file sizes for this operation, grouped per tool.
    sizes_by_tool = df.loc[df['operation'] == operation].groupby('tool')['relative_file_size']

    # Tool names ordered by descending median relative file size.
    tool_order = sizes_by_tool.median().sort_values(ascending=False).index
    size_samples = [sizes_by_tool.get_group(name).values for name in tool_order]

    fig, ax = plt.subplots(dpi=IMAGE_DPI)
    ax.boxplot(
        size_samples,
        labels=tool_order,
        patch_artist=True,
        boxprops={'facecolor': '#3e3e3e'},
        vert=False,
    )
    ax.set(
        xlabel='Relative file size (lower is better)',
        title=f'{operation.capitalize()} operation - Relative file size',
    )

plt.show()

In [None]:
# NOTE(review): this cell renders the same relative-file-size box plots as
# the preceding cell; confirm whether a different metric was intended here
# or whether the cell can be dropped.
for operation in operations:
    fig, ax = plt.subplots(dpi=IMAGE_DPI)

    # Rows of the current operation, then relative file sizes per tool.
    operation_rows = df[df['operation'] == operation]
    size_groups = operation_rows.groupby('tool')['relative_file_size']

    # Per-tool medians, highest first; they fix the order of the boxes.
    median_sizes = size_groups.median().sort_values(ascending=False)

    boxes = []
    for tool_name in median_sizes.index:
        boxes.append(size_groups.get_group(tool_name).values)

    box_style = dict(facecolor='#3e3e3e')
    ax.boxplot(boxes, labels=median_sizes.index, patch_artist=True, boxprops=box_style, vert=False)
    ax.set_xlabel('Relative file size (lower is better)')
    ax.set_title(f'{operation.capitalize()} operation - Relative file size')

plt.show()