# Benchmarks of different BAM-to-FASTQ tools


## Imports / Constants


In [4]:
from math import ceil, floor

import pandas as pd
import plotly.graph_objects as go
import pyarrow.feather as feather


# Colour palette used for the plot traces, keyed by colour name.
# NOTE(review): the values look like the Okabe-Ito "Color Universal Design"
# palette (without black) - confirm if the exact provenance matters.
_CUD_PALETTE = {
    "orange": "#e69f00",
    "sky-blue": "#56b4e9",
    "bluish-green": "#009e73",
    "yellow": "#f0e442",
    "blue": "#0072b2",
    "vermilion": "#d55e00",
    "reddish-purple": "#cc79a7",
}

# Ordered tuple of hex codes; traces pick their colour from it by position.
CUD_COLORS = tuple(_CUD_PALETTE.values())


## Initial data parsing


In [5]:
# Load the raw benchmark records and collapse them to one row per
# (run, type) pair: the earliest "start" and the latest "complete" value
# seen in that group.
df = feather.read_feather("bam2fastqbenchmarks/results.df")
df_grouped = (
    df.groupby(["run", "type"])
    # A single named aggregation replaces two separate groupby passes that
    # were joined back together with an implicit-key merge.
    .agg(start=("start", "min"), complete=("complete", "max"))
    .reset_index()
)
# Span of each group, scaled by 1 / (60 * 1000).
# NOTE(review): this assumes "start"/"complete" are millisecond timestamps,
# which makes "duration" minutes (the plot labels the axis that way) - confirm.
df_grouped["duration"] = (df_grouped["complete"] - df_grouped["start"]) / 60 / 1000


## Plot


In [10]:
# Horizontal box plot of the per-run durations, one box per benchmark type.
figure = go.Figure()

# Types ordered by their longest duration, largest first: sorting descending
# and taking unique() keeps each type at the position of its first occurrence.
run_types = df_grouped.sort_values("duration", ascending=False)["type"].unique()
for color_index, run_type in enumerate(run_types):
    figure.add_trace(
        go.Box(
            name=run_type,
            x=df_grouped.loc[df_grouped["type"] == run_type, "duration"],
            boxpoints=False,
            # Cycle through the palette so that having more types than
            # colours reuses colours instead of raising IndexError.
            marker_color=CUD_COLORS[color_index % len(CUD_COLORS)],
            line_width=1,
        )
    )

figure.update_layout(
    width=570,
    height=430,
    margin=dict(l=20, r=20, t=20, b=20),
    template="plotly_white",
    font=dict(family="Arial", color="#000000", size=12),
    showlegend=False,
    xaxis_title="Runtime in minutes",
)

# Snap the x-range outwards to whole numbers, with 0.1 of padding on each
# side, so the outermost integer ticks (dtick=1) stay inside the plot area.
durations = df_grouped["duration"]
figure.update_xaxes(
    zeroline=True,
    range=[floor(durations.min()) - 0.1, ceil(durations.max()) + 0.1],
    dtick=1,
    title_font=dict(family="Arial", color="#000000", size=12),
)
figure.update_yaxes(tickangle=270)

# Export a PDF copy next to the notebook, then render the figure inline.
figure.write_image("bam2fastqbenchmark.pdf")
figure.show()
