In [62]:
from datetime import timedelta, datetime

import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dateutil.relativedelta import relativedelta

# Seven-colour palette shared by every figure in this notebook. The values
# look like the Okabe-Ito "Color Universal Design" set — TODO confirm source.
CUD_COLORS = (
    "#e69f00",  # orange
    "#56b4e9",  # sky-blue
    "#009e73",  # bluish-green
    "#f0e442",  # yellow
    "#0072b2",  # blue
    "#d55e00",  # vermilion
    "#cc79a7",  # reddish-purple
)

# Register a template named "cud" that only sets the colorway, then make
# "plotly_white" with "cud" layered on top the default for new figures.
pio.templates["cud"] = go.layout.Template(layout={"colorway": CUD_COLORS})
pio.templates.default = "plotly_white+cud"


def hex_to_rgb(hex: str, opacity: float = 1.0) -> tuple:
    """Convert a hex colour string into an ``(r, g, b, opacity)`` tuple.

    Args:
        hex: Colour such as ``"#e69f00"``. Leading ``#`` characters are
            stripped and the first six characters are read as three
            two-digit hexadecimal channels.
        opacity: Value appended unchanged as the fourth tuple element.

    Returns:
        A tuple of three integer channel values followed by ``opacity``.

    Raises:
        ValueError: If any two-character slice is not valid hexadecimal,
            which includes the empty slice produced by too-short input.
    """
    digits = hex.lstrip("#")
    red, green, blue = (int(digits[start : start + 2], 16) for start in (0, 2, 4))
    return (red, green, blue, opacity)


In [63]:
# Pipeline step names shared by all three benchmark tables below, so the
# tables cannot drift apart in naming or order.
PIPELINE_STEPS = ["megSAPma", "megSAPvc", "megSAPcn", "megSAPsv", "megSAPdb"]

# CPU cores per step: "used" is the allocation scaled by a per-step fraction
# (plotted further down as "used (average)").
df_CPU = pd.DataFrame(
    {
        "step": PIPELINE_STEPS,
        "allocated": [12, 12, 12, 12, 12],
        "used": [12 * 0.287, 12 * 0.531, 12 * 0.086, 12 * 0.157, 12 * 0.017],
    }
)

# Memory per step: "used" is the allocation scaled by a per-step fraction
# (plotted further down as "used (peak)").
# NOTE(review): the unit of these values is not stated here — confirm.
# This table used to be defined twice with identical content; the redundant
# copy has been dropped.
df_memory = pd.DataFrame(
    {
        "step": PIPELINE_STEPS,
        "allocated": [50, 50, 50, 50, 50],
        "used": [50 * 0.02, 50 * 0.328, 50 * 0.819, 50 * 0.022, 50 * 0.007],
    }
)

# Wall-clock runtime of each step, stored as pandas Timedelta values.
df_runtime = pd.DataFrame(
    {
        "step": PIPELINE_STEPS,
        "time": [
            pd.Timedelta("2 hours 54 min"),
            pd.Timedelta("2 hours 40 min"),
            pd.Timedelta("6 hours 11 min"),
            pd.Timedelta("1 hours 3 min"),
            pd.Timedelta("3 min 51 s"),
        ],
    }
)


In [64]:
# Bar chart comparing allocated vs. used CPU cores for every pipeline step.
# "allocated" is added first and "used (average)" second, as before.
figure_CPU = go.Figure(
    data=[
        go.Bar(
            x=df_CPU["step"],
            y=df_CPU["allocated"],
            name="allocated",
            marker_color=CUD_COLORS[1],
        ),
        go.Bar(
            x=df_CPU["step"],
            y=df_CPU["used"],
            name="used (average)",
            marker_color=CUD_COLORS[0],
        ),
    ]
)

# Compact layout for the PDF export. The template now matches the notebook
# default and the memory figure ("plotly_white+cud") instead of silently
# dropping the "cud" layer; bar colours are set explicitly, so the rendered
# output does not change.
figure_CPU.update_layout(
    width=570,
    height=320,
    margin=dict(l=20, r=20, t=20, b=20),
    template="plotly_white+cud",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
    xaxis_title="Process",
    yaxis_title="CPU cores",
    font=dict(family="Arial", color="#000000", size=10),
)

figure_CPU.write_image("pipeline_benchmark_CPU_v03.pdf")

# Enlarge the canvas for on-screen display. update_layout merges into the
# existing layout, so margins and the font size set above stay in effect.
figure_CPU.update_layout(
    width=1024,
    height=600,
    font=dict(family="Arial", color="#000000"),
)
figure_CPU.show()


In [65]:
# Bar chart comparing allocated vs. used memory for every pipeline step.
# "allocated" is added first and "used (peak)" second, as before.
figure_memory = go.Figure(
    data=[
        go.Bar(
            x=df_memory["step"],
            y=df_memory["allocated"],
            name="allocated",
            marker_color=CUD_COLORS[1],
        ),
        go.Bar(
            x=df_memory["step"],
            y=df_memory["used"],
            name="used (peak)",
            marker_color=CUD_COLORS[0],
        ),
    ]
)

# Compact layout for the PDF export.
# The y-axis title used to read "memory cores", a leftover from the CPU
# figure; memory is not measured in cores.
# NOTE(review): the unit of df_memory is not stated in the notebook — append
# it to the axis title once confirmed.
figure_memory.update_layout(
    width=570,
    height=320,
    margin=dict(l=20, r=20, t=20, b=20),
    template="plotly_white+cud",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
    xaxis_title="Process",
    yaxis_title="Memory",
    font=dict(family="Arial", color="#000000", size=10),
)

figure_memory.write_image("pipeline_benchmark_memory_v03.pdf")

# Enlarge the canvas for on-screen display. update_layout merges into the
# existing layout, so margins and the font size set above stay in effect.
figure_memory.update_layout(
    width=1024,
    height=600,
    font=dict(family="Arial", color="#000000"),
)
figure_memory.show()


In [127]:
# Horizontal stacked bar chart of runtimes in hours: two single-segment
# reference bars plus one bar stacked from the per-step runtimes in df_runtime.
figure_runtime = go.Figure()

# Reference bar 1: hard-coded median runtime for 2022.
figure_runtime.add_trace(
    go.Bar(
        y=["Median runtime<br />2022"],
        x=[pd.Timedelta("1 days 0 hours 31 min").total_seconds() / 60 / 60],
        name="Median runtime 2022",
        orientation="h",
        showlegend=False,
        marker_color=CUD_COLORS[0],
    ),
)

# Reference bar 2: hard-coded runtime of the initial Nextflow workflow.
figure_runtime.add_trace(
    go.Bar(
        y=["Runtime of<br />initial Nextflow<br />workflow"],
        x=[pd.Timedelta("11.5 hours").total_seconds() / 60 / 60],
        name="Runtime of initial Nextflow workflow",
        orientation="h",
        showlegend=False,
        marker_color=CUD_COLORS[0],
    ),
)

# One stacked segment per pipeline step. The colour is chosen by position
# (not by DataFrame index label) so the lookup keeps working if df_runtime is
# ever filtered or re-indexed. It starts at palette entry 1 because entry 0
# is taken by the reference bars, and wraps around instead of raising
# IndexError when there are more steps than colours.
for position, (step, duration) in enumerate(
    zip(df_runtime["step"], df_runtime["time"])
):
    figure_runtime.add_trace(
        go.Bar(
            y=["Runtime of<br />Nextflow workflow<br />with separated<br />steps"],
            x=[duration.total_seconds() / 60 / 60],
            name=step,
            text=step,
            textposition="auto",
            orientation="h",
            marker_color=CUD_COLORS[(1 + position) % len(CUD_COLORS)],
        ),
    )

# Compact layout for the PDF export. The reversed y-axis lists the bars
# top-to-bottom in the order they were added. The legend settings have no
# visible effect while showlegend is False.
figure_runtime.update_layout(
    barmode="stack",
    yaxis=dict(autorange="reversed"),
    width=570,
    height=320,
    margin=dict(l=20, r=20, t=20, b=20),
    showlegend=False,
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
    xaxis_title="Runtime in h",
    font=dict(family="Arial", color="#000000", size=10),
)

figure_runtime.write_image("pipeline_benchmark_runtime_v03.pdf")

# Enlarge the canvas for on-screen display. update_layout merges into the
# existing layout, so margins and the font size set above stay in effect.
figure_runtime.update_layout(
    width=1024,
    height=600,
    font=dict(family="Arial", color="#000000"),
)
figure_runtime.show()


In [128]:
%%bash
# Runs the project-local repair_pdf.sh from the notebook's working directory.
# NOTE(review): presumably post-processes the PDFs written by the cells above;
# the script is not visible here — confirm what it does and that it is executable.
./repair_pdf.sh