In [6]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Plot palette, kept as an ordered tuple so traces can pick colours by index.
# The keys only document which colour each hex code is; insertion order of the
# mapping defines the tuple order.
# NOTE(review): values look like the colour-universal-design (Okabe-Ito)
# palette that the name suggests - confirm if the provenance matters.
CUD_COLORS = tuple(
    {
        "orange": "#e69f00",
        "sky-blue": "#56b4e9",
        "bluish-green": "#009e73",
        "yellow": "#f0e442",
        "blue": "#0072b2",
        "vermilion": "#d55e00",
        "reddish-purple": "#cc79a7",
    }.values()
)


In [7]:
# Raw inputs for the figure:
#   * the sequencing-cost spreadsheet, fetched over HTTPS from genome.gov
#   * the local run statistics CSV, whose "date" column is parsed as datetimes
sequencing_cost_url = "https://www.genome.gov/sites/default/files/media/files/2021-11/Sequencing_Cost_Data_Table_Aug2021.xls"
runs_csv_path = "sequenced_samples_and_bases.csv"

df_sequencing_cost = pd.read_excel(sequencing_cost_url)
df_runs_dohg = pd.read_csv(runs_csv_path, parse_dates=["date"])


In [8]:
# Tidy both tables for plotting.
#
# The drops are non-mutating and use errors="ignore" so this cell can be
# re-run on the same kernel: the previous in-place drops raised KeyError on a
# second execution because the columns were already gone.
df_sequencing_cost = df_sequencing_cost.drop(columns=["Cost per Mb"], errors="ignore")

# Collapse the run table to one row per calendar year:
#   1. discard the base-count column (not used downstream in this view),
#   2. keep only rows dated before 2022-01-01,
#   3. sum rows sharing a date, which also moves "date" into the index so that
#      resample() has a datetime index to work on,
#   4. sum per year, then restore "date" as a regular column for plotting.
# NOTE(review): the "Y" alias is kept for compatibility with older pandas;
# recent releases (2.2+) emit a deprecation warning and prefer "YE" - switch
# once the minimum supported pandas version is confirmed.
df_runs_dohg = (
    df_runs_dohg.drop(columns=["bases sequenced (MB)"], errors="ignore")
    .loc[lambda runs: runs["date"] < "2022/01/01"]
    .groupby("date")
    .sum()
    .resample("Y")
    .sum()
    .reset_index()
)

In [9]:
# Dual-axis line chart on a shared time axis: cost per genome on the primary
# (log-scaled) y-axis and processed samples on the secondary y-axis.
figure = make_subplots(specs=[[{"secondary_y": True}]])

# Primary axis: cost per genome, drawn in the first palette colour.
cost_trace = go.Scatter(
    x=df_sequencing_cost["Date"],
    y=df_sequencing_cost["Cost per Genome"],
    mode="lines",
    name="Cost",
    opacity=1,
    line={"color": CUD_COLORS[0], "width": 2},
)
figure.add_trace(cost_trace, secondary_y=False)
# Only the primary axis is switched to a logarithmic scale.
figure.update_yaxes(type="log", secondary_y=False)

# Secondary axis: processed samples, drawn in the second palette colour.
samples_trace = go.Scatter(
    x=df_runs_dohg["date"],
    y=df_runs_dohg["samples"],
    mode="lines",
    name="Sum of processed samples",
    opacity=1,
    line={"color": CUD_COLORS[1], "width": 2},
)
figure.add_trace(samples_trace, secondary_y=True)

# Overall canvas: fixed pixel size, tight margins, and a horizontal legend
# whose bottom edge sits on the top of the plot area, centred horizontally.
figure.update_layout(
    width=570,
    height=320,
    margin={"l": 20, "r": 20, "t": 20, "b": 20},
    template="plotly_white",
    showlegend=True,
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1,
        "xanchor": "center",
        "x": 0.5,
    },
    xaxis_title="Year",
    yaxis_title="Cost per genome sequencing in $ (log)",
    yaxis2_title="Sum of processed samples<br>at the Department of Human Genetics<br>at Hanover Medical School (MHH)",
    font={"family": "Arial", "color": "#000000", "size": 10},
)

# Axis titles share one font, slightly larger than the base font set above.
axis_title_font = {"family": "Arial", "color": "#000000", "size": 12}

# x-axis: visible axis line and one tick every 36 months ("M36").
figure.update_xaxes(
    title_font=axis_title_font,
    gridcolor="darkgrey",
    showline=True,
    linewidth=1,
    linecolor="darkgrey",
    dtick="M36",
)

# Applied to every y-axis (no secondary_y selector): no axis line, light grid.
figure.update_yaxes(
    title_font=axis_title_font,
    showline=False,
    tickcolor="darkgrey",
    gridcolor="lightgrey",
    rangemode="tozero",
)

# Export a static PDF next to the notebook, then render the figure inline.
figure.write_image("sequenced_samples_and_cost.pdf")
figure.show()


In [10]:
%%bash
# Run the repair_pdf.sh helper script from the notebook's working directory.
# NOTE(review): presumably post-processes the sequenced_samples_and_cost.pdf
# written by the plotting cell - the script is not visible here, confirm what
# it modifies and that it is executable.
./repair_pdf.sh