In [22]:
from datetime import timedelta, datetime

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dateutil.relativedelta import relativedelta

# Fixed seven-colour palette; the plotting code below picks entries by index.
# Presumably the Color Universal Design (Okabe-Ito) colour-blind-safe set, going
# by the constant's name and the colour labels - TODO confirm the source.
CUD_COLORS = (
    "#e69f00",  # 0: orange
    "#56b4e9",  # 1: sky-blue
    "#009e73",  # 2: bluish-green
    "#f0e442",  # 3: yellow
    "#0072b2",  # 4: blue
    "#d55e00",  # 5: vermilion
    "#cc79a7",  # 6: reddish-purple
)


def hex_to_rgb(hex: str, opacity: float = 1.0) -> tuple:
    """Convert a hex colour string into an ``(r, g, b, opacity)`` tuple.

    Args:
        hex: Colour written as ``"#rrggbb"`` or the shorthand ``"#rgb"``.
            Leading ``#`` characters are optional and are stripped.
        opacity: Value appended unchanged as the fourth tuple element.

    Returns:
        A 4-tuple: three ints in the range 0-255 followed by ``opacity``.

    Raises:
        ValueError: If the string does not contain exactly 3 or 6 digits after
            stripping ``#``, or if a channel is not valid hexadecimal.
    """
    digits = hex.lstrip("#")
    if len(digits) == 3:
        # Expand shorthand: "abc" -> "aabbcc".
        digits = "".join(ch * 2 for ch in digits)
    if len(digits) != 6:
        # Without this check a 4- or 5-digit string would silently yield a
        # wrong colour, because the slices below would come up short.
        raise ValueError(f"expected 3 or 6 hex digits, got {hex!r}")
    return tuple(int(digits[i : i + 2], 16) for i in (0, 2, 4)) + (opacity,)


In [23]:
# Read the CSV and derive an absolute base count from the megabase column.
sequencing_log = pd.read_csv("sequenced_samples_and_bases.csv", parse_dates=["date"])
sequencing_log["bases sequenced"] = sequencing_log["bases sequenced (MB)"] * 1000 * 1000

# Keep only rows strictly between the two boundary dates (both ends exclusive).
in_window = (sequencing_log["date"] > "2017/01/01") & (
    sequencing_log["date"] < "2022/07/01"
)
df = sequencing_log[in_window].sort_values(by=["date"])

# Sum rows sharing a date, then total per month and, from those, per quarter.
# NOTE(review): pandas >= 2.2 emits a FutureWarning for the "M"/"Q" aliases and
# recommends "ME"/"QE"; left unchanged so older pandas versions keep working.
df = df.groupby(df["date"]).sum()
df = df.resample("M").sum()
df_quarter = df.resample("Q").sum().reset_index()
df = df.reset_index()


In [24]:
# One panel carrying a primary and a secondary y-axis over a shared x-axis.
figure = make_subplots(specs=[[dict(secondary_y=True)]])

# Quarterly base totals, drawn against the secondary y-axis.
bases_trace = go.Scatter(
    x=df_quarter["date"],
    y=df_quarter["bases sequenced"],
    mode="lines",
    name="bases",
    opacity=1,
    line={"color": CUD_COLORS[1], "width": 2},
)

# Quarterly sample totals, drawn against the primary y-axis.
samples_trace = go.Scatter(
    x=df_quarter["date"],
    y=df_quarter["samples"],
    name="samples",
    line={"color": CUD_COLORS[0], "width": 2},
)

# Attach in this order so "bases" stays ahead of "samples" in the trace list.
figure.add_trace(bases_trace, secondary_y=True)
figure.add_trace(samples_trace, secondary_y=False)

# Compact layout: small canvas, tight margins, horizontal legend centred at the
# top edge of the plot area, and titles for the x-axis and both y-axes.
figure.update_layout(
    width=570,
    height=320,
    margin={"l": 20, "r": 20, "t": 20, "b": 20},
    template="plotly_white",
    showlegend=True,
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1,
        "xanchor": "center",
        "x": 0.5,
    },
    xaxis_title="Quarter",
    yaxis_title="samples per quarter",
    yaxis2_title="bases sequenced per quarter",
    font={"family": "Arial", "color": "#000000", "size": 10},
)

# Dashed vertical reference lines, one per labelled start date.
#
# x is given as epoch milliseconds rather than as a date object.
# NOTE(review): presumably this works around add_vline's annotation placement
# failing on date-typed x values - confirm against the installed plotly version.
#
# pd.Timestamp(...).timestamp() interprets the naive date as UTC, which matches
# how plotly reads epoch milliseconds on a date axis. The previous
# datetime(...).timestamp() used the machine's local time zone, so the line
# position shifted by the UTC offset of whichever host ran the notebook.
for start_date, label in (
    ("2017-09-01", "start whole-exome sequencing"),
    ("2021-03-15", "start whole-genome sequencing"),
):
    figure.add_vline(
        x=pd.Timestamp(start_date).timestamp() * 1000,
        annotation_text=label,
        annotation_position="top right",
        annotation_font_size=10,
        line_width=2,
        line_dash="dash",
        line_color=CUD_COLORS[2],
    )

# Earliest and latest x values; they anchor the first tick and the axis range.
first_date = df_quarter["date"].min()
last_date = df_quarter["date"].max()

# Both axis families use the same title font.
axis_title_font = {"family": "Arial", "color": "#000000", "size": 12}

# x-axis: a tick every three months starting at the first data point, labelled
# as quarter over year, with one month of padding on either side of the data.
figure.update_xaxes(
    ticklabelstep=1,
    tick0=first_date,
    range=[
        first_date + relativedelta(months=-1),
        last_date + relativedelta(months=+1),
    ],
    dtick="M3",
    tickformat="Q%q\n%Y",
    tickfont={"family": "Arial", "color": "#000000", "size": 8},
    title_font=axis_title_font,
    showline=True,
    linewidth=1,
    linecolor="darkgrey",
)

# y-axes: no axis line, light grid, range extended to include zero.
figure.update_yaxes(
    title_font=axis_title_font,
    showline=False,
    tickcolor="darkgrey",
    zeroline=True,
    gridcolor="lightgrey",
    rangemode="tozero",
    title_standoff=5,
)

# Write the PDF at the current layout size before anything else is changed.
figure.write_image("sequenced_samples_and_bases.pdf")

# Enlarge the canvas for on-screen display only. The font size is not listed
# here, so this update leaves the previously configured size in place.
figure.update_layout(
    width=1024,
    height=600,
    font={"family": "Arial", "color": "#000000"},
)
figure.show()


In [25]:
%%bash
# Runs repair_pdf.sh from the notebook's working directory. Presumably it
# post-processes the PDF exported by the plotting cell; the script itself is
# not visible here - TODO confirm what it does and which file it expects.
./repair_pdf.sh