In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

# Read parameters of GCall

In [None]:
# Load the GCall parameter table and index it by seq_id. seq_id is read as
# text instead of being left to dtype inference, and the index label is
# cleared so the displayed frame has no index header.
params_df = (
    pd.read_csv(
        "../data/internal_datasets/GCall/params.csv",
        dtype={"seq_id": str},
    )
    .set_index("seq_id")
    .rename_axis(None)
)

params_df

# Plot efficiency distribution with threshold

In [None]:
# Grey probability-density histogram of the 'eff' column. The visible x-range
# starts at the smallest observed value and ends at 1.02.
fig = px.histogram(
    data_frame=params_df,
    x="eff",
    histnorm="probability density",
    color_discrete_sequence=["#888888"],
    facet_col_spacing=0.05,
    range_x=[params_df["eff"].min(), 1.02],
)

# Fixed binning for every histogram trace: bins of width 0.002 from 0.5 to 1.5.
fig.update_traces(
    selector={"type": "histogram"},
    xbins={"start": 0.5, "end": 1.5, "size": 0.002},
)

def ecdf(a):
    """Return the empirical cumulative distribution function of ``a``.

    Parameters
    ----------
    a : array-like
        Observations. The input is flattened. For floating-point input, NaN
        entries are treated as missing and dropped, so they neither appear in
        the support nor inflate the denominator.

    Returns
    -------
    x : numpy.ndarray
        Sorted distinct values of the (non-missing) observations.
    y : numpy.ndarray
        Fraction of observations less than or equal to each entry of ``x``;
        the last entry is 1.0. Both arrays are empty when there are no
        observations.
    """
    values = np.asarray(a).ravel()
    # Only floating dtypes can hold NaN; other dtypes are passed through as-is.
    if np.issubdtype(values.dtype, np.floating):
        values = values[~np.isnan(values)]

    x, counts = np.unique(values, return_counts=True)
    if x.size == 0:
        # No observations: avoid indexing cusum[-1] on an empty array.
        return x, np.empty(0, dtype=float)

    cusum = np.cumsum(counts)
    return x, cusum / cusum[-1]
# Empirical CDF of the efficiencies, used to place the threshold marker.
x, y = ecdf(params_df["eff"])

# Dotted black vertical line at the smallest efficiency whose ECDF value
# exceeds 0.02, drawn beneath the histogram bars.
fig.add_vline(
    x=x[(y > 0.02).argmax()],
    layer="below",
    line_color="#000000",
    line_dash="dot",
    line_width=2,
    opacity=1,
)

# Figure-wide look: template, typeface, canvas size and margins; no legend.
fig.update_layout(
    template="simple_white",
    font_family="Inter",
    legend_font_size=20 / 3,
    height=90,
    width=100,
    margin={"l": 0, "r": 5, "t": 10, "b": 30},
    showlegend=False,
)

# x-axis: title, tick spacing, outside minor ticks and fonts.
fig.update_xaxes(
    title_text="PCR efficiency",
    title_font_family="Inter",
    title_font_size=20 / 3,
    tickfont_size=20 / 3,
    dtick=0.05,
    minor_dtick=0.05,
    minor_ticks="outside",
)

# y-axis: no title, tick spacing, outside minor ticks and fonts.
fig.update_yaxes(
    title_text=None,
    title_font_family="Inter",
    title_font_size=20 / 3,
    tickfont_size=20 / 3,
    dtick=500,
    minor_dtick=250,
    minor_ticks="outside",
)

# Give any annotations on the figure the same font as the rest of the text.
fig.for_each_annotation(
    lambda a: a.update(font_size=20 / 3, font_family="Inter")
)

# Remove the outline around the histogram bars.
fig.update_traces(
    selector={"type": "histogram"},
    marker={"line_width": 0},
)

# Export the figure as SVG, then render it inline.
fig.write_image("./figure_3_performance/efficiency_dist.svg")
fig.show()