In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

import sys
sys.path.append('..')
import plotting

# Line colour used for each example sequence, keyed by its prefixed sequence id.
colormap = dict(
    GCall_000835="#de2d26",
    GCall_000006="#636363",
    GCall_001634="#31a354",
)

## Read in population distributions for GCall

In [None]:
# Load the per-experiment abundance table. seq_id is read as text so ids such
# as "000835" keep their leading zeros, then prefixed and used as the
# (unnamed) index.
pop_df = (
    pd.read_csv(
        "../data/internal_datasets/GCall/abundance_by_experiment.csv",
        dtype={"seq_id": str},
    )
    .assign(seq_id=lambda frame: "GCall_" + frame["seq_id"])
    .set_index("seq_id")
    .rename_axis(None)
)
# Divide every column by its own mean.
pop_df = pop_df.div(pop_df.mean(axis="index"), axis="columns")
pop_df

## Read in parameter estimates for GCall

In [None]:
# Load the parameter table, tag every row with its "GCall" label and index it
# by the prefixed sequence id (same id format as the abundance table).
params_df = (
    pd.read_csv(
        "../data/internal_datasets/GCall/params.csv",
        dtype={"seq_id": str},
    )
    .assign(
        GC="GCall",
        seq_id=lambda frame: "GCall_" + frame["seq_id"],
    )
    .set_index("seq_id")
    .rename_axis(None)
)
params_df

## Join coverage data and parameter estimates

In [None]:
# Inner join on the shared index: only sequences present in both tables remain.
df = pd.merge(
    left=pop_df,
    right=params_df,
    left_index=True,
    right_index=True,
    how="inner",
)
df

## Select example sequences

In [None]:
# Rows of the joined table for the example sequences listed in `colormap`,
# copied so later use cannot touch `df`.
params = df.loc[list(colormap), :].copy()
params

## Plot coverage distributions

In [None]:
# Reshape to long format: one row per (sequence, PCR column) pair, with the
# coverage value in `x`.
plot_df = df.reset_index().melt(
    id_vars=["index", "GC", "eff", "x0"],
    value_vars=[f"PCR{i}" for i in range(1, 7)],
    var_name="PCR",
    value_name="x",
)
# The number embedded in the PCR label, times 15, is used as the cycle count.
plot_df["n_cycles"] = (
    plot_df["PCR"].str.extract(r"(\d+)", expand=False).astype(int) * 15
)
# Missing coverage values are treated as zero.
plot_df["x"] = plot_df["x"].fillna(0)

plot_df

### A single panel for the main figure

In [None]:
# Main-figure panel: coverage histogram for the PCR4 column only, with one
# vertical line per example sequence.
iplot_df = plot_df.loc[plot_df.PCR == "PCR4"].copy()
# Line colours for this panel; the grey and the third colour differ from `colormap`.
icolormap = {
    "GCall_000835": "#de2d26",
    "GCall_000006": "#444444",
    "GCall_001634": "#3182bd",
}
small_font = 20 / 3  # font size shared by axis titles, tick labels and legend

# NOTE(review): histnorm='probability' makes the bar heights fractions of the
# total, yet the y-axis below is titled "# Sequences" and uses dtick=500 —
# confirm which of the two is intended.
fig = px.histogram(
    iplot_df,
    x='x',
    color_discrete_sequence=['#cccccc'],
    range_x=[0, 2.5],
    histnorm='probability',
)
# Fixed 0.1-wide bins over the plotted range, bars drawn without outlines.
fig.update_traces(
    xbins=dict(start=0.0, end=2.5, size=0.1),
    marker=dict(line_width=0),
    selector=dict(type='histogram'),
)

# This figure has no facets, so there are no facet annotations to rename.
fig.update_xaxes(
    title_text="Relative coverage",
    dtick=1,
    minor_dtick=0.5,
    minor_ticks="outside",
    title_font_family="Inter",
    title_font_size=small_font,
    tickfont_size=small_font,
)
fig.update_yaxes(
    title_text="# Sequences",
    dtick=500,
    minor_dtick=250,
    minor_ticks="outside",
    title_font_family="Inter",
    title_font_size=small_font,
    tickfont_size=small_font,
)

# Mark where each example sequence sits in the PCR4 distribution.
for seq_id in params.index:
    fig.add_vline(
        x=params.loc[seq_id, "PCR4"],
        line_width=1.5,
        line_color=icolormap[seq_id],
        row=1,
        col=1,
        opacity=1.0,
    )

# Final size, margins and theme of the panel.
fig.update_layout(
    template="simple_white",
    font_family="Inter",
    legend_font_size=small_font,
    margin=dict(l=0, r=5, t=10, b=20),
    height=75,
    width=110,
    showlegend=False,
)
fig.show()
fig.write_image("./figure_1/cov_dist.svg")

### All panels for the SI figure

In [None]:
# SI figure: one coverage histogram per cycle count, faceted and coloured by
# `n_cycles`, with the example sequences marked in every panel.
fig = px.histogram(
    plot_df,
    x='x',
    facet_col="n_cycles",
    color="n_cycles",
    facet_col_spacing=0.02,
    color_discrete_map={
        15: '#9ecae1',
        30: '#6baed6',
        45: '#4292c6',
        60: '#2171b5',
        75: '#08519c',
        90: '#08306b',
    },
    range_x=[0, 2.5],
    range_y=[0, 2000],
)
fig.update_traces(xbins=dict(start=0.0, end=2.5, size=0.1), selector=dict(type='histogram'))
fig.update_layout(
    height=160,
    width=680,
    margin=dict(l=0, r=10, t=20, b=0),
    showlegend=False,
)
# Keep only the text after "=" in each facet title and append " cycles".
# Single quotes inside the f-string keep this line valid on Python < 3.12,
# where reusing the enclosing quote character is a SyntaxError.
fig.for_each_annotation(lambda a: a.update(text=f"{a.text.split('=')[-1]} cycles"))
fig.update_xaxes(
    title_text='Relative coverage',
    dtick=1,
    minor_dtick=0.25
)
# Blank the y-axis title on every panel, then label only the first one.
fig.update_yaxes(
    title_text='',
    dtick=500,
    minor_dtick=250
)
fig.update_yaxes(title_text='Number of sequences', row=1, col=1)
fig.update_traces(marker=dict(line_width=0), selector=dict(type='histogram'))


# One vertical line per example sequence in each of the six panels; panel i
# uses the value from column PCR{i}.
for seq_id in params.index:
    color = colormap[seq_id]
    for i in range(1, 6+1):
        fig.add_vline(
            x=params.loc[seq_id, f'PCR{i}'],
            line_width=1.5,
            line_color=color,
            row=1,
            col=i,
            opacity=1.0,
        )


fig = plotting.standardize_plot(fig)
fig.show()
fig.write_image("./SI_figure_pcr_model_explanation/cov_dist.svg")

# export data as well
plot_df.to_csv("./SI_figure_pcr_model_explanation/cov_dist.csv", index=False)
params.to_csv("./SI_figure_pcr_model_explanation/params.csv", index=True)

## Plot efficiency distributions

In [None]:
# Independent copy of the joined table for the efficiency histogram.
# Note: this rebinds `plot_df`, which previously held the melted long-format table.
plot_df = df.copy(deep=True)

plot_df

In [None]:
# Histogram of the `eff` column with the example sequences marked.
fig = px.histogram(
    plot_df,
    x="eff",
    range_x=[0.94, 1.02],
    range_y=[0, 1250],
    facet_col_spacing=0.05,
    color_discrete_sequence=["#fd8d3c"],
)
# 0.001-wide bins; bars drawn without outlines.
fig.update_traces(
    selector={"type": "histogram"},
    xbins={"start": 0.70, "end": 1.1, "size": 0.001},
    marker={"line_width": 0},
)
fig.update_layout(
    showlegend=False,
    width=175,
    height=175,
    margin={"l": 0, "r": 10, "t": 5, "b": 0},
)
fig.update_xaxes(title_text="Relative PCR efficiency", dtick=0.02, minor_dtick=0.01)
fig.update_yaxes(title_text="Number of sequences", dtick=500, minor_dtick=250)

# One vertical line per example sequence at its `eff` value.
for seq_id in params.index:
    fig.add_vline(
        x=params.at[seq_id, "eff"],
        line_color=colormap[seq_id],
        line_width=1.5,
        opacity=1.0,
        row=1,
        col=1,
    )

fig = plotting.standardize_plot(fig)
fig.show()
fig.write_image("./SI_figure_pcr_model_explanation/eff_dist.svg")

## Plot initial abundance distributions

In [None]:
# Independent copy of the joined table for the initial-abundance histogram.
plot_df = df.copy(deep=True)

plot_df

In [None]:
# Histogram of the `x0` column with the example sequences marked.
fig = px.histogram(
    plot_df,
    x="x0",
    range_x=[0, 2.5],
    range_y=[0, 1250],
    facet_col_spacing=0.05,
    color_discrete_sequence=["#fd8d3c"],
)
# 0.07-wide bins; bars drawn without outlines.
fig.update_traces(
    selector={"type": "histogram"},
    xbins={"start": 0, "end": 5, "size": 0.07},
    marker={"line_width": 0},
)
fig.update_layout(
    showlegend=False,
    width=175,
    height=175,
    margin={"l": 0, "r": 10, "t": 5, "b": 0},
)
fig.update_xaxes(title_text="Relative initial abundance", dtick=1, minor_dtick=0.25)
fig.update_yaxes(title_text="Number of sequences", dtick=500, minor_dtick=250)

# One vertical line per example sequence at its `x0` value.
for seq_id in params.index:
    fig.add_vline(
        x=params.at[seq_id, "x0"],
        line_color=colormap[seq_id],
        line_width=1.5,
        opacity=1.0,
        row=1,
        col=1,
    )

fig = plotting.standardize_plot(fig)
fig.show()
fig.write_image("./SI_figure_pcr_model_explanation/x0_dist.svg")