In [None]:
import sys
from pathlib import Path

import pandas as pd
import plotly.express as px

sys.path.append('../')
import plotting

## Read in abundance data for verification pool

In [None]:
# Load the abundance table keyed by seq_id (read as text), then rescale every
# column by its own mean so that each column averages 1.
abundance_df = (
    pd.read_csv(
        "../data/internal_datasets/verification_parameter_estimates/abundance_by_experiment.csv",
        dtype={'seq_id': str},
    )
    .set_index("seq_id")
    .rename_axis(None)  # drop the "seq_id" label from the index
    .pipe(lambda table: table.div(table.mean(axis=0), axis="columns"))
)

abundance_df

## Read in annotation for selected sequences

In [None]:
# Load the pool annotation and index it by the composite key "<GC>_<seq_id>".
# NOTE(review): the concatenation assumes the GC column is read as text — confirm.
# NOTE(review): presumably this key format matches the abundance table's index — confirm.
annotation_df = (
    pd.read_csv("./pool_composition.csv", dtype={'seq_id': str})
    .assign(seq_id=lambda table: table['GC'] + "_" + table['seq_id'])
    .drop(columns=['GC'])
    .set_index("seq_id")
    .rename_axis(None)  # drop the "seq_id" label from the index
)

annotation_df

## Join annotation and abundance data

In [None]:
# Inner join on the shared index: only sequences present in both the abundance
# and the annotation table are kept.
df = pd.merge(
    abundance_df,
    annotation_df,
    left_index=True,
    right_index=True,
    how="inner",
)
df

## Combine group and type annotation

In [None]:
# Concatenate the 'group' and 'type' labels into a single 'group_type' key
# (this is the key used to colour and summarise the histograms below).
df = df.assign(group_type=lambda merged: merged['group'] + merged['type'])
df

## Plot distributions

In [None]:
# Reshape to long format: one row per (sequence, PCR column) pair, with the
# normalized abundance stored in 'x'.
pcr_columns = [f'PCR{number:02d}' for number in range(1, 11)]  # PCR01 ... PCR10
plot_df = df.reset_index().melt(
    id_vars=['index', "group", "type", "group_type"],
    value_vars=pcr_columns,
    var_name='PCR',
    value_name='x',
)

# Cycle count = numeric part of the PCR label multiplied by 15.
plot_df['n_cycles'] = (
    plot_df['PCR'].str.extract(r'(\d+)', expand=False).astype(int) * 15
)

# Missing abundances are treated as zero coverage.
plot_df['x'] = plot_df['x'].fillna(0)

In [None]:
# Stacked histograms of relative coverage: one facet per cycle count (wrapped
# at five facets per row), bars coloured by the combined group/type label.
group_colors = {
    'Model1top': '#3182bd',
    'Model1bottom': '#de2d26',
    'Model2top': '#bdd7e7',
    'Model2bottom': '#fcae91',
    'normalnormal': '#969696',
}
fig = px.histogram(
    plot_df,
    x='x',
    color="group_type",
    color_discrete_map=group_colors,
    barmode="stack",
    facet_col="n_cycles",
    facet_col_wrap=5,
    facet_col_spacing=0.03,
    facet_row_spacing=0.05,
    range_x=[0, 3],
    range_y=[0, 250],
)

# Blank the text of every annotation present at this point (presumably the
# facet labels added by plotly express — confirm).
fig.for_each_annotation(lambda note: note.update(text=""))

# Fixed bins of width 0.05 over [0, 3] and no outline around the bars.
fig.update_traces(
    xbins=dict(start=0.0, end=3.0, size=0.05),
    marker=dict(line_width=0),
    selector=dict(type='histogram'),
)

fig.update_layout(
    width=680,
    height=400,
    showlegend=False,
    margin=dict(l=0, r=10, t=5, b=0),
)

# Same tick spacing on every facet; axis titles are cleared everywhere first
# and then re-added only where wanted.
fig.update_xaxes(title_text='', dtick=1, minor_dtick=0.5)
fig.update_yaxes(title_text='', dtick=100, minor_dtick=25)

# x title on facet row 1 only; y title on the first column of rows 1 and 2.
fig.update_xaxes(title_text='Relative coverage', row=1)
for facet_row in (1, 2):
    fig.update_yaxes(title_text='Sequences', row=facet_row, col=1)

# Add a text label to each facet: the cycle count followed by the mean of 'x'
# for each group, in the fixed order listed here.
summary_groups = ('Model1top', 'Model1bottom', 'Model2top', 'Model2bottom', 'normalnormal')
for facet_idx, n_cycles in enumerate(sorted(plot_df.n_cycles.unique())):
    cycle_rows = plot_df[plot_df['n_cycles'] == n_cycles]

    label_lines = [f"<b>{n_cycles} cycles</b>"]
    for group_key in summary_groups:
        group_mean = cycle_rows.loc[cycle_rows['group_type'] == group_key, 'x'].mean()
        label_lines.append(f"x̄ = {group_mean:0.2f}")

    # Five facets per row; the first five cycle counts go to facet row 2 and
    # the next five to facet row 1.
    wrap_row, wrap_col = divmod(facet_idx, 5)
    fig.add_annotation(
        x=2.0,
        y=160,
        text="<br>".join(label_lines),
        font_color="black",
        align='center',
        showarrow=False,
        col=wrap_col + 1,
        row=2 - wrap_row,
    )

# Pass the figure through the project's `plotting.standardize_plot` helper
# (presumably applies shared figure styling — see the plotting module).
fig = plotting.standardize_plot(fig)
fig.show()

# Create the output folder before writing: without this the export fails when
# the folder does not exist yet (e.g. on a fresh checkout).
output_dir = Path("./SI_figure_pool_results_allcycles")
output_dir.mkdir(parents=True, exist_ok=True)
fig.write_image(str(output_dir / "pool_evolution_full.svg"))

# also export data
plot_df.to_csv(output_dir / "pool_evolution_full.csv", index=False)