In [None]:
import pandas as pd
import plotly.express as px

import sys
sys.path.append('../')
import plotting

# Read parameter data from GCall and GCfix

In [None]:
# Every seq_id is read as a string and prefixed with the name of the dataset it
# comes from, so ids from the two parameter tables stay distinguishable after stacking.
seq_id_as_str = {'seq_id': str}

gcall_params = pd.read_csv(
    "../data/internal_datasets/GCall/params.csv", dtype=seq_id_as_str
).assign(seq_id=lambda frame: "GCall_" + frame['seq_id'])

params_df_fix = pd.read_csv(
    "../data/internal_datasets/GCfix/params.csv", dtype=seq_id_as_str
).assign(seq_id=lambda frame: "GCfix_" + frame['seq_id'])

# Stack both tables (GCall rows first, then GCfix rows); original row labels are kept.
params_df = pd.concat([gcall_params, params_df_fix])

params_df

# Read sequences selected for the verification pool

In [None]:
# Build the annotation table in one chain:
#   1. read the pool composition with seq_id kept as a string,
#   2. prefix seq_id with the value of the GC column (same "<GC>_<seq_id>" scheme
#      used for the parameter table),
#   3. drop the now-redundant GC column,
#   4. flag every listed sequence as selected.
annotation_df = (
    pd.read_csv("./pool_composition.csv", dtype={'seq_id': str})
    .assign(seq_id=lambda frame: frame['GC'] + "_" + frame['seq_id'])
    .drop(columns=['GC'])
    .assign(selected=True)
)

annotation_df

# Merge parameters and selected sequences

In [None]:
# Outer merge on seq_id: keeps every sequence of the parameter table and every
# sequence of the annotation table, whether or not it has a partner in the other.
data = pd.merge(params_df, annotation_df, on='seq_id', how="outer")

# `selected` is True for rows that came from the annotation table and NaN for all
# others. `.notna()` turns that into a proper boolean column; unlike
# `.fillna(False)` on an object column it does not rely on pandas' deprecated
# silent downcasting (FutureWarning since pandas 2.2), so the dtype is bool on
# every pandas version.
data['selected'] = data['selected'].notna()

# Combine group and type into a single label (e.g. "Model1" + "top" -> "Model1top").
# Rows where either part is missing get NaN from the concatenation and are
# labelled "not selected".
# NOTE(review): `group` and `type` presumably come from pool_composition.csv — confirm.
data['group_type'] = (data['group'] + data['type']).fillna("not selected")

# Fixed category order; sorting by it below orders the rows by category.
# Caution: any label that is not in this list silently becomes NaN.
data['group_type'] = pd.Categorical(
    data['group_type'],
    categories=["normalnormal", "Model1top", "Model1bottom", "Model2top", "Model2bottom", "not selected"],
)
data = data.sort_values(by='group_type')

display(data)
data.group_type.value_counts()

In [None]:
# One fixed colour per category: blues for the "top" groups, reds for the
# "bottom" groups, greys for the normal and the not-selected sequences.
category_colors = {
    'Model1top': '#3182bd',
    'Model1bottom': '#de2d26',
    'Model2top': '#bdd7e7',
    'Model2bottom': '#fcae91',
    'normalnormal': '#969696',
    'not selected': '#dddddd',
}

# Histogram of the `eff` column, one trace per category.
fig = px.histogram(data, x="eff", color="group_type", color_discrete_map=category_colors)

# Bars without an outline, binned in steps of 0.0015 along x.
fig.update_traces(marker_line_width=0, marker_line_color="white", xbins=dict(size=0.0015))

# Fixed axis windows; counts above 100 are clipped by the y range.
fig.update_xaxes(range=[0.775, 1.025])
fig.update_yaxes(range=[0, 100])

# Labels, legend and figure geometry; the wide right margin leaves room for the legend.
layout_options = dict(
    xaxis_title="Amplification efficiency estimated by the model",
    yaxis_title="Number of sequences",
    legend_title="Category",
    showlegend=True,
    width=680,
    height=200,
    margin=dict(l=0, r=150, t=10, b=0),
)
fig.update_layout(**layout_options)

# Apply the project-wide styling, then render inline and export as SVG.
fig = plotting.standardize_plot(fig)
fig.show()
fig.write_image("./SI_figure_pool_results_allcycles/efficiency_histogram.svg")