In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import statsmodels.formula.api as smf
import statsmodels.stats.descriptivestats as smd
import statsmodels.api as sm
import scipy.stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import sys
sys.path.append('../')
import plotting

# Hex colour assigned to each sequence ID.
colormap = dict(
    [
        ("#11493", "#de2d26"),
        ("#00006", "#969696"),
        ("#09807", "#de2d26"),
        ("#01634", "#3182bd"),
        ("GCall", "#636363"),
    ]
)

# Load qPCR data

In [None]:
# Read one CSV per experiment and tag every row with the experiment it came from.
dfs = [
    pd.read_csv(f"../data/qpcr/{exp}.csv").assign(exp=exp)
    for exp in ("Exp1", "Exp2", "Exp3")
]

# Stack the per-experiment tables into one frame (row labels are kept as read).
df = pd.concat(dfs)
df

# Remove NTCs and insert seq_id

In [None]:
# Drop the rows whose sample label is "ntc"; copy so that the column added
# below is written to an independent frame.
df = df[df['sample'] != "ntc"].copy()

# Sample label (as it appears in the `sample` column) -> sequence ID.
sample_to_seqid = {
    "1": "#11493",
    "2": "#00006",
    "3": "#09807",
    "4": "#01634",
    "all": "GCall",
}

# Look the labels up as strings: pandas infers dtypes per file, so a CSV whose
# `sample` column holds only numbers is read as integers, and integer labels
# would never match the string keys above (seq_id would silently become NaN).
# Labels that are already strings are unaffected; `sample` itself is not modified.
df['seq_id'] = df['sample'].astype(str).map(sample_to_seqid)

df

# Plot calibration curves

In [None]:
# Ct against dilution: one panel per experiment, one colour and one OLS
# trendline per sequence ID.
fig = px.scatter(
    data_frame=df,
    x="dilution",
    y="Ct",
    color="seq_id",
    facet_col="exp",
    trendline="ols",
    color_discrete_map=dict(
        [
            ("#11493", "#fb6a4a"),
            ("#00006", "#969696"),
            ("#09807", "#a50f15"),
            ("#01634", "#74c476"),
            ("GCall", "#636363"),
        ]
    ),
)

# Keep only the part of each facet label that follows the last "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.rpartition("=")[2]))

fig.update_xaxes(title="log10(dilution)")
fig.update_layout(
    width=680,
    height=300,
    margin={"l": 0, "r": 0, "t": 12, "b": 0},
    yaxis_title="Cycle threshold",
)

# Apply the project's shared figure styling, then export and display.
fig = plotting.standardize_plot(fig)
fig.write_image("./SI_figure_qpcr_calibration/qpcr_ct.svg")
fig.show()

# Store the plotted measurements next to the figure.
df.to_csv("./SI_figure_qpcr_calibration/qpcr_ct.csv", index=False)

# Find regression data

In [None]:
# One row per fitted trendline; the fit object sits in `px_fit_results`.
data = px.get_trendline_results(fig).copy()

# Unpack slope, intercept and R^2 from every fit into plain columns.
data["slope"] = data["px_fit_results"].map(lambda fit: fit.params[1])
data["intercept"] = data["px_fit_results"].map(lambda fit: fit.params[0])
data["R2"] = data["px_fit_results"].map(lambda fit: fit.rsquared)

# Efficiency from the calibration slope.
# NOTE(review): the positive exponent presumably relies on x being
# log10(dilution) so that Ct rises with x - confirm the sign convention.
data["eff_exp"] = 10 ** (1 / data["slope"]) - 1

data

In [None]:
# Mean efficiency per sequence ID with its t-based confidence interval and
# the distance from the mean to the upper bound (`delta_ci`).
res = {}
for group in data['seq_id'].unique():
    eff = data.loc[data['seq_id'] == group, 'eff_exp']
    group_stats = smd.describe(eff, stats=['mean', 'ci'], use_t=True)['eff_exp']
    group_stats['delta_ci'] = group_stats['upper_ci'] - group_stats['mean']
    res[group] = group_stats

# One row per sequence ID, one column per statistic.
pd.DataFrame(list(res.values()), index=list(res.keys()))

# Plot experimental efficiency

In [None]:
# Per-sequence mean and standard deviation of the efficiency; the mean is
# used as bar height below.
result = data.groupby(['seq_id'], as_index=False).agg({'eff_exp':['mean','std']})
result.columns = ['seq_id', 'mean', 'std']

# Broken y-axis: the lower panel shows 0..cut_interval[0], the upper panel
# shows cut_interval[1]..1, and the range in between is left out.
cut_interval = [0.05, 0.55]
bar = px.bar(
    result,
    x="seq_id",
    y="mean",
)
fig = make_subplots(
    rows=2,
    cols=1,
    row_heights=[0.8, 0.2],
    vertical_spacing=0.05,
    shared_xaxes=True,
)

# The same bars go into both panels; only the y-range differs between them.
fig.add_traces(bar.data, rows=[1]*len(bar.data), cols=[1]*len(bar.data))
fig.add_traces(bar.data, rows=[2]*len(bar.data), cols=[1]*len(bar.data))

fig.update_yaxes(range=[cut_interval[1], 1], row=1, col=1)
fig.update_xaxes(visible=False, row=1, col=1)
fig.update_yaxes(range=[0, cut_interval[0]], row=2, col=1)

# One colour per bar, looked up by sequence ID so that the colours follow the
# rows of `result` whatever their order or number (a fixed list would shift
# colours onto the wrong bars as soon as a sequence is missing or added).
bar_colors = result['seq_id'].map(colormap).tolist()
fig.update_traces(marker_color=bar_colors)

# Individual per-experiment efficiencies as black markers.
# No row/col is given here - presumably they land in the first (upper) panel.
fig.add_trace(
    px.scatter(
        data,
        x="seq_id",
        y="eff_exp",
        color_discrete_sequence=["black"]
    ).data[0]
)

# Dashed reference line at the mean efficiency of the "GCall" sample.
fig.add_hline(
    y=result.loc[result['seq_id'] == "GCall", "mean"].values[0], 
    line_dash="dash", 
    line_color="#636363", 
    line_width=1,
    opacity=1,
)


# NOTE(review): `xaxis_title` targets the row-1 x-axis, which was hidden
# above, so this title is presumably not rendered - confirm whether it should
# be set on the row-2 axis instead.
fig.update_layout(
    xaxis_title="Sequence ID",
    yaxis_title="qPCR efficiency",
    width=200,
    height=200,
    margin=dict(l=0, r=10, t=60, b=0),
    showlegend=False,
)

# Percent tick labels on both y-axes.
fig.update_yaxes(
    tickformat=',.0%', 
    dtick=0.2,
    minor_dtick=0.1,
)

# Apply the project's shared figure styling, then export and display.
fig = plotting.standardize_plot(fig)
fig.write_image("figure_2_qpcr_results/qpcr_efficiency.svg")
fig.show()

# also save the data
data[['seq_id', 'exp', 'slope', 'intercept', 'R2', 'eff_exp']].to_csv("figure_2_qpcr_results/qpcr_efficiency.csv", index=False)

# One-way ANOVA + Tukey's range test

In [None]:
# Levene's test (centred on the median) comparing the spread of efficiency
# between sequence IDs; one array of efficiencies per sequence ID.
eff_by_seq = [
    data.loc[data['seq_id'] == sid, "eff_exp"].values
    for sid in data.seq_id.unique()
]
scipy.stats.levene(*eff_by_seq, center='median')

In [None]:
# Linear model of efficiency with sequence ID as a categorical factor.
m = smf.ols(formula='eff_exp ~ C(seq_id)', data=data).fit()
display(m.summary())

# ANOVA table (typ=2) for the fitted model.
anova_table = sm.stats.anova_lm(m, typ=2)
display(anova_table)

In [None]:
# Tukey HSD pairwise comparison of efficiency between sequence IDs; show the
# summary table followed by the p-values.
posthoc = pairwise_tukeyhsd(
    endog=data["eff_exp"],
    groups=data["seq_id"],
)
display(posthoc.summary(), posthoc.pvalues)