In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import rushd as rd
import scipy as sp
import seaborn as sns

from statannotations.Annotator import Annotator

from importlib import reload
reload(base)

# Global plot styling: seaborn 'ticks' axes, 'talk' context, Helvetica Neue as the sans-serif face
font_settings = {'font.family': 'sans-serif', 'font.sans-serif': ['Helvetica Neue']}
sns.set_style('ticks')
sns.set_context('talk', rc=font_settings)

In [None]:
# Input / output locations (resolved relative to the rushd data and root directories)
base_path = rd.datadir/'instruments'/'data'/'qPCR'/'emma'/'command'
output_path = rd.rootdir/'output'/'qPCR'/'new'
cache_path = output_path/'data.gzip'

# One (run folder, Cp export file, experiment label) triple per plate
plate_runs = [
    ('2024.11.13_command', '2024.11.13_ELP_command_qPCR_Cp.txt', 'exp123'),
    ('2024.11.22_command', '2024.11.22_ELP_command_qPCR_Cp.txt', 'exp123.2'),
    ('2024.12.03_command', '2024.12.03_ELP_command_qPCR_Cp.txt', 'exp123.4'),
]

plates = pd.DataFrame({
    'data_path': [base_path/folder/cp_file for folder, cp_file, _ in plate_runs],
    'yaml_path': [base_path/folder/'wells.yaml' for folder, _, _ in plate_runs],
    'exp': [exp for _, _, exp in plate_runs]
})

# Draw the well-metadata layout for every distinct wells.yaml
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)

In [None]:
# Read the combined qPCR table from the parquet cache when it exists;
# otherwise load it from the plates via base.load_data_qpcr and write the cache.
if not cache_path.exists():
    data = base.load_data_qpcr(plates)
    data.to_parquet(rd.outfile(cache_path))
else:
    data = pd.read_parquet(cache_path)
display(data)

In [None]:
# Add metadata for constructs (left join on 'construct', so every data row is kept)
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_file)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

# Create dicts to specify colors/markers, keyed by construct
metadata_dict = metadata.set_index('construct').to_dict('dict')
main_palette = metadata_dict['color']
main_markers = metadata_dict['markers']

# Short labels for the ts_kind values
ts_label = {
    'na': 'base',
    'NT': 'OL',
    'T': 'CL',
    'none': '–',
}

In [None]:
# Make 'primers' an ordered categorical so facets and axes follow this order
primers_list = ['GAPDH', 'FXN', 'mRuby2-1', 'mRuby2-2', 'FMRP-1', 'FMRP-2', 'EGFP-1', 'EGFP-2']
primers_dtype = pd.CategoricalDtype(categories=primers_list, ordered=True)
data['primers'] = data['primers'].astype(primers_dtype)

In [None]:
# Cp of the control constructs, faceted by primer set (rows) and biorep (columns)
control_constructs = ['UI','no-RT', 'no-cDNA']
plot_df = data.loc[data['construct'].isin(control_constructs)]
g = sns.catplot(data=plot_df, row='primers', col='biorep',
                x='construct', y='Cp', hue='construct')

In [None]:
# Colors assigned to bioreps 1-5; zip stops at the shorter input, so the sixth color (purple) is unused
color_names = ['red', 'orange', 'green', 'teal', 'blue', 'purple']
colors = [base.colors[color_name] for color_name in color_names]
biorep_palette = dict(zip(range(1,6), colors))

In [None]:
# Cp of the 'UI' construct per biorep, one facet per primer set
plot_df = data.loc[data['construct']=='UI']
g = sns.catplot(data=plot_df, col='primers', x='biorep', y='Cp',
                hue='biorep', palette=biorep_palette)

# In each facet, draw one line per biorep at the median Cp of that biorep's
# no-RT / no-cDNA wells for the facet's primer set
negative_wells = data['construct'].isin(['no-RT','no-cDNA'])
for name, ax in g.axes_dict.items():
    for i in range(1,6):
        selector = (data['biorep']==i) & (data['primers']==name) & negative_wells
        baseline = data.loc[selector, 'Cp'].median()
        ax.axhline(baseline, c=biorep_palette[i], zorder=0)

In [None]:
# All +dox wells except the no-RT / no-cDNA controls: bioreps in rows, primer sets in columns
not_negative = ~(data['construct'].isin(['no-RT','no-cDNA']))
plot_df = data[not_negative & (data['dox'])]
g = sns.catplot(data=plot_df, row='biorep', col='primers',
                x='construct', y='Cp', hue='biorep', palette=biorep_palette)

In [None]:
# Normalize by GAPDH Cp (delta_Cp)
def get_housekeeping(df, housekeeping='GAPDH'):
    """Attach the housekeeping gene's Cp to every row of one group.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single group; must contain 'primers' and 'Cp' columns.
    housekeeping : str
        Value of 'primers' that identifies the housekeeping gene.

    Returns
    -------
    pandas.DataFrame
        The same frame with an added '<housekeeping>_Cp' column holding the
        first matching Cp, or NaN when the group has no housekeeping row.
    """
    matches = df.loc[df['primers']==housekeeping, 'Cp']
    # A group without the housekeeping gene would raise IndexError on `.values[0]`;
    # use NaN instead so downstream arithmetic propagates the missing value.
    value = matches.values[0] if not matches.empty else float('nan')
    df[housekeeping+'_Cp'] = value
    return df

# Median Cp per (biorep, exp, construct, primers, dox); rows with no Cp are dropped
stats = (
    data
    .groupby(['biorep','exp','construct','primers','dox'])[['Cp']]
    .median()
    .reset_index()
    .dropna(subset='Cp')
)

# Attach the GAPDH Cp of each (exp, biorep, construct, dox) group, then subtract it
condition_groups = stats.groupby(['exp','biorep','construct','dox'])[stats.columns]
stats = condition_groups.apply(get_housekeeping).reset_index(drop=True)
stats['delta_Cp'] = stats['Cp'] - stats['GAPDH_Cp']
display(stats)

In [None]:
# Median Cp of the 'UI' construct per biorep, one facet per primer set
plot_df = stats.loc[stats['construct']=='UI']
g = sns.catplot(data=plot_df, col='primers', x='biorep', y='Cp',
                hue='biorep', palette=biorep_palette)

# Reference lines come from the per-well table `data`: median Cp of each
# biorep's no-RT / no-cDNA wells for the facet's primer set
negative_wells = data['construct'].isin(['no-RT','no-cDNA'])
for name, ax in g.axes_dict.items():
    for i in range(1,6):
        selector = (data['biorep']==i) & (data['primers']==name) & negative_wells
        baseline = data.loc[selector, 'Cp'].median()
        ax.axhline(baseline, c=biorep_palette[i], zorder=0)

In [None]:
# Median Cp of the 'UI' construct across primer sets, colored by biorep
plot_df = stats.loc[stats['construct']=='UI']
g = sns.stripplot(data=plot_df, x='primers', y='Cp', hue='biorep', palette=biorep_palette)
# Rotate the primer names so they do not overlap
tick_labels = g.get_xticklabels()
g.set_xticklabels(tick_labels, ha='right', rotation=90)

In [None]:
# Technical replicates of the +dox wells, excluding the 'UI', 'no-RT' and 'no-cDNA' constructs
excluded_constructs = ['UI','no-RT', 'no-cDNA']
plot_df = data[~(data['construct'].isin(excluded_constructs)) & (data['dox'])]
g = sns.catplot(data=plot_df, row='primers', col='biorep', x='construct', y='Cp',
                hue='construct', margin_titles=True, s=30)
# Rotate construct names only on the 'EGFP-2' row of facets
for (primers,biorep), ax in g.axes_dict.items():
    if primers!='EGFP-2': continue
    ax.set_xticklabels(ax.get_xticklabels(), ha='right', rotation=90)
g.figure.savefig(rd.outfile(output_path/'qc_technical-reps.svg'))

In [None]:
# Normalize by GAPDH Cp (delta_Cp)
def get_housekeeping(df, housekeeping='GAPDH'):
    """Attach the median housekeeping-gene Cp to every row of one group.

    NOTE: this redefines the `get_housekeeping` from the earlier delta-Cp cell;
    this version takes the median over all matching rows instead of the first.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single group; must contain 'primers' and 'Cp' columns.
    housekeeping : str
        Value of 'primers' that identifies the housekeeping gene.

    Returns
    -------
    pandas.DataFrame
        The same frame with an added '<housekeeping>_Cp' column: the median Cp
        of the housekeeping rows, or NaN when the group has none.
    """
    values = df.loc[df['primers']==housekeeping, 'Cp']
    # Always create the column (NaN when the gene is absent) so the result has
    # it even if no group contains the housekeeping gene.
    df[housekeeping+'_Cp'] = values.median() if not values.empty else float('nan')
    return df

# Keep only +dox wells with Cp below 35; wells at Cp >= 35 are treated as
# poor amplification, technical outliers or zero-amplification conditions
usable_wells = (data['Cp']<35) & (data['dox'])
df = data[usable_wells].copy()

# Calculate GAPDH Cp for each (exp, biorep, construct) condition
per_condition = df.groupby(['exp','biorep','construct'])[df.columns]
df = per_condition.apply(get_housekeeping).reset_index(drop=True)

# Combine technical reps (median Cp), carrying the GAPDH Cp along as a group key
stats = (
    df
    .groupby(['exp','biorep','construct','primers','GAPDH_Cp'])[['Cp']]
    .median()
    .reset_index()
    .dropna(subset='Cp')
)

# Exclude points where Cp ~ Cp of negative controls (no-cDNA, no-RT)
def get_negative(df):
    """Add a 'negative_Cp' column holding the group's negative-control Cp.

    The value is the median 'Cp' of rows whose 'construct' is 'no-RT' or
    'no-cDNA'; when the group has no such rows it falls back to 35.
    The frame is modified in place and returned.
    """
    control_mask = df['construct'].isin(['no-RT','no-cDNA'])
    control_cp = df.loc[control_mask, 'Cp']
    df['negative_Cp'] = 35 if control_cp.empty else control_cp.median()
    return df

# Attach the negative-control Cp of each (exp, primers) group
stats = stats.groupby(['exp','primers'])[stats.columns].apply(get_negative).reset_index(drop=True)

# Keep conditions that amplify earlier than their negative controls and drop the controls themselves.
# .copy() makes `filtered` independent of `stats`, so the column assignments below
# do not trigger SettingWithCopyWarning or write into a temporary slice.
filtered = stats[(stats['Cp'] < stats['negative_Cp']) & ~(stats['construct'].isin(['no-RT','no-cDNA']))].copy()
filtered['expression'] = 2**(-filtered['Cp'])

# Normalize expression relative to GAPDH
filtered['delta_Cp'] = filtered['Cp'] - filtered['GAPDH_Cp']
filtered['norm_expression'] = 2**(-filtered['delta_Cp'])

# Add metadata (reloaded here so 'ts_label' can be derived from 'ts_kind' before merging)
metadata = base.get_metadata(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx')
metadata['ts_label'] = metadata['ts_kind'].map({'na': 'base', 'NT': 'OL', 'T': 'CL', 'none': '–'})
filtered = filtered.merge(metadata, how='left', on='construct')

# Force plain float dtype on the numeric columns used for plotting
filtered['Cp'] = filtered['Cp'].astype(float)
filtered['delta_Cp'] = filtered['delta_Cp'].astype(float)
filtered['norm_expression'] = filtered['norm_expression'].astype(float)

# Normalize to base gene within bioreps
def get_control(df):
    """Attach the base construct's delta_Cp to every row of one group.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single group; must contain 'ts_label' and 'delta_Cp' columns.

    Returns
    -------
    pandas.DataFrame
        The same frame with an added 'base_delta_Cp' column: the 'delta_Cp' of
        the first row whose 'ts_label' is 'base', or NaN when there is none.
    """
    base_rows = df.loc[df['ts_label']=='base', 'delta_Cp']
    # NOTE(review): if a group holds more than one 'base' row only the first is used;
    # confirm each group has a single base construct.
    # Always create the column (NaN when no base row) so downstream code can rely on it.
    df['base_delta_Cp'] = base_rows.values[0] if not base_rows.empty else float('nan')
    return df

# Calculate base delta_Cp for each (exp, biorep, primers) group
per_primer_groups = filtered.groupby(['exp','biorep','primers'])[filtered.columns]
filtered = per_primer_groups.apply(get_control).reset_index(drop=True)

# Normalize expression relative to base gene (delta_delta_Cp)
filtered['delta_delta_Cp'] = filtered['delta_Cp'].sub(filtered['base_delta_Cp'])
filtered['relative_expression'] = 2**(-filtered['delta_delta_Cp'])

# Keep only conditions with GAPDH Cp below 21
# NOTE(review): the previous comment said "GAPDH Cp > 20" is removed, but the cutoff applied is < 21 — confirm
filtered_more = filtered.loc[filtered['GAPDH_Cp'] < 21]

display(filtered)

In [None]:
# Primer sets whose facets keep their default x tick labels.
# Defined here because this cell runs before the FXN plotting cells that also
# define `fxn_list`; without it a fresh-kernel Run All fails with NameError.
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']

# Median Cp per construct (negative controls excluded), one facet per primer set
plot_df = stats[~(stats['construct'].isin(['no-RT','no-cDNA']))]
g = sns.catplot(data=plot_df, x='construct', y='Cp', col='primers', hue='biorep', palette=biorep_palette, col_wrap=4)
for primers, ax in g.axes_dict.items():
    for biorep in range(1,6):
        # Negative-control wells (no-RT / no-cDNA) of this biorep for the facet's primer set
        baseline = df[(df['construct'].isin(['no-RT','no-cDNA'])) & (df['primers']==primers) & (df['biorep']==biorep)]
        if baseline.empty: continue
        ax.axhline(baseline['Cp'].median(), color=biorep_palette[biorep], zorder=0)
    # Rotate construct names on the facets outside fxn_list
    if primers not in fxn_list: ax.set_xticklabels(ax.get_xticklabels(), ha='right', rotation=90)

g.figure.savefig(rd.outfile(output_path/'qc_negative-control.svg'))

In [None]:
# Cp of every condition that passed filtering, one facet per primer set
plot_df = filtered
g = sns.catplot(
    data=plot_df, col='primers',
    x='construct', y='Cp',
    hue='biorep', palette=biorep_palette,
)

In [None]:
# 2^-Cp ('expression') for every filtered condition except the 'UI' construct
plot_df = filtered.loc[filtered['construct']!='UI']
g = sns.catplot(
    data=plot_df, col='primers',
    x='construct', y='expression',
    hue='biorep', palette=biorep_palette,
)

In [None]:
# Plot FXN: Cp per ts_label, one panel per primer set, with pairwise t-test annotations
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fxn_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FXN'
is_fxn = filtered['name'].str.contains('FXN')

for ax, primers in zip(axes, fxn_list):
    plot_df = filtered[is_fxn & (filtered['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (35 at the bottom, 15 at the top); tick labels are blanked on every panel
    ax.set_ylim((35,15))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(35,15,-2))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10)
    annotator.apply_and_annotate()

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(35,15,-2))

In [None]:
# Plot FXN (bioreps > 1 only): Cp per ts_label with pairwise t-test annotations
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fxn_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FXN' / rows from bioreps after the first
is_fxn = filtered['name'].str.contains('FXN')
later_bioreps = filtered['biorep']>1

for ax, primers in zip(axes, fxn_list):
    plot_df = filtered[is_fxn & (filtered['primers']==primers) & later_bioreps]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (35 at the bottom, 15 at the top); tick labels are blanked on every panel
    ax.set_ylim((35,15))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(35,15,-2))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10)
    annotator.apply_and_annotate()

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(35,15,-2))

In [None]:
# Plot FXN: Cp per ts_label, one panel per primer set (no statistical annotations), saved as SVG
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fxn_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FXN'
is_fxn = filtered['name'].str.contains('FXN')

for ax, primers in zip(axes, fxn_list):
    plot_df = filtered[is_fxn & (filtered['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (35 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set_ylim((35,10))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(35,10,-4))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(35,10,-4))
fig.savefig(rd.outfile(output_path/'Cp_FXN.svg'))

In [None]:
# Plot FXN (bioreps > 1 only): Cp per ts_label, one panel per primer set, saved as SVG
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fxn_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FXN' / rows from bioreps after the first
is_fxn = filtered['name'].str.contains('FXN')
later_bioreps = filtered['biorep']>1

for ax, primers in zip(axes, fxn_list):
    plot_df = filtered[is_fxn & (filtered['primers']==primers) & later_bioreps]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (35 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set_ylim((35,10))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(35,10,-4))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(35,10,-4))
fig.savefig(rd.outfile(output_path/'Cp_FXN_no-biorep1.svg'))

In [None]:
# Plot FMRP: Cp per ts_label, one panel per primer set, with pairwise t-test annotations
fmrp_list = ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fmrp_list), figsize=(10,5), sharey=True)

for ax, primers in zip(axes, fmrp_list):
    # Rows whose construct name contains 'FMRP', measured with this primer set
    plot_df = filtered[(filtered['name'].str.contains('FMRP')) & (filtered['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette, 
                  ax=ax, legend=False)

    # Inverted y axis (27 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set(ylim=(27,10), xlabel='', ylabel='', yticks=range(27,10,-4), title=primers)
    ax.set(yticklabels=['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10).apply_and_annotate()
    
# Only the leftmost panel carries the axis label and tick labels
axes[0].set(ylabel='Ct', yticklabels=range(27,10,-4))
# Saved under its own name: the later un-annotated FMRP cell writes 'Cp_FMRP.svg',
# which would otherwise silently overwrite this annotated figure.
fig.savefig(rd.outfile(output_path/'Cp_FMRP_annotated.svg'))

In [None]:
# Plot FMRP (bioreps < 5 only): Cp per ts_label with pairwise t-test annotations
fmrp_list = ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fmrp_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FMRP' / rows from bioreps below 5
is_fmrp = filtered['name'].str.contains('FMRP')
early_bioreps = filtered['biorep']<5

for ax, primers in zip(axes, fmrp_list):
    plot_df = filtered[is_fmrp & (filtered['primers']==primers) & early_bioreps]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (27 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set_ylim((27,10))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(27,10,-4))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10)
    annotator.apply_and_annotate()

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(27,10,-4))

In [None]:
# Plot FMRP: Cp per ts_label, one panel per primer set (no statistical annotations), saved as SVG
fmrp_list = ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fmrp_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FMRP'
is_fmrp = filtered['name'].str.contains('FMRP')

for ax, primers in zip(axes, fmrp_list):
    plot_df = filtered[is_fmrp & (filtered['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (27 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set_ylim((27,10))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(27,10,-4))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

# Only the leftmost panel carries the axis label and tick labels
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(27,10,-4))
fig.savefig(rd.outfile(output_path/'Cp_FMRP.svg'))

In [None]:
# Keep only conditions with GAPDH Cp below 21
# NOTE(review): the previous comment said "GAPDH Cp > 20" is removed, but the cutoff applied is < 21 — confirm
filtered_more = filtered.loc[filtered['GAPDH_Cp'] < 21]

# Plot FXN: Cp per ts_label with pairwise t-test annotations
fxn_list = ['GAPDH','FXN','mRuby2-1','mRuby2-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fxn_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FXN'
is_fxn = filtered_more['name'].str.contains('FXN')

for ax, primers in zip(axes, fxn_list):
    plot_df = filtered_more[is_fxn & (filtered_more['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (35 at the bottom, 15 at the top); tick labels are blanked on every panel
    ax.set_ylim((35,15))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(35,15,-2))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10)
    annotator.apply_and_annotate()
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(35,15,-2))

# Plot FMRP: Cp per ts_label with pairwise t-test annotations
fmrp_list = ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]

fig, axes = plt.subplots(1,len(fmrp_list), figsize=(10,5), sharey=True)

# Rows whose construct name contains 'FMRP'
is_fmrp = filtered_more['name'].str.contains('FMRP')

for ax, primers in zip(axes, fmrp_list):
    plot_df = filtered_more[is_fmrp & (filtered_more['primers']==primers)]
    sns.stripplot(data=plot_df, x='ts_label', y='Cp', hue='biorep', palette=biorep_palette,
                  ax=ax, legend=False)

    # Inverted y axis (27 at the bottom, 10 at the top); tick labels are blanked on every panel
    ax.set_ylim((27,10))
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_yticks(range(27,10,-4))
    ax.set_title(primers)
    ax.set_yticklabels(['']*len(ax.get_yticklabels()))
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y='Cp',)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                        text_offset=-2, line_offset_to_group=10)
    annotator.apply_and_annotate()
axes[0].set_ylabel('Ct')
axes[0].set_yticklabels(range(27,10,-4))

In [None]:
# Plot FXN & FMRP: GAPDH-normalized expression (2^-delta_Cp) per ts_label,
# one figure per gene and one panel per primer set
plots = {
    'FXN': ['GAPDH','FXN','mRuby2-1','mRuby2-2'],
    'FMRP': ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
}
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]
y = 'norm_expression'
# Raw string: '\D' is not a valid escape sequence and triggers a SyntaxWarning
# on recent Python versions; the resulting text is unchanged.
ylabel = r'$2^{-\Delta C_t}$'

for name, primer_list in plots.items():
    fig, axes = plt.subplots(1,len(primer_list), figsize=(10,5), sharey=True)

    for ax, primers in zip(axes, primer_list):
        # Rows whose construct name contains the gene name, measured with this primer set
        plot_df = filtered_more[(filtered_more['name'].str.contains(name)) & (filtered_more['primers']==primers) ]
        sns.stripplot(data=plot_df, x='ts_label', y=y, hue='biorep', palette=biorep_palette, 
                    ax=ax, legend=False)
        ax.set(xlabel='', ylabel='', title=primers)
        sns.despine(ax=ax)
        #annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y=y,)
        #annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
        #                    text_offset=-2, line_offset_to_group=10).apply_and_annotate()
    # Only the leftmost panel carries the y-axis label
    axes[0].set(ylabel=ylabel,)

In [None]:
# Plot FXN & FMRP: expression relative to the base construct (2^-delta_delta_Cp)
# per ts_label, one figure per gene and one panel per primer set
plots = {
    'FXN': ['GAPDH','FXN','mRuby2-1','mRuby2-2'],
    'FMRP': ['GAPDH','FMRP-1','FMRP-2','EGFP-1','EGFP-2']
}
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]
y = 'relative_expression'
# Raw string: '\D' is not a valid escape sequence and triggers a SyntaxWarning
# on recent Python versions; the resulting text is unchanged.
ylabel = r'$2^{-\Delta \Delta C_t}$'

for name, primer_list in plots.items():
    fig, axes = plt.subplots(1,len(primer_list), figsize=(10,5), sharey=True)

    for ax, primers in zip(axes, primer_list):
        # Rows whose construct name contains the gene name, measured with this primer set
        plot_df = filtered_more[(filtered_more['name'].str.contains(name)) & (filtered_more['primers']==primers) ]
        sns.stripplot(data=plot_df, x='ts_label', y=y, hue='biorep', palette=biorep_palette, 
                    ax=ax, legend=False)
        ax.set(xlabel='', ylabel='', title=primers)
        sns.despine(ax=ax)
        # annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y=y,)
        # annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
        #                     text_offset=-2, line_offset_to_group=10).apply_and_annotate()
    # Only the leftmost panel carries the y-axis label
    axes[0].set(ylabel=ylabel,)

In [None]:
# Plot FXN & FMRP: expression relative to the base construct (2^-delta_delta_Cp)
# for the gene-specific primer sets only (no GAPDH panel), with pairwise
# t-test annotations; one PNG is saved per gene
plots = {
    'FXN': ['FXN','mRuby2-1','mRuby2-2'],
    'FMRP': ['FMRP-1','FMRP-2','EGFP-1','EGFP-2']
}
pairs = [('base','OL'), ('base','CL'), ('OL','CL')]
y = 'relative_expression'
# Raw string: '\D' is not a valid escape sequence and triggers a SyntaxWarning
# on recent Python versions; the resulting text is unchanged.
ylabel = r'$2^{-\Delta \Delta C_t}$'

for name, primer_list in plots.items():
    fig, axes = plt.subplots(1,len(primer_list), figsize=(10,5), sharey=True)

    for ax, primers in zip(axes, primer_list):
        # Rows whose construct name contains the gene name, measured with this primer set
        plot_df = filtered_more[(filtered_more['name'].str.contains(name)) & (filtered_more['primers']==primers) ]
        sns.stripplot(data=plot_df, x='ts_label', y=y, hue='biorep', palette=biorep_palette, 
                    ax=ax, legend=False)
        ax.set(xlabel='', ylabel='', title=primers)
        sns.despine(ax=ax)
        annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y=y,)
        annotator.configure(test='t-test_ind', text_format='star', loc='inside', line_height=0,
                            text_offset=-2, line_offset_to_group=10).apply_and_annotate()
    # Only the leftmost panel carries the y-axis label
    axes[0].set(ylabel=ylabel,)
    # File name is '<y>_<gene>.png', e.g. 'relative_expression_FXN.png'
    fig.savefig(rd.outfile(output_path/(y+'_'+name+'.png')))