In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# Re-import the local `base` module so that edits made to base.py while this
# kernel is running are picked up without restarting (enables concurrent editing).
from importlib import reload
reload(base)

# Global seaborn styling for every figure in this notebook:
# 'ticks' axes style, 'talk' context, Helvetica Neue as the sans-serif font.
sns.set_style('ticks')
sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load old transfection data containing RC016-18 (CMV / mCherry-MCP / miR-FF5)

In [None]:
# Directory holding the exported data for the three exp45 runs
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'

exp45_1_path = base_path/'2022.07.15_exp45'
exp45_2_path = base_path/'2023.01.14_exp45.2'/'export'
exp45_3_path = base_path/'2023.02.10_exp45.3'/'export'

# One record per plate: data directory, well-metadata YAML, biological
# replicate number and experiment label.
# NOTE(review): exp45.3 uses the exp45.2 YAML (from the exp45.2 directory) --
# presumably the plate layouts are identical; confirm this is intentional.
plates = pd.DataFrame([
    {'data_path': exp45_1_path/'293T'/'export',
     'yaml_path': exp45_1_path/'exp45_wells2.yaml',
     'biorep': 1, 'exp': 'exp45'},
    {'data_path': exp45_2_path,
     'yaml_path': exp45_2_path/'exp45.2_wells2.yaml',
     'biorep': 2, 'exp': 'exp45.2'},
    {'data_path': exp45_3_path,
     'yaml_path': exp45_2_path/'exp45.2_wells2.yaml',
     'biorep': 3, 'exp': 'exp45.3'},
])

output_path = rd.rootdir/'output'/'tuning_old-data'
cache_path = output_path/'data.gzip'

# Plot each distinct well layout and collect the union of metadata keys they define
metadata_keys = set()
for well_yaml in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(well_yaml)
    metadata_keys.update(rd.flow.load_well_metadata(well_yaml).keys())
display(metadata_keys)

In [None]:
# Load data: reuse the parquet cache when present, otherwise read the raw
# exports, clean them, and write the cache for next time.
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events whose value is strictly positive in every channel,
    # then discard rows with any missing value
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all_channels].dropna()

    data.to_parquet(rd.outfile(cache_path))
display(data)

Draw gates based on untransfected population, then gate transfected cells

In [None]:
# Gate threshold per experiment and channel: the 99.9th percentile of the
# untransfected ('UT') events in that experiment.
channel_list = ['mGL-A', 'mRuby2-A', 'tagBFP-A']
untransfected = data[data['construct']=='UT']
gates = (
    untransfected.groupby('exp')[channel_list]
                 .quantile(0.999)
                 .reset_index()
)

# Indicate which channels are relevant for each experiment
gates = gates.assign(**{'marker': 'tagBFP-A', 'output': 'mRuby2-A'})

display(gates)

In [None]:
# Run base.gate_data on each experiment's events with the gates table, and
# flatten the grouped result back to a plain row index.
data = (
    data.groupby('exp')[data.columns]
        .apply(lambda x: base.gate_data(x,gates))
        .reset_index(drop=True)
)

# Keep rows flagged in the 'expressing' column
# (presumably added by base.gate_data -- not visible from this notebook).
df = data.loc[data['expressing']]
display(df)

In [None]:
# Add metadata for constructs
# NOTE: this cell reassigns `data`; running it a second time on the same kernel
# merges the metadata again and pandas will emit suffixed (_x/_y) duplicate columns.
metadata = base.get_metadata(
    rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

# Per-column {construct: value} lookups used to style the plots below
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
construct_palette = metadata_dict['color'] # colored by design
construct_markers = metadata_dict['markers']

In [None]:
# Subsample at most 1000 events per (experiment, construct) to keep the KDE fast.
# `groupby(...).sample(1000)` raises a ValueError as soon as one group has fewer
# than 1000 events, so the sample size is capped at the group size instead.
# A fixed seed makes the figure reproducible after a kernel restart.
plot_df = data.groupby(['exp','construct'], group_keys=False)[data.columns].apply(
    lambda events: events.sample(n=min(len(events), 1000), random_state=0)
)

# 2D KDE of tagBFP-A vs. mGL-A on log axes: one facet per construct,
# experiments overlaid by color, each density normalized independently.
g = sns.displot(data=plot_df, x='mGL-A', y='tagBFP-A', hue='exp',
                kind='kde', col='construct', col_wrap=5,
                common_norm=False, levels=5, log_scale=True)

In [None]:
# Run base.gate_data per experiment on the current `data` (which now carries
# the merged construct metadata) and flatten the grouped result.
data = (
    data.groupby('exp')[data.columns]
        .apply(lambda x: base.gate_data(x,gates))
        .reset_index(drop=True)
)

# Keep rows flagged in 'expressing', excluding the untransfected ('UT') control
is_expressing = data['expressing']
not_untransfected = data['construct']!='UT'
df = data[is_expressing & not_untransfected]
display(df)

In [None]:
# Bin data and calculate statistics (the third return value is not used here)
df_quantiles, stats, _, fits = base.calculate_bins_stats(df)

# Attach the construct metadata to both summary tables
stats, fits = (
    table.merge(metadata, how='left', on='construct')
    for table in (stats, fits)
)

In [None]:
# Output vs. binned marker level for RC016, RC017 and RC018, one facet per experiment
plot_df = df_quantiles.loc[df_quantiles['construct'].isin(['RC016','RC017','RC018'])]
g = sns.relplot(
    data=plot_df, kind='line', col='exp',
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line: geometric mean. Error interval: geometric mean divided / multiplied
    # by the geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(4e1,1e6))

# Dotted reference line in each facet: mean 'output_gmean' over the
# 'marker'-group rows of `stats` for that facet's experiment
is_marker = stats['group']=='marker'
for exp, ax in g.axes_dict.items():
    marker_baseline = stats.loc[is_marker & (stats['exp']==exp), 'output_gmean'].mean()
    ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_RC016-18.svg'))

In [None]:
# Output vs. binned marker level for exp45 only, restricted to CMV-promoter
# constructs with design > 0, one facet per design
in_exp45 = df_quantiles['exp']=='exp45'
is_cmv = df_quantiles['promoter']=='CMV'
has_design = df_quantiles['design']>0
plot_df = df_quantiles[has_design & is_cmv & in_exp45]
g = sns.relplot(
    data=plot_df, kind='line', col='design', facet_kws=dict(margin_titles=True),
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line: geometric mean. Error interval: geometric mean divided / multiplied
    # by the geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(4e1,1e6))

# Every facet shows exp45, so the same dotted reference line (mean 'output_gmean'
# over the 'marker'-group rows of `stats` for exp45) is drawn on each of them
marker_baseline = stats.loc[(stats['exp']=='exp45') & (stats['group']=='marker'), 'output_gmean'].mean()
for ax in g.axes_dict.values():
    ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_by-exp-design_biorep1.svg'), bbox_inches='tight')

In [None]:
# Output vs. binned marker level for every construct whose miR is miR.FF3,
# one facet per experiment
plot_df = df_quantiles.loc[df_quantiles['miR']=='miR.FF3']
display(plot_df['construct'].unique())  # which constructs ended up in the plot
g = sns.relplot(
    data=plot_df, kind='line', col='exp',
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line: geometric mean. Error interval: geometric mean divided / multiplied
    # by the geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(4e1,1e6))

# Dotted reference line in each facet: mean 'output_gmean' over the
# 'marker'-group rows of `stats` for that facet's experiment
is_marker = stats['group']=='marker'
for exp, ax in g.axes_dict.items():
    marker_baseline = stats.loc[is_marker & (stats['exp']==exp), 'output_gmean'].mean()
    ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_miR.FF3.svg'))