In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# Re-import base on every run of this cell so edits to base.py are picked up without restarting the kernel
from importlib import reload
reload(base)

# Global plot styling: 'ticks' axes, 'talk'-sized text, Helvetica Neue as the sans-serif face
font_rc = {'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']}
sns.set_style('ticks')
sns.set_context('talk', rc=font_rc)

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load all transfection data collected as of 2024.03.27

In [None]:
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'

# Export directories, one per experimental run
exp45_1_path = base_path/'2022.07.15_exp45'
exp45_2_path = base_path/'2023.01.14_exp45.2'/'export'
exp45_3_path = base_path/'2023.02.10_exp45.3'/'export'

exp55_1_path = base_path/'2023.01.14_exp55'/'export'
exp55_2_path = base_path/'2023.02.10_exp55.2'/'export'
exp55_3_path = base_path/'2024.03.18_exp55.3'/'export'

exp67_1_path = base_path/'2023.06.15_exp67'/'export'
exp67_2_path = base_path/'2023.06.17_exp67.2'/'export'
exp67_3_path = base_path/'2024.03.18_exp67.3'/'export'

# One row per plate: (data_path, yaml_path, biorep, exp).
# Several runs share a well-layout yaml: exp45.3 uses the exp45.2 file, exp55.3 uses the exp55 file,
# and both exp67.2 plates use the exp67 plate files.
plate_records = [
    (exp45_1_path/'293T'/'export', exp45_1_path/'exp45_wells2.yaml',         1, 'exp45'),
    (exp45_2_path,                 exp45_2_path/'exp45.2_wells2.yaml',       2, 'exp45.2'),
    (exp45_3_path,                 exp45_2_path/'exp45.2_wells2.yaml',       3, 'exp45.3'),
    (exp55_1_path,                 exp55_1_path/'exp55_wells2.yaml',         1, 'exp55'),
    (exp55_2_path,                 exp55_2_path/'exp55.2_wells2.yaml',       2, 'exp55.2'),
    (exp55_3_path,                 exp55_1_path/'exp55_wells2.yaml',         3, 'exp55.3'),
    (exp67_1_path/'plate1',        exp67_1_path/'exp67_plate1_wells2.yaml',  1, 'exp67'),
    (exp67_1_path/'plate2',        exp67_1_path/'exp67_plate2_wells2.yaml',  1, 'exp67'),
    (exp67_2_path/'plate1',        exp67_1_path/'exp67_plate1_wells2.yaml',  2, 'exp67.2'),
    (exp67_2_path/'plate2',        exp67_1_path/'exp67_plate2_wells2.yaml',  2, 'exp67.2'),
    (exp67_3_path/'plate1',        exp67_3_path/'exp67.3_plate1_wells.yaml', 3, 'exp67.3'),
    (exp67_3_path/'plate2',        exp67_3_path/'exp67.3_plate2_wells.yaml', 3, 'exp67.3'),
]
plates = pd.DataFrame(plate_records, columns=['data_path', 'yaml_path', 'biorep', 'exp'])

# Figures and the parquet cache of the loaded data both live under the same output folder
output_path = rd.rootdir/'output'/'controller-tuning-transfections'
cache_path = output_path/'data.gzip'

# Plot each distinct well-layout file and collect the union of metadata keys they define
metadata_keys = set()
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)
    well_metadata = rd.flow.load_well_metadata(yaml_file)
    metadata_keys |= set(well_metadata.keys())
display(metadata_keys)

In [None]:
# Load data: read the parquet cache when it exists, otherwise load the raw exports and build the cache
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    data = rd.flow.load_groups_with_metadata(plates)

    # Keep only the plate-table columns, the channels of interest, and the well metadata
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    keep_cols = set(plates.columns) | set(channel_list) | set(metadata_keys)
    data = data.drop(columns=[col for col in data.columns if col not in keep_cols])

    # Discard events that are non-positive in any channel, then rows with any missing value
    all_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_positive]
    data = data.dropna()

    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Add metadata for constructs (left join, so rows without a metadata entry are kept)
# NOTE: re-running this cell merges the metadata a second time; re-run the load cell first.
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_file)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

# Create dicts to specify colors/markers, each keyed by construct
metadata_dict = metadata.set_index('construct').to_dict('dict')
construct_palette, construct_markers = metadata_dict['color'], metadata_dict['markers']

Draw gates at the 99.9th percentile of the untransfected (UT) population in each experiment, then gate transfected cells

In [None]:
# Per-experiment gates: the 99.9th percentile of each channel in the untransfected (UT) population
untransfected = data[data['construct']=='UT']
gates = pd.DataFrame()
for channel in ['tagBFP-A', 'mGL-A', 'mCherry-A', 'mRuby2-A']:
    gates[channel] = untransfected.groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates.reset_index(inplace=True)

# Add missing gates: exp67.2 takes its mGL-A and mRuby2-A gates from exp67
gates.loc[gates['exp']=='exp67.2', 'mGL-A'] = gates.loc[gates['exp']=='exp67', 'mGL-A'].values[0]
gates.loc[gates['exp']=='exp67.2', 'mRuby2-A'] = gates.loc[gates['exp']=='exp67', 'mRuby2-A'].values[0]

# Indicate which channels are relevant for each experiment.
# The (marker, output) pair is looked up from the experiment name (text before the first '.')
# rather than assigned from fixed-length positional lists, so the labels cannot silently shift
# if the set of experiments changes. An unrecognized experiment raises instead of being mislabeled.
gates.sort_values(['exp'], inplace=True)
channels_by_exp = {
    'exp45': ('tagBFP-A', 'mCherry-A'),
    'exp55': ('tagBFP-A', 'mCherry-A'),
    'exp67': ('mGL-A', 'mRuby2-A'),
}
exp_family = gates['exp'].str.split('.').str[0]
unknown_exps = sorted(set(exp_family) - set(channels_by_exp))
if unknown_exps:
    raise ValueError(f'No marker/output channels defined for experiment(s): {unknown_exps}')
gates['marker'] = exp_family.map(lambda e: channels_by_exp[e][0])
gates['output'] = exp_family.map(lambda e: channels_by_exp[e][1])

display(gates)

Gate data per experiment based on transfection marker expression

In [None]:
# Gate each experiment separately with its own gates.
# base.gate_data presumably adds the 'expressing', 'marker' and 'output' columns used below — TODO confirm in base.py
data = data.groupby('exp')[data.columns].apply(lambda x: base.gate_data(x,gates))
data.reset_index(inplace=True, drop=True)

# Upper bound on output; events at or above it are excluded from df.
# NOTE(review): this name shadows the builtin max() for the rest of the notebook. It is kept as-is
# in case later cells read it; consider renaming (e.g. max_output) once all uses are checked.
max = 1e6

# promoter_order maps rank -> promoter name (kept unchanged in case later cells use it).
# The sort key needs the inverse (name -> rank): mapping promoter names through promoter_order
# itself yields NaN for every row, which leaves the frame effectively unsorted.
promoter_order = {k:v for k,v in enumerate(['pMXs','hPGK','EFS','CMV'])}
promoter_rank = {name: rank for rank, name in promoter_order.items()}

# Transfected (expressing) cells below the output cap, ordered pMXs, hPGK, EFS, CMV;
# rows whose promoter is not in the list get a NaN key and sort last.
df = data[(data['expressing']) & (data['output']<max)].sort_values(by='promoter', key=lambda x: x.map(promoter_rank))
display(df)

## Summarize

Create summary plots of common metrics across all the data.

Note that these plots are mainly FacetGrids, not artisanal plots suitable for figures/presentations.

1. Histograms of output

In [None]:
# KDE of output for biorep 2: one figure per miR, faceted by design (rows) and promoter (columns)
facet_opts = dict(margin_titles=True, sharex=True, sharey=True)
for miR, group in df.groupby('miR'):
    plot_df = group.loc[group['biorep'] == 2]
    g = sns.displot(
        data=plot_df, kind='kde', x='output',
        row='design', col='promoter',
        hue='construct', palette=construct_palette, legend=False,
        log_scale=True, common_norm=False,
        facet_kws=facet_opts,
    )
    figure_file = output_path/(f'hist_{miR}.svg')
    g.figure.savefig(rd.outfile(figure_file), bbox_inches='tight')

In [None]:
# KDE of output for biorep 2: one figure per design, faceted by miR (rows) and promoter (columns)
facet_opts = dict(margin_titles=True, sharex=True, sharey=True)
for design, group in df.groupby('design'):
    plot_df = group.loc[group['biorep'] == 2]
    g = sns.displot(
        data=plot_df, kind='kde', x='output',
        row='miR', col='promoter',
        hue='construct', palette=construct_palette, legend=False,
        log_scale=True, common_norm=False,
        facet_kws=facet_opts,
    )
    figure_file = output_path/(f'hist_design{design}.svg')
    g.figure.savefig(rd.outfile(figure_file), bbox_inches='tight')

In [None]:
# KDE of output for biorep 2: one figure per promoter, faceted by miR (rows) and design (columns)
facet_opts = dict(margin_titles=True, sharex=True, sharey=True)
for promoter, group in df.groupby('promoter'):
    plot_df = group.loc[group['biorep'] == 2]
    g = sns.displot(
        data=plot_df, kind='kde', x='output',
        row='miR', col='design',
        hue='construct', palette=construct_palette, legend=False,
        log_scale=True, common_norm=False,
        facet_kws=facet_opts,
    )
    figure_file = output_path/(f'hist_{promoter}.svg')
    g.figure.savefig(rd.outfile(figure_file), bbox_inches='tight')

In [None]:
# Bin by transfection marker: 15 log-spaced edges from 1e2 to 1e6 (14 bins), applied per construct and experiment
min_count = 100
marker_bin_edges = np.logspace(2,6,15)
df['bin_marker'] = df.groupby(['construct','exp'])['marker'].transform(lambda m: pd.cut(m, marker_bin_edges))

# Flag bins holding fewer than min_count cells (per construct/exp) and keep only the rest
df['remove_bin'] = df.groupby(['construct','exp','bin_marker'])['bin_marker'].transform(lambda b: b.count() < min_count)
keep_rows = ~df['remove_bin']
df_binned = df[keep_rows].copy()

# Group and compute stats of marker and output within each (construct, biorep, exp, bin)
stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned.groupby(by=['construct','biorep','exp','bin_marker'])
stats_bin = grouped[['marker','output']].agg(stat_list)
stats_bin = stats_bin.reset_index().dropna()

# Rename columns as 'col_stat'
stats_bin.columns = stats_bin.columns.map(base.rename_multilevel_cols)
# Assignment aligns on the index, so each surviving row receives the count of its own group
stats_bin['count'] = grouped['marker'].count().reset_index()['marker']
stats_bin = stats_bin.merge(metadata, how='left', on='construct')

# Compute mean/median on bin span (both reduce to the midpoint of the two bin edges)
for frame in (df_binned, stats_bin):
    frame['bin_marker_mean'] = frame['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
    frame['bin_marker_median'] = frame['bin_marker'].map(lambda x: np.median([x.left, x.right]))

display(stats_bin)

In [None]:
# Normalize gmean to one bin
# NOTE(review): `df_transfected` and the 'bin_<channel>' / 'bin_<channel>_median' columns are not assigned
# in any earlier cell shown in this notebook, so this cell raises NameError on Restart & Run All.
# It looks like a leftover from an earlier version — confirm whether it can be deleted.
norm_point = 2e2
grouped = df_transfected.groupby(['construct','exp'])
for (construct, exp), group in grouped:
    channel = gates.loc[gates['exp']==exp, 'marker'].values[0]
    bin_col = 'bin_'+channel

    # Bins of this group's marker channel that strictly contain norm_point; skip the group if there is none
    bin = [b for b in group[bin_col].unique() if b.left < norm_point and b.right > norm_point]
    if not bin: continue

    # Rows of the full frame that belong to this (construct, exp) pair
    rows = (df_transfected['construct']==construct) & (df_transfected['exp']==exp)

    # Scale mCherry-A (linear and log10) by its geometric mean inside the reference bin
    normalizer = sp.stats.gmean(group.loc[(group[bin_col] == bin[0]), 'mCherry-A'])
    df_transfected.loc[rows, 'mCherry-A_norm'] = df_transfected.loc[rows, 'mCherry-A'] / normalizer
    df_transfected.loc[rows, 'mCherry-A_norm_log'] = df_transfected.loc[rows, 'mCherry-A'].transform(np.log10) / np.log10(normalizer)

    # Scale the bin medians by the smallest bin median of this (construct, exp) pair
    normalizer = min(df_transfected.loc[rows, bin_col+'_median'])
    df_transfected.loc[rows, bin_col+'_median_norm'] = df_transfected.loc[rows, bin_col+'_median'].astype(float) / normalizer
display(df_transfected)

In [None]:
# Show the binned data
display(df_binned)

In [None]:
def normalize_output(df):
    """Normalize output and marker-bin position relative to the smallest marker bin.

    Works on a copy; the input frame is not modified.

    Parameters
    ----------
    df : DataFrame with 'output' and 'bin_marker_median' columns.

    Returns
    -------
    Copy of df with two added columns:
      'output_norm'            : output divided by the geometric mean of output over the rows
                                 whose bin_marker_median equals the smallest bin_marker_median
      'bin_marker_median_norm' : bin_marker_median divided by the smallest bin_marker_median
    """
    result = df.copy()
    bin_median = result['bin_marker_median']
    smallest_bin = bin_median.min()
    in_smallest_bin = bin_median == smallest_bin
    normalizer = sp.stats.gmean(result.loc[in_smallest_bin, 'output'])
    result['output_norm'] = result['output'].astype(float) / normalizer
    result['bin_marker_median_norm'] = bin_median.astype(float) / smallest_bin
    return result

# Normalize within each (construct, biorep, exp) group, then flatten back to a plain integer index
by = ['construct','biorep','exp']
per_group = df_binned.groupby(by)[df_binned.columns]
df_binned = per_group.apply(normalize_output).reset_index(drop=True)
display(df_binned)

In [None]:
# Normalized output vs. normalized marker bin for biorep 3: one figure per miR,
# faceted by design (rows) and promoter (columns). Lines show the geometric mean and the
# band spans gmean / gstd to gmean * gstd.
gstd_band = lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x))
for miR, group in df_binned.groupby('miR'):
    plot_df = group.loc[group['biorep'] == 3]
    g = sns.relplot(
        data=plot_df, kind='line',
        x='bin_marker_median_norm', y='output_norm',
        row='design', col='promoter', #row_order=[0,1,2,3],
        hue='construct', palette=construct_palette,
        style='construct', markers=construct_markers, dashes=False,
        estimator=sp.stats.gmean, errorbar=gstd_band,
        height=3, aspect=1.3, legend=False,
        facet_kws=dict(sharex=True, sharey=True, margin_titles=True,),
    )
    g.set(xscale='log', yscale='log', ylim=(2e-1,2e3))
    sns.despine()
    for ax in g.axes_dict.values():
        ax.grid()
    figure_file = output_path/(f'line_{miR}.svg')
    g.figure.savefig(rd.outfile(figure_file), bbox_inches='tight')