In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy.stats
import seaborn as sns

# Re-import `base` so edits made to base.py while this kernel is running are picked up
from importlib import reload
reload(base)

# Global seaborn appearance: 'ticks' style, 'talk' context, Helvetica Neue as the sans-serif font
sns.set_style('ticks')
font_rc = {'font.family': 'sans-serif', 'font.sans-serif': ['Helvetica Neue']}
sns.set_context('talk', rc=font_rc)

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load the lenti data exported for experiment 89 (`2024.04.05_exp89`).

In [None]:
# Root of the exported Attune data for this experiment
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'/'2024.04.05_exp89'/'export'

# One (data folder, metadata yaml) pair per plate, both relative to base_path
plate_files = [
    ('293T_control', 'plate_control.yaml'),
    ('293T_plate1', 'plate01.yaml'),
    ('293T_plate2', 'plate02.yaml'),
    ('293T_plate3', 'plate03.yaml'),
    ('MEF_3_plate1', 'mef_3_plate01.yaml'),
    ('MEF_4-1_plate1', 'mef_4-1_plate01.yaml'),
    ('MEF_4-1_plate2', 'mef_4-1_plate02.yaml'),
    ('MEF_4-1_plate3', 'mef_4-1_plate03.yaml'),
]
plates = pd.DataFrame({
    'data_path': [base_path/folder for folder, _ in plate_files],
    'yaml_path': [base_path/yaml_name for _, yaml_name in plate_files],
})

# Where the cached data for this experiment is written
output_path = rd.rootdir/'output'/'lenti-exp89'
cache_path = output_path/'data.gzip'

# Plot each plate's well metadata and collect the union of metadata keys across plates
metadata_keys = set()
for yaml_file in plates['yaml_path'].unique():
    print(yaml_file)
    rd.plot.plot_well_metadata(yaml_file)
    metadata_keys.update(rd.flow.load_well_metadata(yaml_file).keys())
display(metadata_keys)

In [None]:
# Load the flow data, reusing the parquet cache when it already exists
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events whose value is strictly positive in every channel
    all_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_positive]

    # Write the filtered events to the cache for the next run
    data.to_parquet(rd.outfile(cache_path))

# Build construct -> color / marker lookups from the construct metadata sheet
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_file)
metadata['TS'] = metadata['ts_kind']
metadata_dict = metadata.set_index('construct').to_dict('dict')
construct_palette = metadata_dict['color']
construct_markers = metadata_dict['markers']
display(data)

In [None]:
# Gate thresholds: the 99.98th percentile of each channel among untransfected
# events, computed separately for every value of `cell`.
untransfected = data[data['construct'] == 'untransfected']
gates = pd.DataFrame({
    channel: untransfected.groupby(['cell'])[channel].apply(lambda x: x.quantile(0.9998))
    for channel in ['mGL-A', 'mRuby2-A']
}).reset_index()

# Disabled leftovers: per-experiment marker/output channel assignment
#gates.sort_values(['exp'], inplace=True)
#gates['marker'] = ['tagBFP-A']*6 + ['mGL-A']*3
#gates['output'] = ['mCherry-A']*6 + ['mRuby2-A']*3

display(gates)

Gate data based on the transfection marker

n.b. we have to reuse the 293T gate for the MEFs because we didn't have untransfected MEFs :(

In [None]:
# mGL-A densities: one row per construct, one column per `cell`, one curve per dox level
g = sns.FacetGrid(data, row='construct', col='cell', hue='dox')
g.map(sns.kdeplot, 'mGL-A', log_scale=True, common_norm=False)

# Draw the mGL-A gate taken from the first row of `gates` on every panel
mgl_gate = gates.iloc[0]['mGL-A']
for ax in g.axes.flat:
    ax.axvline(mgl_gate, ls=':', color='k')

In [None]:
# Keep events above the mGL-A gate, excluding the untransfected control wells.
# NOTE(review): the gate is read from the first row of `gates` and applied to
# every `cell` value; confirm that row is the intended gate.
above_gate = data['mGL-A'] > gates.iloc[0]['mGL-A']
not_control = data.construct != 'untransfected'
gated = data[above_gate & not_control].copy()
display(gated)

In [None]:
# mRuby2-A densities of the gated events, laid out like the mGL-A grid
# (construct rows, `cell` columns, one curve per dox level)
g = sns.FacetGrid(gated, row='construct', col='cell', hue='dox')
g.map(
    sns.kdeplot,
    'mRuby2-A',
    log_scale=True,
    common_norm=False,
)

### Binning
For each cell type, dox condition, and construct, split the cells into 20 mGL-A quantile bins and assign each cell to its bin.
Then group by condition and bin, and compute the geometric mean of each channel.

In [None]:
# Preview: mGL-A split into 20 quantile bins within each (cell, dox, construct) group
(
    gated
    .groupby(['cell', 'dox', 'construct'])
    .apply(lambda grp: pd.qcut(grp['mGL-A'], 20), include_groups=False)
    .reset_index()
)

In [None]:
# Assign every gated event to one of `n_bins` mGL-A quantile bins, with the bin
# edges computed separately within each (cell, dox, construct) group.
n_bins = 20
transfection_bin = (
    gated
    .groupby(['cell', 'dox', 'construct'])
    .apply(lambda grp: pd.qcut(grp['mGL-A'], n_bins), include_groups=False)
    # The result is indexed by (cell, dox, construct, original row label). Drop the
    # group keys by name so only the original row labels remain. This avoids relying
    # on the auto-generated 'level_3' column name from reset_index(), which only
    # exists when the row index of `gated` is unnamed.
    .droplevel(['cell', 'dox', 'construct'])
)
# Aligns on the row labels of `gated`
gated['transfection_bin'] = transfection_bin

In [None]:
# Compute bin gmeans

In [None]:
# Geometric mean of mGL-A and mRuby2-A for every condition / transfection-bin combination
group_cols = ['cell', 'construct', 'dox', 'lenti_293T_passage', 'transfection_bin']
value_cols = ['mGL-A', 'mRuby2-A']
summary_gmeans = (
    gated
    .groupby(group_cols)
    .apply(lambda grp: grp[value_cols].apply(scipy.stats.gmean), include_groups=False)
    .reset_index()
)

In [None]:
# Inspect the per-bin geometric means
summary_gmeans

In [None]:
# Inspect the construct metadata table
metadata

In [None]:
# Binned mRuby2-A vs. mGL-A geometric means for the dox == 1000 condition,
# drawn twice: first on linear axes, then on log-log axes.
induced = summary_gmeans[summary_gmeans.dox == 1000]
for use_log_axes in (False, True):
    g = sns.FacetGrid(data=induced, row='cell', col='construct', hue='lenti_293T_passage', margin_titles=True)
    g.map(sns.scatterplot, 'mGL-A', 'mRuby2-A')
    if use_log_axes:
        for ax in g.axes.flatten():
            ax.set_xscale('log')
            ax.set_yscale('log')
    plt.show()

In [None]:
# Metadata rows whose construct ID lies between 'RC124' and 'RC135' (inclusive, string comparison)
in_range = metadata.construct.between('RC124', 'RC135')
metadata[in_range]