In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# Re-import `base` so that edits made to base.py while the kernel is running take effect
from importlib import reload
reload(base)

# Global seaborn appearance: 'ticks' axes style, 'talk' context, Helvetica Neue as the sans-serif font
sns.set_style('ticks')
font_rc = {'font.family': 'sans-serif', 'font.sans-serif': ['Helvetica Neue']}
sns.set_context('talk', rc=font_rc)

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

In [None]:
# Root folder of the instrument data used in this notebook
base_path = rd.datadir/'instruments'/'data'/'attune'/'Emma'

# 2022 experiment and the folder its controls are read from
exp11_path = base_path/'2022.10.11_EXP11'/'Data'
exp11_controls_path = base_path/'2022.10.11_EXP10'/'data_controls'

# 2024 replicate plates all live under one parent folder
replicates_path = base_path/'2024.04.06_EXP11_replicates'
exp49_path = replicates_path/'Plate_1_EXP49'/'data_singlets'
exp50_path = replicates_path/'Plate_2_EXP50'/'data_singlets'
exp49_50_controls_path = replicates_path/'Plate_3_Controls'/'data_singlets'

# One entry per plate: (data folder, file name of the well-metadata YAML inside it).
# Note the two spellings: control plates use an underscore, the others a hyphen.
plate_specs = [
    (exp11_path, 'wells-metadata.yaml'),
    (exp11_controls_path, 'wells_metadata.yaml'),
    (exp49_path, 'wells-metadata.yaml'),
    (exp50_path, 'wells-metadata.yaml'),
    (exp49_50_controls_path, 'wells_metadata.yaml'),
]
plates = pd.DataFrame({
    'data_path': [folder for folder, _ in plate_specs],
    'yaml_path': [folder/yaml_name for folder, yaml_name in plate_specs],
})

# Where figures and cached tables are written
output_path = rd.rootdir/'output'/'miRNA-TS-characterization'
cache_path = output_path/'data.gzip'

# Plot each plate layout and collect the union of metadata keys across all plates
metadata_keys = set()
for yaml_path in plates['yaml_path'].unique():
    print(yaml_path)
    rd.plot.plot_well_metadata(yaml_path)
    metadata_keys.update(rd.flow.load_well_metadata(yaml_path).keys())
display(metadata_keys)

In [None]:
# Load data: reuse the cached parquet file when it exists, otherwise build it from the plates
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel
    # (zeros and negatives are removed), then drop rows containing any NaN
    all_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_positive].dropna()

    data.to_parquet(rd.outfile(cache_path))

In [None]:
def get_light_color(color):
    """Return entry 1 of a 4-color `sns.light_palette` built from `color`.

    This is the lighter of the two shades used in this notebook
    (see `get_medium_color` for entry 2).
    """
    shades = sns.light_palette(color, n_colors=4)
    return shades[1]

def get_medium_color(color):
    """Return entry 2 of a 4-color `sns.light_palette` built from `color`.

    This is the intermediate shade, between `get_light_color` (entry 1)
    and the last palette entry.
    """
    shades = sns.light_palette(color, n_colors=4)
    return shades[2]

def get_metadata(miR_path, TS_path):
    """Build the construct metadata table and attach a plotting color to every row.

    Parameters
    ----------
    miR_path : path to the Excel sheet read as the miRNA metadata
    TS_path : path to the Excel sheet read as the reporter metadata

    Returns
    -------
    pandas.DataFrame
        Cross join of the two sheets (every miRNA row paired with every
        reporter row) with an added 'color' column.
    """
    metadata = pd.read_excel(miR_path).merge(pd.read_excel(TS_path), how="cross")

    # Base color for each value of the 'miR' column
    miR_palette = dict(
        FF3=base.colors['red'],      # FF3 intron
        FF4=base.colors['pink'],     # FF4 intron
        FF5=base.colors['purple'],   # FF5 intron
        FF6=base.colors['teal'],     # FF6 intron
        na=base.colors['black'],
    )
    metadata['color'] = metadata['miR'].replace(miR_palette)

    # Overrides are applied in this order, so a later rule wins over an earlier one
    reporter = metadata['rep-construct']
    metadata.loc[reporter.isin(['pEP033', 'pEP038']), 'color'] = base.colors['red']
    metadata.loc[reporter == 'NT', 'color'] = base.colors['black']
    metadata.loc[(reporter == 'pEP043') & (metadata['miR-construct'] == 'na'), 'color'] = base.colors['green']
    metadata.loc[(metadata['ts-num'] == 0) & (metadata['miR'] != 'na'), 'color'] = base.colors['gray']

    # Change shades: ts-num 4 gets the light shade, ts-num 2 the medium shade
    for ts_count, shade in ((4, get_light_color), (2, get_medium_color)):
        rows = metadata['ts-num'] == ts_count
        metadata.loc[rows, 'color'] = metadata.loc[rows, 'color'].apply(shade)

    return metadata

# Both metadata spreadsheets live in the same plasmid folder
plasmid_dir = rd.datadir/'projects'/'miR-iFFL'/'plasmids'
metadata = get_metadata(plasmid_dir/'miRNA-metadata.xlsx', plasmid_dir/'reporter-metadata.xlsx')

# Attach construct metadata to every event.
# NOTE: re-running this cell merges a second time; restart from the load cell instead.
merge_keys = ['rep-construct','miR-construct']
data = data.merge(metadata, how='left', on=merge_keys)
display(data)

# Create dicts to specify colors/markers, keyed by (miR, ts-num)
metadata_by_condition = metadata.set_index(['miR','ts-num'])
metadata_dict = metadata_by_condition.to_dict('dict')
construct_palette = metadata_dict['color']

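For each biological replicate, draw gates at the 99.9th percentile of the untransfected (NT) population in the mGL and mRuby2 channels, then use the mRuby2 gate to select transfected cells.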

In [None]:
# Per-biorep gates: the 99.9th percentile of each channel among the
# untransfected ('NT') wells.
gate_channels = ['mGL-A', 'mRuby2-A']
untransfected = data[data['rep-construct']=='NT']
gates = (
    untransfected.groupby('Biorep')[gate_channels]
    .quantile(0.999)
    .reset_index()
    .sort_values('Biorep')
)

# Channel used to decide which cells are transfected ('marker') and channel
# reported as the readout ('output'). Assigning scalars broadcasts to every
# row, so this works for any number of bioreps (previously hard-coded to 3).
gates['marker'] = 'mRuby2-A'
gates['output'] = 'mGL-A'

display(gates)

In [None]:
def gate_data(df, gates):
    """Flag expressing cells and add generic 'marker'/'output' columns for one biorep.

    Parameters
    ----------
    df : DataFrame of events; the 'Biorep' value of its first row is used for
        the whole frame, so it is expected to hold a single biorep
        (e.g. one group of `data.groupby('Biorep')`).
    gates : DataFrame with one row per 'Biorep', a threshold column per channel,
        and 'marker' / 'output' columns naming the channels to use.

    Returns
    -------
    DataFrame
        Copy of `df` with three added columns: 'expressing' (marker channel
        above its gate), 'marker' (marker channel values) and 'output'
        (output channel values). The input frame is not modified.
    """
    gated = df.copy()
    biorep = gated['Biorep'].values[0]

    # format: column -> {biorep: value}
    lookup = gates.set_index('Biorep').to_dict('dict')
    marker_channel = lookup['marker'][biorep]
    threshold = lookup[marker_channel][biorep]

    gated['expressing'] = gated[marker_channel] > threshold
    gated['marker'] = gated[marker_channel]
    output_channel = lookup['output'][biorep]
    gated['output'] = gated[output_channel]
    return gated

# Gate each biorep with its own thresholds (adds 'expressing', 'marker', 'output')
data = (
    data.groupby('Biorep')[data.columns]
    .apply(lambda x: gate_data(x, gates))
    .reset_index(drop=True)
)

# Upper cutoff on the output channel. Named `output_max` rather than `max`
# so the builtin max() is not shadowed for the rest of the notebook.
output_max = 1e6

# Sort rank per promoter, keyed by promoter NAME -> rank. The previous dict was
# rank -> name, so mapping the 'miR-promoter' column through it gave NaN for
# every row and the sort had no effect. Promoters not listed map to NaN and
# are placed last.
promoter_order = {promoter: rank for rank, promoter in enumerate(['hPGK.d','CMV'])}

# Transfected cells below the output cutoff, ordered by promoter.
# A stable sort keeps the existing row order within each promoter.
df = (
    data[(data['expressing']) & (data['output'] < output_max)]
    .sort_values(by='miR-promoter', key=lambda x: x.map(promoter_order), kind='stable')
)
display(df)

### Transfection marker binning

In [None]:
# Keep only rows with a miR construct. Take an explicit copy so the column
# assignments below act on an independent frame rather than on a filtered
# slice (avoids SettingWithCopyWarning).
df = df.loc[(df['miR-construct']!='na')].copy()

# Bin by transfection marker
min_count = 100                      # bins with fewer events than this are discarded
marker_bins = np.logspace(2,6,15)    # 15 log-spaced edges from 1e2 to 1e6 -> 14 bins
df['bin_marker'] = df.groupby(['rep-construct','miR-construct','Biorep'])['marker'].transform(lambda x: pd.cut(x, marker_bins))
df['remove_bin'] = df.groupby(['rep-construct','miR-construct','Biorep','bin_marker'])['bin_marker'].transform(lambda x: x.count() < min_count)
# `== False` is kept deliberately: it stays correct even if 'remove_bin' comes back with object dtype
df_binned = df.loc[df['remove_bin']==False].copy()
df_binned['marker'] = df_binned['marker'].astype(float)
df_binned['output'] = df_binned['output'].astype(float)

# Group and compute stats
stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned.groupby(by=['rep-construct','miR-construct','Biorep','bin_marker'])
stats_bin = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats_bin.columns = stats_bin.columns.map(lambda i: base.rename_multilevel_cols(i))
# Counts are aligned to stats_bin by row label; both come from `grouped`
# followed by reset_index(), so the labels refer to the same groups
stats_bin['count'] = grouped['marker'].count().reset_index()['marker']
stats_bin = stats_bin.merge(metadata, how='left', on=['rep-construct','miR-construct'])

# Compute mean/median on bin span
# (with only the two bin edges as input, mean and median give the same value: the bin midpoint)
df_binned['bin_marker_mean'] = df_binned['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
df_binned['bin_marker_median'] = df_binned['bin_marker'].map(lambda x: np.median([x.left, x.right]))
stats_bin['bin_marker_mean'] = stats_bin['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
stats_bin['bin_marker_median'] = stats_bin['bin_marker'].map(lambda x: np.median([x.left, x.right]))

display(stats_bin)

In [None]:
# Normalize output to gmean of output in smallest bin, and normalize marker bin by smallest bin
def normalize_output(df):
    """Normalize output and marker-bin position relative to the lowest marker bin.

    Parameters
    ----------
    df : DataFrame with 'output' and 'bin_marker_median' columns.

    Returns
    -------
    DataFrame
        Copy of `df` with two added columns:
        'output_norm' = output divided by the geometric mean of the output
        in the rows whose 'bin_marker_median' is the smallest one present, and
        'bin_marker_median_norm' = bin_marker_median divided by that smallest value.
        The input frame is not modified.
    """
    normalized = df.copy()

    smallest_bin = normalized['bin_marker_median'].min()
    in_smallest_bin = normalized['bin_marker_median'] == smallest_bin
    reference = sp.stats.gmean(normalized.loc[in_smallest_bin, 'output'])

    normalized['output_norm'] = normalized['output'].astype(float) / reference
    normalized['bin_marker_median_norm'] = normalized['bin_marker_median'].astype(float) / smallest_bin
    return normalized

# Normalize separately within each reporter / miR construct / biorep combination
by = ['rep-construct','miR-construct','Biorep']
normalized_groups = df_binned.groupby(by)[df_binned.columns].apply(normalize_output)
df_binned = normalized_groups.reset_index(drop=True)
display(df_binned)

In [None]:
# Cache data: write everything except the 'color' and 'bin_marker' columns to parquet
cacheable = df_binned.drop(columns=['color','bin_marker'], errors='ignore')
cacheable.to_parquet(rd.outfile(output_path/'df_binned.gzip'))

In [None]:
# Color per 'ts-num' value: gray for 0, red for 1, then lighter reds for 2 and 4
red = base.colors['red']
ts_num_palette = {
    0: base.colors['gray'],
    1: red,
    2: get_medium_color(red),
    4: get_light_color(red),
}

def gmean_gstd_interval(x):
    """Error-bar limits: geometric mean divided / multiplied by the geometric std."""
    return (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x))

# Normalized output vs. marker bin, one panel per (miR promoter, miR), one line per ts-num
g = sns.relplot(
    data=df_binned, kind='line',
    x='bin_marker_median', y='output_norm', hue='ts-num',
    row='miR-promoter', col='miR',
    facet_kws=dict(sharex=True, sharey=True, margin_titles=True),
    height=4, aspect=0.75,
    legend=True, dashes=False, palette=ts_num_palette,
    estimator=sp.stats.gmean, errorbar=gmean_gstd_interval,
)
# NOTE(review): y is 'output_norm' (normalized), yet the axis label reads 'mGL (MFI)' — confirm intended label
g.set(xscale='log', yscale='log', ylim=(2e-1,2e3), xlabel='mRuby2 (MFI)', ylabel='mGL (MFI)')
g.set_titles(col_template="miR-{col_name}", row_template="miR promoter = {row_name}")
g.legend.set_title("TS#")
sns.despine()
for ax in g.axes_dict.values():
    ax.grid()

# Save as vector and raster
for extension in ('svg', 'png'):
    g.figure.savefig(rd.outfile(output_path/('line_gmean.' + extension)), bbox_inches='tight')

### Histograms

In [None]:
# Output distributions (KDE on a log axis) for rows with a miR construct,
# one panel per (miR promoter, miR), one curve per ts-num
with_miR = df.loc[df['miR-construct']!='na']
g = sns.displot(
    data=with_miR, kind='kde',
    x='output', hue='ts-num', palette=ts_num_palette,
    row='miR-promoter', col='miR', col_order=['FF3','FF4','FF5','FF6'],
    facet_kws=dict(margin_titles=True, sharex=True, sharey=True),
    log_scale=True, legend=True,
    common_norm=False,  # each curve is normalized on its own
    height=4, aspect=0.75,
)
g.set(xlabel='mGL (MFI)')
g.set_titles(col_template="miR-{col_name}", row_template="miR promoter = {row_name}")
g.legend.set_title("TS#")

# Save as vector and raster
for extension in ('svg', 'png'):
    g.figure.savefig(rd.outfile(output_path/('hist.' + extension)), bbox_inches='tight')