In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py: re-import the module so that edits
# made to it are picked up without restarting the kernel
from importlib import reload
reload(base)

# Global seaborn look for the figures in this notebook: 'ticks' axes style,
# 'talk' context, and Helvetica Neue as the preferred sans-serif font
sns.set_style('ticks')
sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load exp93, which compares single-transcript, dual-transcript, and dual-vector implementations

In [None]:
# Root of the Attune exports and the export folder of exp93
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
exp93_path = base_path/'2024.04.14_exp93'/'export'

# One row per plate: each plate carries its own biorep number, while all three
# rows point at the same well-layout YAML and the same experiment label
plates = pd.DataFrame({
    'data_path': [exp93_path/'plate1', exp93_path/'plate2', exp93_path/'plate3'],
    'yaml_path': [exp93_path/'exp93_wells.yaml' for _ in range(3)],
    'biorep': [1, 2, 3],
    'exp': ['exp93' for _ in range(3)],
})

# Figures and the cached event table are both written under output/two-gene
output_path = rd.rootdir/'output'/'two-gene'
cache_path = output_path/'data.gzip'

# For every distinct YAML file, call rushd's well-metadata plot and collect
# the union of the metadata keys that the file defines
metadata_keys = set()
for p in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(p)
    metadata_keys |= set(rd.flow.load_well_metadata(p).keys())
display(metadata_keys)

In [None]:
# Load data: reuse the parquet cache when it exists, otherwise read the raw
# exports, clean them, and write the cache for the next run
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','SNAP-647-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel, then drop
    # any row that still contains a missing value
    data = data[(data[channel_list] > 0).all(axis=1)].dropna()
    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Construct-level metadata, read through the project helper in base.py
metadata = base.get_metadata(
    rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
)

# Lookup tables keyed by construct name; they are passed to seaborn below as
# the `palette` and `markers` arguments
metadata_dict = metadata.set_index('construct').to_dict('dict')
construct_palette = metadata_dict['color']
construct_markers = metadata_dict['markers']

# Left-join the metadata onto every event and rename the far-red channel.
# NOTE: `data` is reassigned here, so re-running this cell without re-running
# the load cell merges the metadata a second time.
data = (
    data
    .merge(metadata, how='left', on='construct')
    .rename(columns={'SNAP-647-A': 'iRFP-A'})
)
display(data)

In [None]:
# Metadata for the second construct is read directly from its spreadsheet
metadata_construct2 = pd.read_excel(
    rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct2-metadata.xlsx'
)
display(metadata_construct2)

# Left join on 'construct2': every event is kept whether or not its second
# construct appears in the sheet
data = pd.merge(data, metadata_construct2, how='left', on='construct2')
display(data)

In [None]:
# A condition is the pair of constructs in a well, joined as '<construct>_<construct2>'
data = data.assign(condition=data['construct'] + '_' + data['construct2'])

Draw gates based on the untransfected population, then gate transfected cells.

In [None]:
# Per-experiment gates: the 99.9th percentile of each channel among the
# GEEC555 events (presumably the untransfected control — confirm)
gates = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates[channel] = data[data['construct']=='GEEC555'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates.reset_index(inplace=True)

# Indicate which channels are relevant for each experiment
gates['marker'] = 'iRFP-A'
gates['output'] = 'mRuby2-A'

# Add missing iRFP gate: a fixed threshold, drawn on the per-biorep iRFP
# distributions so it can be checked by eye
gate_iRFP = 2.5e2
g = sns.displot(data=data, x='iRFP-A', col='biorep', hue='construct', palette=construct_palette,
                 kind='kde', fill=False, legend=False, common_norm=True, log_scale=True)
for name, ax in g.axes_dict.items():
    ax.axvline(gate_iRFP, color='black')
g.figure.savefig(rd.outfile(output_path/'hist_gate_iRFP.svg'), bbox_inches='tight')

# Assign the threshold as a scalar so it is broadcast to every row of `gates`;
# a one-element list only fits when there is exactly one experiment
gates['iRFP-A'] = gate_iRFP

display(gates)

Gate data per experiment based on transfection marker expression

In [None]:
# Apply the gates one experiment at a time. base.gate_data presumably adds the
# 'expressing', 'marker' and 'output' columns used below — confirm in base.py
data = (
    data.groupby('exp')[data.columns]
    .apply(lambda x: base.gate_data(x,gates))
    .reset_index(drop=True)
)

# Upper cutoff on the output channel; named so the builtin `max` is not shadowed
max_output = 1e6

# Keep expressing cells below the cutoff. The explicit copy makes `df` an
# independent frame, so later cells can add columns to it without writing
# into a slice of `data`
df = data[(data['expressing']) & (data['output']<max_output)].copy()
display(df)

In [None]:
# Cast to float on a fresh frame (astype returns a copy), so no column is
# assigned into what may still be a filtered slice of `data`
df = df.astype({'marker': float, 'output': float})

# Group and compute stats
# 'mean' and 'std' are given by name: pandas maps np.mean/np.std passed to
# groupby.agg onto these same built-ins, and newer versions emit a
# FutureWarning announcing that the NumPy callables will be called directly.
# The resulting column labels are unchanged.
stat_list = ['mean', 'std', sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df.groupby(by=['condition','construct','construct2','biorep','exp'])
stats = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats.columns = stats.columns.map(lambda i: base.rename_multilevel_cols(i))
# Cell count per group; the assignment aligns on the row index that survives dropna()
stats['count'] = grouped['marker'].count().reset_index()['marker']
stats = stats.merge(metadata, how='left', on='construct')
stats.sort_values(['design','ts_kind','ts_num'], inplace=True)
stats = stats.merge(metadata_construct2, how='left', on='construct2')

display(stats)

# Extra metadata column and display names for the summary statistics
metadata['TS'] = metadata['ts_kind']
stat_name = {'output_gmean': 'Geometric\nmean', 'output_std': 'Standard\ndeviation', 'output_variation': 'Coefficient\nof variation'}

In [None]:
# Bin by transfection marker: 14 log-spaced bins between 1e2 and 1e6
min_count = 100
df['bin_marker'] = df.groupby(['condition','construct','construct2','exp'])['marker'].transform(lambda x: pd.cut(x, np.logspace(2,6,15)))
# Flag bins holding fewer than min_count cells.
# NOTE(review): the count is taken per condition/exp and not per biorep, while
# the statistics below are grouped by biorep — confirm this is intended
df['remove_bin'] = df.groupby(['condition','construct','construct2','exp','bin_marker'])['bin_marker'].transform(lambda x: x.count() < min_count)
df_binned = df[~df['remove_bin']].copy()
df_binned['marker'] = df_binned['marker'].astype(float)
df_binned['output'] = df_binned['output'].astype(float)

# Group and compute stats
# 'mean' and 'std' are given by name: pandas maps np.mean/np.std passed to
# groupby.agg onto these same built-ins, and newer versions emit a
# FutureWarning announcing that the NumPy callables will be called directly.
# The resulting column labels are unchanged.
stat_list = ['mean', 'std', sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned.groupby(by=['condition','construct','construct2','biorep','exp','bin_marker'])
stats_bin = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats_bin.columns = stats_bin.columns.map(lambda i: base.rename_multilevel_cols(i))
stats_bin['count'] = grouped['marker'].count().reset_index()['marker']
# NOTE(review): unlike `stats` and `stats_bin_combined`, the construct2
# metadata is not merged into this table — confirm this is intended
stats_bin = stats_bin.merge(metadata, how='left', on='construct')

# Compute mean/median on bin span (both are taken over the bin's two edges)
df_binned['bin_marker_mean'] = df_binned['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
df_binned['bin_marker_median'] = df_binned['bin_marker'].map(lambda x: np.median([x.left, x.right]))
stats_bin['bin_marker_mean'] = stats_bin['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
stats_bin['bin_marker_median'] = stats_bin['bin_marker'].map(lambda x: np.median([x.left, x.right]))

display(stats_bin)

In [None]:
# Combine bioreps: the min-count filter and the statistics are both computed
# per condition and marker bin, pooling all bioreps
df['remove_bin_combined'] = df.groupby(['condition','construct','construct2','bin_marker'])['bin_marker'].transform(lambda x: x.count() < min_count)
df_binned_combined = df[~df['remove_bin_combined']].copy()

# 'mean' and 'std' are given by name: pandas maps np.mean/np.std passed to
# groupby.agg onto these same built-ins, and newer versions emit a
# FutureWarning announcing that the NumPy callables will be called directly.
# The resulting column labels are unchanged.
stat_list = ['mean', 'std', sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned_combined.groupby(by=['condition','construct','construct2','bin_marker'])
stats_bin_combined = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Flatten the (column, statistic) labels with the project helper, then attach
# the per-group cell count and both metadata tables
stats_bin_combined.columns = stats_bin_combined.columns.map(lambda i: base.rename_multilevel_cols(i))
stats_bin_combined['count'] = grouped['marker'].count().reset_index()['marker']
stats_bin_combined = stats_bin_combined.merge(metadata, how='left', on='construct')
stats_bin_combined = stats_bin_combined.merge(metadata_construct2, how='left', on='construct2')

# Mean/median of each bin's two edges, on both the event table and the stats table
df_binned_combined['bin_marker_mean'] = df_binned_combined['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
df_binned_combined['bin_marker_median'] = df_binned_combined['bin_marker'].map(lambda x: np.median([x.left, x.right]))
stats_bin_combined['bin_marker_mean'] = stats_bin_combined['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
stats_bin_combined['bin_marker_median'] = stats_bin_combined['bin_marker'].map(lambda x: np.median([x.left, x.right]))

In [None]:
# Normalize output to gmean of output in smallest bin, and normalize marker bin by smallest bin
def normalize_output(df):
    """Return a copy of `df` with output and marker bin scaled to the lowest bin.

    Two columns are added to the copy:

    - `output_norm`: `output` divided by the geometric mean of `output` over
      the rows whose `bin_marker_median` equals the smallest value present.
    - `bin_marker_median_norm`: `bin_marker_median` divided by that smallest
      value.

    The frame passed in is not modified.
    """
    result = df.copy()
    bin_medians = result['bin_marker_median']
    in_lowest_bin = bin_medians == bin_medians.min()
    baseline = sp.stats.gmean(result.loc[in_lowest_bin, 'output'])
    result['output_norm'] = result['output'].astype(float) / baseline
    result['bin_marker_median_norm'] = bin_medians.astype(float) / bin_medians.min()
    return result

# Normalize separately within each condition/biorep group, then flatten the
# result back into a single table with a fresh row index
by = ['condition','construct','construct2','biorep','exp']
df_binned = (
    df_binned
    .groupby(by)[df_binned.columns]
    .apply(normalize_output)
    .reset_index(drop=True)
)
display(df_binned)

# Cache data: every column except 'color' and 'bin_marker' is written out
df_binned.drop(columns=['color','bin_marker'], errors='ignore').to_parquet(
    rd.outfile(output_path/'df_binned.gzip')
)

In [None]:
# mGL vs. binned marker level: one row per biorep, one column per condition.
# Events whose construct2 is 'none' or 'JX00' are left out.
plot_df = df_binned[~df_binned['construct2'].isin(['none','JX00'])]
g = sns.relplot(
    data=plot_df, kind='line',
    x='bin_marker_median', y='mGL-A',
    row='biorep', col='condition',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    legend=False, height=3, aspect=1.3,
    facet_kws=dict(sharex=True, sharey=True, margin_titles=True),
    # Line: geometric mean. Band: geometric mean divided / multiplied by the
    # geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log')
sns.despine()
for name, ax in g.axes_dict.items():
    ax.grid()
g.figure.savefig(rd.outfile(output_path/'line_iRFP-mGL.svg'), bbox_inches='tight')

mGL expression tracks closely with iRFP expression, indicating that the transfection marker can be used as a proxy for miR amount (as expected for co-transfections).

In [None]:
# Output vs. binned marker level: one row per biorep, one column per 'group'.
# Events whose construct2 is 'none' or 'JX00' are left out.
plot_df = df_binned[~df_binned['construct2'].isin(['none','JX00'])]
g = sns.relplot(
    data=plot_df, kind='line',
    x='bin_marker_median', y='output',
    row='biorep', col='group',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    legend=False, height=3, aspect=1.3,
    facet_kws=dict(sharex=True, sharey=True, margin_titles=True),
    # Line: geometric mean. Band: geometric mean divided / multiplied by the
    # geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log')
sns.despine()
for name, ax in g.axes_dict.items():
    ax.grid()
g.figure.savefig(rd.outfile(output_path/'line_marker-output.svg'), bbox_inches='tight')

In [None]:
# Output vs. binned marker level for biorep 2 only: one row per construct2,
# one column per 'group'
plot_df = df_binned[df_binned['biorep']==2]
g = sns.relplot(
    data=plot_df, kind='line',
    x='bin_marker_median', y='output',
    row='construct2', col='group',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    legend=False, height=3, aspect=1.3,
    facet_kws=dict(sharex=True, sharey=True, margin_titles=True),
    # Line: geometric mean. Band: geometric mean divided / multiplied by the
    # geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log')
sns.despine()
for name, ax in g.axes_dict.items():
    ax.grid()
g.figure.savefig(rd.outfile(output_path/'line_marker-output_biorep2.svg'), bbox_inches='tight')

In [None]:
# Output vs. binned marker level for every event: one row per biorep, one
# column per construct2
plot_df = df_binned

# Column titles: readable names for the construct2 values
label = {
    'none': 'dual-transcript',
    'RC239': 'single-transcript',
    'RC227': 'dual-vector (EF1a)',
    'JX00': 'dual-vector (U6)',
}
g = sns.relplot(
    data=plot_df, kind='line',
    x='bin_marker_median', y='output',
    row='biorep', col='construct2',
    hue='construct', palette=construct_palette,
    style='construct', markers=construct_markers, dashes=False,
    legend=False, height=4, aspect=1,
    facet_kws=dict(sharex=True, sharey=True, margin_titles=True),
    # Line: geometric mean. Band: geometric mean divided / multiplied by the
    # geometric standard deviation.
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log')
sns.despine()
for (biorep, construct2), ax in g.axes_dict.items():
    ax.grid()
    # Only the biorep-1 row gets the readable column title
    if biorep==1:
        ax.set(title=label[construct2])
g.figure.savefig(rd.outfile(output_path/'line_marker-output_construct2.svg'), bbox_inches='tight')

In [None]:
# Summary statistics per condition, leaving out rows whose 'group' is 'marker'.
# Filtering and sorting are chained so the sort yields a new frame rather than
# sorting a filtered slice of `stats` in place
plot_df = stats[stats['group']!='marker'].sort_values(['construct2', 'design','ts_kind'])
stat_list = ['output_gmean', 'output_std', 'output_variation']

# One row per condition, used to build the tick labels; it is the same for
# every panel, so it is computed once
xtick_df = plot_df.drop_duplicates('condition')

# One stacked panel per statistic
fig, axes = plt.subplots(len(stat_list),1, figsize=(12,12), gridspec_kw=dict(hspace=0.6))
for i,stat in enumerate(stat_list):
    ax = axes[i]
    sns.scatterplot(data=plot_df, x='condition', y=stat, hue='construct', palette=construct_palette,
                legend=False, s=100, ax=ax, style='construct', markers=construct_markers)
    ax.set(xlabel='')
    # Log axis for the geometric mean and standard deviation; the coefficient
    # of variation stays linear with its lower limit pinned at 0
    if stat != 'output_variation': ax.set(yscale='log')
    else: ax.set(ylim=(0,ax.get_ylim()[1]))
    rd.plot.generate_xticklabels(xtick_df, 'condition', ['gene', 'ts_kind', 'ts_loc'], ax=ax)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/'stat_controller.svg'), bbox_inches='tight')