In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

Load data from 293T, MEF lenti infections (`data_lenti`)

In [None]:
# Export folders of the two lenti experiments (exp89 and exp95)
attune_dir = rd.datadir/'instruments'/'data'/'attune'
base_path_1 = attune_dir/'kasey'/'2024.04.05_exp89'/'export'
base_path_2 = attune_dir/'chris'/'2024.06.02-exp95-lenti-miR-iFFL'/'export'

# exp95 plate folder names, plates 1-9: plate<N>_<cell label>_<P label>
exp95_cells = ['293T']*3 + ['MEF2A']*3 + ['MEF8A']*3
exp95_labels = ['P9','P14','P15']*3
plate_list = [f'plate{n}_{cell}_{label}'
              for n, (cell, label) in enumerate(zip(exp95_cells, exp95_labels), start=1)]

# exp89 plates as (data folder, metadata yaml) pairs
yaml_dir_1 = base_path_1/'kasey_yaml2'
exp89_plates = [
    ('293T_control',   'plate_control.yaml'),
    ('293T_plate1',    'plate01.yaml'),
    ('293T_plate2',    'plate02.yaml'),
    ('293T_plate3',    'plate03.yaml'),
    ('MEF_3_plate1',   'mef_3_plate01.yaml'),
    ('MEF_4-1_plate1', 'mef_4-1_plate01.yaml'),
    ('MEF_4-1_plate2', 'mef_4-1_plate02.yaml'),
    ('MEF_4-1_plate3', 'mef_4-1_plate03.yaml'),
]

# One row per plate: where the exported events live and which yaml describes the wells
plates = pd.DataFrame({
    'data_path': [base_path_1/folder for folder, _yaml in exp89_plates]
                 + [base_path_2/p for p in plate_list],
    'yaml_path': [yaml_dir_1/yaml_name for _folder, yaml_name in exp89_plates]
                 + [base_path_2/(p+'_metadata.yaml') for p in plate_list],
})

# The lenti cache lives under fig_designs; outputs of this notebook go to fig_applications
cache_path = rd.rootdir/'output'/'fig_designs'/'data_lenti.gzip'
output_path = rd.rootdir/'output'/'fig_applications'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Load data, reusing the parquet cache when it already exists
if cache_path.is_file():
    data_lenti = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data_lenti = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every loaded channel
    data_lenti = data_lenti[(data_lenti[channel_list] > 0).all(axis=1)]

    # Drop rows with any missing value, then write the cache for the next run
    data_lenti = data_lenti.dropna()
    data_lenti.to_parquet(rd.outfile(cache_path))

# Attach construct metadata, then derive:
#   'cell' = the part of cell_type before the first '-'
#   'exp'  = '<cell_type>_<virus_batch>'
metadata = base.get_metadata(metadata_path, style='designs')
data_lenti = data_lenti.merge(metadata, how='left', on='construct').assign(
    cell=lambda d: d['cell_type'].apply(lambda name: name.split('-')[0]),
    exp=lambda d: d['cell_type'] + '_' + d['virus_batch'],
)

def map_biorep(df):
    """Return a copy of `df` with a 0-based 'biorep' column.

    Each distinct value of `df['exp']` is numbered in order of first
    appearance; the input frame is left untouched.
    """
    experiments = df['exp'].unique()
    lookup = dict(zip(experiments, range(len(experiments))))
    return df.assign(biorep=df['exp'].map(lookup))

# Number the experiments separately within each 'cell' group
data_lenti = (
    data_lenti
    .groupby('cell')[data_lenti.columns]
    .apply(map_biorep)
    .reset_index(drop=True)
)
display(data_lenti)

In [None]:
# Gate cells
# For each channel, the gate of an experiment ('exp') is the 99.99th percentile of
# that channel among its virus_dilution==0 events.
gates_lenti = pd.DataFrame()
channel_list = ['mGL-A', 'mCherry-A', 'mRuby2-A']
for channel in channel_list:
    gates_lenti[channel] = data_lenti[(data_lenti['virus_dilution']==0)].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.9999)) # adjust gate to better isolate marker+ pop
gates_lenti.reset_index(inplace=True)

# Add missing gates
# Each appended row is [exp label] + one gate value per channel in channel_list.
# 293T rows reuse the gate of the '293T_na' experiment; MEF rows use the mean over all
# rows whose 'exp' contains 'MEF' (which includes MEF rows appended earlier in this block).
# Presumably these experiments have no virus_dilution==0 events of their own — TODO confirm.
# NOTE(review): '293T_P14_' ends with an underscore, unlike the other labels — confirm it
# matches an actual 'exp' value.
gates_lenti.loc[len(gates_lenti.index)] = ['293T_P10'] + list(gates_lenti.loc[gates_lenti['exp']=='293T_na', channel_list].mean().values)
gates_lenti.loc[len(gates_lenti.index)] = ['293T_P14_'] + list(gates_lenti.loc[gates_lenti['exp']=='293T_na', channel_list].mean().values)
gates_lenti.loc[len(gates_lenti.index)] = ['293T_P16'] + list(gates_lenti.loc[gates_lenti['exp']=='293T_na', channel_list].mean().values) 
gates_lenti.loc[len(gates_lenti.index)] = ['MEF-3_P10'] + list(gates_lenti.loc[gates_lenti['exp'].str.contains('MEF'), channel_list].mean().values)
gates_lenti.loc[len(gates_lenti.index)] = ['MEF-4-1_P10'] + list(gates_lenti.loc[gates_lenti['exp'].str.contains('MEF'), channel_list].mean().values)
gates_lenti.loc[len(gates_lenti.index)] = ['MEF-4-1_P14'] + list(gates_lenti.loc[gates_lenti['exp'].str.contains('MEF'), channel_list].mean().values)
gates_lenti.loc[len(gates_lenti.index)] = ['MEF-4-1_P16'] + list(gates_lenti.loc[gates_lenti['exp'].str.contains('MEF'), channel_list].mean().values)

# Indicate which channels are relevant for each experiment
gates_lenti.sort_values(['exp'], inplace=True)
gates_lenti['marker'] = 'mGL-A'
gates_lenti['output'] = 'mRuby2-A'

# Gate data by marker expression
# base.gate_data is applied once per (cell_type, virus_batch) group; the 'expressing' and
# 'output' columns used below are presumably added by it — verify against base.py.
data_lenti = data_lenti.groupby(['cell_type','virus_batch'])[data_lenti.columns].apply(lambda x: base.gate_data(x,gates_lenti))
data_lenti.reset_index(inplace=True, drop=True)
# Events flagged as expressing, excluding the virus_dilution==0 condition
df_lenti = data_lenti[(data_lenti['expressing']) & (data_lenti['virus_dilution']!=0)]

# Since there is no marker-only condition, save the output expression stats for untransduced cells
# (geometric mean of 'output' per exp/cell among virus_dilution==0 events)
baseline_df = data_lenti[(data_lenti['virus_dilution']==0)].groupby(['exp','cell'])['output'].apply(sp.stats.gmean).reset_index()

In [None]:
# Bin data and calculate statistics
# df_lenti is a boolean-indexed slice of data_lenti. Writing columns into it directly
# triggers pandas' SettingWithCopyWarning, so build a new frame with .assign instead.
# The float cast is presumably needed because gating leaves these columns with a
# non-float dtype — TODO confirm against base.gate_data.
df_lenti = df_lenti.assign(
    output=df_lenti['output'].astype(float),
    marker=df_lenti['marker'].astype(float),
)
df_quantiles_lenti, stats_lenti, stats_quantiles_lenti, fits_lenti = base.calculate_bins_stats(df_lenti, by=['exp','cell','biorep','construct','dox','virus_dilution'], num_bins=20)

# Re-attach construct metadata to the summary tables
stats_lenti = stats_lenti.merge(metadata, how='left', on='construct')
fits_lenti = fits_lenti.merge(metadata, how='left', on='construct')

In [None]:
# Output spread per condition: 5th and 95th percentiles of 'output'
by = ['exp','cell','biorep','construct','dox','virus_dilution']
output_by_condition = df_lenti.groupby(by)[['output']]
p_5 = output_by_condition.apply(lambda x: np.percentile(x, 5)).rename('output_5th').reset_index()
p_95 = output_by_condition.apply(lambda x: np.percentile(x, 95)).rename('output_95th').reset_index()
stats_lenti = stats_lenti.merge(p_5, how='left').merge(p_95, how='left')

# Width of the 5-95 percentile window, in linear units and in decades (log10)
stats_lenti['output_range'] = stats_lenti['output_95th'] - stats_lenti['output_5th']
stats_lenti['output_range_log'] = np.log10(stats_lenti['output_95th']) - np.log10(stats_lenti['output_5th'])

# fraction within 1 order of magnitude (10x) around median
def get_high_low(df):
    """Fraction of non-null `df['output']` values inside a 10x-wide window centered (in log space) on the median.

    The window is open on both sides: (median / sqrt(10), median * sqrt(10)).
    """
    output = df['output']
    center = output.median()
    half_decade = 10**(0.5)
    lower, upper = center / half_decade, center * half_decade
    inside = output[(output > lower) & (output < upper)]
    return inside.count() / output.count()

# One value per condition: fraction of events within the 10x window around the median
fraction = (
    df_lenti
    .groupby(by)[df_lenti.columns]
    .apply(get_high_low)
    .rename('fraction_within_10x')
    .reset_index()
)
stats_lenti = stats_lenti.merge(fraction, how='left')
display(fraction)

In [None]:
# Per-construct look-up tables ({column: {construct: value}}) used to style the plots
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
main_palette, main_markers = (metadata_dict[key] for key in ('color', 'markers'))

# Short axis labels for each value of 'ts_kind'
ts_label = {
    'na': 'base',
    'NT': 'OL',
    'T': 'CL',
    'none': '–',
}
metadata['ts_label'] = metadata['ts_kind'].replace(to_replace=ts_label)

In [None]:
# Load T cell lenti data
tcell_path = rd.datadir/'instruments'/'data'/'collaborators'/'birnbaum_steph'/'2024-06-10 Galloway Exp 1'
cache_path = output_path/'data_tcell.gzip'

if cache_path.is_file():
    data_tcell = pd.read_parquet(cache_path)
else:
    channels = ['FSC-A', 'SSC-A', 'FITC-A', 'PE-A', 'APC-A750-A']
    data_tcell = rd.flow.load_csv_with_metadata(tcell_path/'export', tcell_path/'metadata.yaml', columns=channels)

    # Keep only events that are strictly positive in every loaded channel
    data_tcell = data_tcell[(data_tcell[channels] > 0).all(axis=1)]

    # Rename the instrument channels to the names used in the rest of the notebook
    data_tcell = data_tcell.rename(columns={'FITC-A': 'mGL-A', 'PE-A': 'mRuby2-A', 'APC-A750-A': 'livedead-A'})
    data_tcell.to_parquet(rd.outfile(cache_path))

# Add metadata for constructs; this dataset is a single experiment / biorep
data_tcell = data_tcell.merge(metadata, how='left', on='construct').assign(exp='steph', biorep=1)
display(data_tcell)

In [None]:
# Gate cells
# For each channel, the gate of an experiment ('exp') is the 99.995th percentile of
# that channel among the construct=='UT' events.
gates_tcell = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates_tcell[channel] = data_tcell[data_tcell['construct']=='UT'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.99995)) # adjust gate to better isolate marker+ pop
gates_tcell.reset_index(inplace=True)

# Indicate which channels are relevant for each experiment
gates_tcell.sort_values(['exp'], inplace=True)
gates_tcell['marker'] = 'mGL-A'
gates_tcell['output'] = 'mRuby2-A'

# Gate data by marker expression and live/dead stain
# base.gate_data is applied once per 'exp' group; the 'expressing' and 'output' columns
# used below are presumably added by it — verify against base.py.
data_tcell = data_tcell.groupby('exp')[data_tcell.columns].apply(lambda x: base.gate_data(x,gates_tcell))
data_tcell.reset_index(inplace=True, drop=True)
# Fixed threshold on the live/dead channel; events below it are kept
# NOTE(review): presumably low stain signal = live cells — confirm
livedead_gate = 2000
df_tcell = data_tcell[(data_tcell['expressing']) & (data_tcell['construct']!='UT') & (data_tcell['livedead-A']<livedead_gate)]

# Since there is no marker-only condition, save the output expression stats for untransduced cells
# (geometric mean of 'output' per exp among construct=='UT' events)
baseline_df_tcell = data_tcell[(data_tcell['construct']=='UT')].groupby(['exp'])['output'].apply(sp.stats.gmean).reset_index()

In [None]:
# Bin the gated T cell events and compute summary statistics per condition
# (the third return value is not needed for this dataset)
tcell_conditions = ['exp','biorep','construct','dox','moi']
df_quantiles_tcell, stats_tcell, _, fits_tcell = base.calculate_bins_stats(df_tcell, by=tcell_conditions, num_bins=20)

# Re-attach construct metadata to both summary tables
stats_tcell, fits_tcell = [table.merge(metadata, how='left', on='construct')
                           for table in (stats_tcell, fits_tcell)]

In [None]:
# Output spread per condition: 5th and 95th percentiles of 'output'
by = ['exp','biorep','construct','dox','moi']
output_by_condition = df_tcell.groupby(by)[['output']]
p_5 = output_by_condition.apply(lambda x: np.percentile(x, 5)).rename('output_5th').reset_index()
p_95 = output_by_condition.apply(lambda x: np.percentile(x, 95)).rename('output_95th').reset_index()
stats_tcell = stats_tcell.merge(p_5, how='left').merge(p_95, how='left')

# Width of the 5-95 percentile window, in linear units and in decades (log10)
stats_tcell['output_range'] = stats_tcell['output_95th'] - stats_tcell['output_5th']
stats_tcell['output_range_log'] = np.log10(stats_tcell['output_95th']) - np.log10(stats_tcell['output_5th'])

# Fraction of events within the 10x window around the median, per condition
fraction = (
    df_tcell
    .groupby(by)[df_tcell.columns]
    .apply(get_high_low)
    .rename('fraction_within_10x')
    .reset_index()
)
stats_tcell = stats_tcell.merge(fraction, how='left')

Load rat cortical neuron lenti infection (`data_neuron`)

In [None]:
# Load data
neuron_path = rd.datadir/'instruments'/'data'/'attune'/'chris'/'2024.06.15-rat-neurons'
cache_path = output_path/'data_neuron.gzip'

# Reuse the parquet cache when present; otherwise load the exported events and cache them
if cache_path.is_file():
    data_neuron = pd.read_parquet(cache_path)
else:
    channels = ['FSC-A', 'SSC-A', 'mGL-A', 'mRuby2-A', 'mCherry-A']
    data_neuron = rd.flow.load_csv_with_metadata(neuron_path/'export', neuron_path/'metadata.yaml', columns=channels)

    # Remove negative channel values.
    # Filter on `channels` (the columns loaded just above). The previous code looped over
    # `channel_list`, a global left behind by an earlier gating cell, so only a subset of
    # the channels was filtered and the cell failed on a fresh kernel run in isolation.
    # NOTE: a cache file written by the old code keeps the old filtering; delete it to regenerate.
    for c in channels:
        data_neuron = data_neuron[data_neuron[c]>0]

    data_neuron.to_parquet(rd.outfile(cache_path))

# Add metadata for constructs; this dataset is a single experiment
data_neuron = data_neuron.merge(metadata, how='left', on='construct')
data_neuron['exp'] = 'cpj'
display(data_neuron)

In [None]:
# Gate cells
# For each channel, the gate of an experiment ('exp') is the 99.9th percentile of
# that channel among the construct=='UT' events.
gates_neuron = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates_neuron[channel] = data_neuron[data_neuron['construct']=='UT'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates_neuron.reset_index(inplace=True)

# Indicate which channels are relevant for each experiment
gates_neuron.sort_values(['exp'], inplace=True)
gates_neuron['marker'] = 'mGL-A'
gates_neuron['output'] = 'mRuby2-A'

# Adjust marker gate to better isolate infected population
# (this replaces the percentile-based mGL-A gate computed above with a fixed value)
gates_neuron['mGL-A'] = 1e3

# Gate data by marker expression
# base.gate_data is applied once per 'exp' group; the 'expressing' and 'output' columns
# used below are presumably added by it — verify against base.py.
data_neuron = data_neuron.groupby('exp')[data_neuron.columns].apply(lambda x: base.gate_data(x,gates_neuron))
data_neuron.reset_index(inplace=True, drop=True)
df_neuron = data_neuron[(data_neuron['expressing']) & (data_neuron['construct']!='UT')]

# Since there is no marker-only condition, save the output expression stats for untransduced cells
# (geometric mean of 'output' per exp among construct=='UT' events)
baseline_df_neuron = data_neuron[(data_neuron['construct']=='UT')].groupby(['exp'])['output'].apply(sp.stats.gmean).reset_index()

In [None]:
# Bin the gated neuron events (10 bins) and compute summary statistics per condition
# (the third return value is not needed for this dataset)
neuron_conditions = ['construct','moi']
df_quantiles_neuron, stats_neuron, _, fits_neuron = base.calculate_bins_stats(df_neuron, by=neuron_conditions, num_bins=10)

# Re-attach construct metadata to both summary tables
stats_neuron, fits_neuron = [table.merge(metadata, how='left', on='construct')
                             for table in (stats_neuron, fits_neuron)]

In [None]:
# Output spread per condition: 5th and 95th percentiles of 'output'
by = ['construct','moi']
output_by_condition = df_neuron.groupby(by)[['output']]
p_5 = output_by_condition.apply(lambda x: np.percentile(x, 5)).rename('output_5th').reset_index()
p_95 = output_by_condition.apply(lambda x: np.percentile(x, 95)).rename('output_95th').reset_index()
stats_neuron = stats_neuron.merge(p_5, how='left').merge(p_95, how='left')

# Width of the 5-95 percentile window, in linear units and in decades (log10)
stats_neuron['output_range'] = stats_neuron['output_95th'] - stats_neuron['output_5th']
stats_neuron['output_range_log'] = np.log10(stats_neuron['output_95th']) - np.log10(stats_neuron['output_5th'])

# Fraction of events within the 10x window around the median, per condition
fraction = (
    df_neuron
    .groupby(by)[df_neuron.columns]
    .apply(get_high_low)
    .rename('fraction_within_10x')
    .reset_index()
)
stats_neuron = stats_neuron.merge(fraction, how='left')

Load therapeutic neuro genes transfection (`data_therapeutic`)

In [None]:
# Singlet exports for the three bioreps (bioreps 1-2 share one export folder)
base_path = rd.datadir/'instruments'/'data'/'attune'/'Emma'
rep1_2_path = base_path/'2024.06.05_EXP56'/'data_singlets'
rep3_path = base_path/'2024.06.09_EXP60'/'data_singlets'

# One row per biorep: data folder, well-metadata yaml, experiment label, biorep number
# NOTE(review): the biorep 3 yaml is named 'elp_exp56_...' although it sits in the EXP60 folder — confirm the file name
plate_rows = [
    (rep1_2_path, 'elp_exp56_biorep_1_wells.yaml', 'elp_exp56.1', 1),
    (rep1_2_path, 'elp_exp56_biorep_2_wells.yaml', 'elp_exp56.2', 2),
    (rep3_path,   'elp_exp56_biorep_3_wells.yaml', 'elp_exp60',   3),
]
plates = pd.DataFrame({
    'data_path': [folder for folder, _yaml, _exp, _rep in plate_rows],
    'yaml_path': [folder/yaml_name for folder, yaml_name, _exp, _rep in plate_rows],
    'exp': [exp for _folder, _yaml, exp, _rep in plate_rows],
    'biorep': [rep for _folder, _yaml, _exp, rep in plate_rows],
})

cache_path = output_path/'data_therapeutics.gzip'

# Load data, reusing the parquet cache when it already exists
if cache_path.is_file():
    data_therapeutics = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','EGFP-A','iRFP-A']
    data_therapeutics = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every loaded channel
    data_therapeutics = data_therapeutics[(data_therapeutics[channel_list] > 0).all(axis=1)]

    # Drop rows with any missing value, then write the cache for the next run
    data_therapeutics = data_therapeutics.dropna()
    data_therapeutics.to_parquet(rd.outfile(cache_path))

# Add metadata for constructs (the 'applications' metadata style)
metadata2 = base.get_metadata(metadata_path, style='applications')
data_therapeutics = data_therapeutics.merge(metadata2, how='left', on='construct')

In [None]:
# Per-construct look-up tables ({column: {construct: value}}) for the 'applications' constructs
metadata_dict2 = metadata2.set_index('construct').to_dict(orient='dict')
app_palette, app_markers = (metadata_dict2[key] for key in ('color', 'markers'))

In [None]:
# Gate cells: per experiment, the gate of each channel is the 99.9th percentile
# of that channel among the construct=='UT' events
channel_list = ['EGFP-A', 'mRuby2-A', 'iRFP-A']
untransduced_by_exp = data_therapeutics[data_therapeutics['construct']=='UT'].groupby(['exp'])
gates_therapeutics = pd.DataFrame()
for channel in channel_list:
    gates_therapeutics[channel] = untransduced_by_exp[channel].apply(lambda x: x.quantile(0.999))
gates_therapeutics = gates_therapeutics.reset_index()

# Indicate which channels are relevant for each experiment
gates_therapeutics['marker'] = 'iRFP-A'
gates_therapeutics['output'] = 'mRuby2-A'

# Gate data by marker expression, one 'exp' group at a time
data_therapeutics = (
    data_therapeutics
    .groupby('exp')[data_therapeutics.columns]
    .apply(lambda x: base.gate_data(x,gates_therapeutics))
    .reset_index(drop=True)
)

# Constructs whose name contains 'FMRP' take their output from the EGFP-A channel instead
is_fmrp = data_therapeutics['name'].str.contains('FMRP')
data_therapeutics.loc[is_fmrp, 'output'] = data_therapeutics.loc[is_fmrp, 'EGFP-A']

# Keep expressing events from transduced (non-'UT') conditions
is_transduced = data_therapeutics['construct']!='UT'
df_therapeutics = data_therapeutics[(data_therapeutics['expressing']) & is_transduced]

In [None]:
# Bin the gated events and compute summary statistics per condition
# (the third return value is not needed for this dataset)
therapeutics_conditions = ['exp','biorep','construct']
df_quantiles_therapeutics, stats_therapeutics, _, fits_therapeutics = base.calculate_bins_stats(df_therapeutics, by=therapeutics_conditions, num_bins=20)

# Re-attach the 'applications' construct metadata to both summary tables
stats_therapeutics, fits_therapeutics = [table.merge(metadata2, how='left', on='construct')
                                         for table in (stats_therapeutics, fits_therapeutics)]

In [None]:
# Output spread per condition: 5th and 95th percentiles of 'output'
by = ['exp','biorep','construct']
output_by_condition = df_therapeutics.groupby(by)[['output']]
p_5 = output_by_condition.apply(lambda x: np.percentile(x, 5)).rename('output_5th').reset_index()
p_95 = output_by_condition.apply(lambda x: np.percentile(x, 95)).rename('output_95th').reset_index()
stats_therapeutics = stats_therapeutics.merge(p_5, how='left').merge(p_95, how='left')

# Width of the 5-95 percentile window, in linear units and in decades (log10)
stats_therapeutics['output_range'] = stats_therapeutics['output_95th'] - stats_therapeutics['output_5th']
stats_therapeutics['output_range_log'] = np.log10(stats_therapeutics['output_95th']) - np.log10(stats_therapeutics['output_5th'])

# Fraction of events within the 10x window around the median, per condition
fraction = (
    df_therapeutics
    .groupby(by)[df_therapeutics.columns]
    .apply(get_high_low)
    .rename('fraction_within_10x')
    .reset_index()
)
stats_therapeutics = stats_therapeutics.merge(fraction, how='left')

### Set up figure

In [None]:
# Font sizes shared across figures come from base.py
base_size = base.font_sizes['base_size']
smaller_size = base.font_sizes['smaller_size']

sns.set_style('ticks')
sns.set_context(
    'paper',
    font_scale=1.0,
    rc={'font.size': base_size, 'font.family': 'sans-serif', 'font.sans-serif':['Arial']},
)

# Matplotlib defaults for every panel of the figure
rc_overrides = {
    # text sizes
    'axes.titlesize': base_size,
    'axes.labelsize': base_size,
    'xtick.labelsize': smaller_size,
    'ytick.labelsize': smaller_size,
    # font embedding type used when saving PDFs
    'pdf.fonttype': 42,
    # tick geometry
    'ytick.major.size': 3,
    'xtick.major.size': 3,
    'ytick.minor.size': 2,
    'ytick.major.pad': 2,
    'xtick.major.pad': 2,
    # lines, spines and label padding
    'lines.linewidth': 1,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.labelpad': 2,
}
plt.rcParams.update(rc_overrides)

In [None]:
# Create the overall figure, gridspec, and add subfigure labels
# Six rows (one short row followed by five taller ones) by two equal-width columns
row_heights = [1] + [1.5]*5
fig = plt.figure(figsize=(5.5, sum(row_heights))) #4.48819, 6.8504
fig_gridspec = matplotlib.gridspec.GridSpec(
    6, 2, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=row_heights, width_ratios=(3,3),
)

# Panel name -> (row, column) in the gridspec
panel_cells = {
    'A': (0,0), 'A2': (0,1),
    'B': (1,0), 'B2': (1,1),
    'C': (2,0), 'C2': (2,1),
    'D': (3,0), 'D2': (3,1),
    'E': (4,0), 'F':  (4,1),
    'G': (5,0), 'G2': (5,1), # H
}
subfigures = {name: fig.add_subfigure(fig_gridspec[row, col])
              for name, (row, col) in panel_cells.items()}

# Bold letter in the top-left corner of each panel; names containing '2' are
# continuation panels and stay unlabeled
for label, subfig in subfigures.items():
    if '2' not in label:
        panel_letter = matplotlib.text.Text(
            x=0, y=1, text=f'{label}',
            fontsize=base.font_sizes['subpanel_label'], fontweight='bold',
            verticalalignment='top', transform=subfig.transSubfigure,
        )
        subfig.add_artist(panel_letter)

# Shared styling for the strip plots in the summary panels
scatter_kwargs = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white')

# Save the (still empty) layout to the output folder and the manuscript links folder
fig_name = 'fig_applications.pdf'
fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel B: output vs. marker for the lenti data (MEF, dox 1000, virus_dilution 1, biorep 1, design <= 1)
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.5, bottom=0.45, top=0.2, right=0)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.4), wspace=0.3))

plot_df = df_quantiles_lenti[(df_quantiles_lenti['group'].isin(['controller','base'])) & (df_quantiles_lenti['dox']==1000) & 
                             (df_quantiles_lenti['virus_dilution']==1) & (df_quantiles_lenti['cell']=='MEF') &
                              (df_quantiles_lenti['biorep'].isin([1])) & (df_quantiles_lenti['design']<=1)].copy()
plot_df.sort_values(['ts_num','ts_kind'], ascending=False, inplace=True)
# Materialize the order as a list: `reversed(...)` alone is a single-use iterator, which
# would silently be empty if the order were consumed more than once.
plot_order = list(reversed(plot_df['construct'].unique()))

# line plot: geometric mean per marker bin, error bars = gmean divided/multiplied by geometric std
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=4, markeredgewidth=0.5,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
             hue_order=plot_order)
ax.set(xscale='log', yscale='log', xlabel='marker', xlim=(4e3,2e5))
sns.despine(ax=ax)
# dotted line: mean over MEF experiments of the untransduced output baseline
baseline = baseline_df.loc[baseline_df['cell']=='MEF', 'output'].mean()
ax.axhline(baseline, color='black', ls=':')
ax.minorticks_off()

# # add median
# for construct, group in plot_df.groupby('construct'):
#     ax.axhline(group['output'].median(), color=main_palette[construct])
#     ax.axhline(group['output'].median()*np.sqrt(10**(0.5)), color=main_palette[construct])
#     ax.axhline(group['output'].median()/np.sqrt(10**(0.5)), color=main_palette[construct])

# histogram: marginal density of output, sharing the y-range of the line plot
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)
ax.minorticks_off()

fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel B2: summary statistics for the lenti MEF data (slope, CDF, fraction within 10x)
subfig = subfigures['B2']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.3, bottom=0.45, top=0.2, right=0.05)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,2,1), wspace=0.5))

# slope
# (no biorep filter here, so every biorep matching the conditions is drawn)
ax = axes[0]
plot_df2 = fits_lenti[fits_lenti['group'].isin(['controller','base']) & (fits_lenti['dox']==1000) & (fits_lenti['virus_dilution']==1) & 
                      (fits_lenti['cell']=='MEF')&  (fits_lenti['design']<=1)]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='', yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

# CDF
ax = axes[1]
xlim_cdf = (1e1,1e5)
# biorep to use for each design value, indexed by design (design 0 -> biorep 6, design 1 -> biorep 1)
bioreps = [6,1]
plot_df = df_quantiles_lenti[(df_quantiles_lenti['group'].isin(['controller','base'])) & (df_quantiles_lenti['dox']==1000) & 
                             (df_quantiles_lenti['virus_dilution']==1) & (df_quantiles_lenti['cell']=='MEF') & 
                             (df_quantiles_lenti['biorep']==1) & (df_quantiles_lenti['design']==1)]
                             #(((df_quantiles_lenti['biorep']==bioreps[1]) & (df_quantiles_lenti['design']==1)) | 
                             # ((df_quantiles_lenti['biorep']==bioreps[0]) & (df_quantiles_lenti['design']==0)))]

sns.kdeplot(data=plot_df, x='output', hue='construct', palette=main_palette, ax=ax,
            cumulative=True, common_norm=False, legend=False, log_scale=True)
ax.set(xticks=np.logspace(1,5,5), ylabel='', xlim=xlim_cdf, title='CDF')
sns.despine(ax=ax)
ax.minorticks_off()
# dotted guides at the 5% and 95% levels of the CDF
ax.axhline(0.05, color='black', zorder=1, ls=':')
ax.axhline(0.95, color='black', zorder=1, ls=':')
ax.grid(zorder=-1, color=base.get_light_color(base.get_light_color(base.colors['gray'])), which='both',alpha=0.7)

# add 5th & 95th percentile points
# NOTE(review): the CDF curves above are restricted to design==1 / biorep==1, while these
# points are drawn for every construct with design<=1 using bioreps[design] — a design-0
# construct would get points (from biorep 6) without a matching curve. Confirm this is intended.
# NOTE(review): `.values[0]` raises IndexError if a construct has no row for the chosen biorep.
plot_df = stats_lenti[(stats_lenti['group'].isin(['controller','base'])) & (stats_lenti['dox']==1000) & 
                             (stats_lenti['virus_dilution']==1) & (stats_lenti['cell']=='MEF') & (stats_lenti['design']<=1)]
for construct, group in plot_df.groupby('construct'):
    design = metadata.loc[metadata['construct']==construct, 'design'].values[0]
    values = [group.loc[group['biorep']==bioreps[design], 'output_5th'].values[0], group.loc[group['biorep']==bioreps[design], 'output_95th'].values[0]]
    ax.scatter(values, [0.05, 0.95], color=main_palette[construct], s=2.25**2*3.14, linewidth=0.5, edgecolor='white', zorder=10)

# fraction of events within 10x around the median (an earlier version plotted the 5-95% output range here)
ax = axes[2]
# plot_df2 = fits_lenti[fits_lenti['group'].isin(['controller','base']) & (fits_lenti['dox']==1000) & (fits_lenti['virus_dilution']==1) & 
#                       (fits_lenti['cell']=='MEF')&  (fits_lenti['design']<=1) & (fits_lenti['biorep']==1)]
# for construct, group in plot_df2.groupby('construct'):
#     sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
#                   legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
# ax.set(title='Slope', xlabel='', ylabel='', yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])
# rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
# plot_df = stats_lenti[(stats_lenti['group'].isin(['controller','base'])) & (stats_lenti['dox']==1000) & 
#                              (stats_lenti['virus_dilution']==1) & (stats_lenti['cell']=='MEF') & (stats_lenti['design']<=1)]
# plot_df.sort_values(['construct'], inplace=True)

plot_df = stats_lenti[(stats_lenti['group'].isin(['controller','base'])) & (stats_lenti['dox']==1000) & 
                             (stats_lenti['virus_dilution']==1) & (stats_lenti['cell']=='MEF') & (stats_lenti['design']<=1)]

# one stripplot call per construct so that each construct can use its own marker
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct', y='fraction_within_10x', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(ylabel='', title='Fraction\nw/in 10x', xlabel='',)
sns.despine(ax=ax)
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

# for construct, group in plot_df.groupby('construct', sort=False):
#     sns.stripplot(data=group, x='construct', y='output_range', hue='construct', palette=main_palette,
#                     legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
# ax.set(yscale='log', ylabel='', title='Output range,\n5$^\\mathrm{th}$ to 95$^\\mathrm{th}$ percentile', xlabel='', ylim=(1e2,2e4))
# rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
# sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel C: output vs. marker for the T cell data (dox 1000, moi 1, biorep 1)
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.5, bottom=0.45, top=0.2, right=0)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.4), wspace=0.3))

# Sort into a new frame rather than sorting the filtered slice in place
# (in-place sorting of a slice triggers pandas' SettingWithCopyWarning).
plot_df = df_quantiles_tcell[(df_quantiles_tcell['dox']==1000) & (df_quantiles_tcell['moi']==1) & (df_quantiles_tcell['biorep']==1)]
plot_df = plot_df.sort_values(['ts_num','ts_kind'], ascending=False)
# Materialize the order as a list: `reversed(...)` alone is a single-use iterator, which
# would silently be empty if the order were consumed more than once.
plot_order = list(reversed(plot_df['construct'].unique()))

# line plot: geometric mean per marker bin, error bars = gmean divided/multiplied by geometric std
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=4, markeredgewidth=0.5,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
             hue_order=plot_order)
ax.set(xscale='log', yscale='log', xlabel='marker',)
sns.despine(ax=ax)
# minor ticks on the x axis only
ax.minorticks_on()
ax.yaxis.set_tick_params(which='minor', left=False)

# histogram: marginal density of output, sharing the y-range of the line plot
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)
ax.minorticks_off()

# Save to both locations, like every other panel cell (the manuscript copy was missing here)
fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel C2: summary statistics for the T cell data (slope, CDF, fraction within 10x)
subfig = subfigures['C2']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.3, bottom=0.45, top=0.2, right=0.05)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1.5,1), wspace=0.45))

# slope
# one stripplot call per construct so that each construct can use its own marker
ax = axes[0]
plot_df2 = fits_tcell[(fits_tcell['dox']==1000) & (fits_tcell['moi']==1)]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='', ylim=(0.7,1.4))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

# CDF
ax = axes[1]
xlim_cdf = (2e2,4e6)
plot_df = df_quantiles_tcell[(df_quantiles_tcell['dox']==1000) & (df_quantiles_tcell['moi']==1)]

sns.kdeplot(data=plot_df, x='output', hue='construct', palette=main_palette, ax=ax,
            cumulative=True, common_norm=False, legend=False, log_scale=True)
ax.set(xticks=np.logspace(2,6,5), ylabel='', xlim=xlim_cdf, title='CDF')
sns.despine(ax=ax)
ax.minorticks_off()
ax.grid(zorder=-1, color=base.get_light_color(base.get_light_color(base.colors['gray'])), which='both',alpha=0.7)

# add median points
# NOTE: the plotting loop below is commented out, so within this cell `medians`,
# `medians_dict`, `i` and `locs` are computed but not used.
medians = df_quantiles_tcell[(df_quantiles_tcell['dox']==1000) & (df_quantiles_tcell['moi']==1)].groupby('construct')['output'].median().rename('median').reset_index()
medians_dict = medians.set_index('construct').to_dict('dict')['median']
i = 0
locs = [1.03,1.03,1.06]
# for construct, group in medians.groupby('construct'):
#     ax.scatter(medians_dict[construct], [0.5], color=main_palette[construct], s=2.25**2*3.14, linewidth=0.5, edgecolor='white', zorder=10)
#     left = medians_dict[construct]/(10**0.5)
#     right = medians_dict[construct]*(10**0.5)
#     ax.hlines(y=locs[i], xmin=left, xmax=right, color=main_palette[construct], zorder=1,)
#     ax.vlines(x=[left, right], ymin=locs[i]-0.05, ymax=locs[i]+0.05, color=base.get_light_color(main_palette[construct]), zorder=1,)
#     ax.axvline(left, linewidth=0.5, color=base.get_light_color(main_palette[construct]), zorder=1,)
#     ax.axvline(right, linewidth=0.5, color=base.get_light_color(main_palette[construct]), zorder=1,)
#     ax.scatter(medians_dict[construct], [0.5], color=main_palette[construct], s=2.25**2*3.14, linewidth=0.5, edgecolor='white', zorder=10)
#     i += 1

# % within 10x around median
ax = axes[2]
plot_df = stats_tcell[(stats_tcell['dox']==1000) & (stats_tcell['moi']==1)]

for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct', y='fraction_within_10x', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(ylabel='', title='Fraction\nw/in 10x', xlabel='', ylim=(0.5,1))
sns.despine(ax=ax)
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel D: output vs. marker for the neuron data (moi 1)
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.5, bottom=0.45, top=0.2, right=0)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.4), wspace=0.3))

# Sort into a new frame rather than sorting the filtered slice in place
# (in-place sorting of a slice triggers pandas' SettingWithCopyWarning).
plot_df = df_quantiles_neuron[(df_quantiles_neuron['moi']==1)]
plot_df = plot_df.sort_values(['ts_num','ts_kind'], ascending=False)
# Materialize the order as a list: `reversed(...)` alone is a single-use iterator, which
# would silently be empty if the order were consumed more than once.
plot_order = list(reversed(plot_df['construct'].unique()))

# line plot: geometric mean per marker bin, error bars = gmean divided/multiplied by geometric std
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=4, markeredgewidth=0.5,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
             hue_order=plot_order)
ax.set(xscale='log', yscale='log', xlabel='marker',)
sns.despine(ax=ax)
# minor ticks on the x axis only
ax.minorticks_on()
ax.yaxis.set_tick_params(which='minor', left=False)

# histogram: marginal density of output, sharing the y-range of the line plot
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)
ax.minorticks_off()

fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel D2: three summary plots for the moi == 1 rows of the neuron tables --
# fitted slope, cumulative distribution of output, and fraction within 10x.
subfig = subfigures['D2']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.3, bottom=0.45, top=0.2, right=0.05)
axes = subfig.subplots(1, 3, gridspec_kw={'width_ratios': (1, 1.5, 1), 'wspace': 0.45})

# Keyword arguments shared by both strip plots in this panel.
strip_common = dict(x='construct', hue='construct', palette=main_palette, legend=False)

# slope: one stripplot call per construct so each construct gets its own marker
ax = axes[0]
plot_df2 = fits_neuron[fits_neuron['moi'] == 1]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(
        data=group, y='slope', ax=ax, marker=main_markers[construct],
        **strip_common, **scatter_kwargs,
    )
ax.set(title='Slope', xlabel='', ylabel='', ylim=(0, 1.1))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

# cumulative distribution of output on a log-scaled x axis
ax = axes[1]
xlim_cdf = (5e1, 2e4)
plot_df = df_quantiles_neuron[df_quantiles_neuron['moi'] == 1]

sns.kdeplot(
    data=plot_df, x='output', hue='construct', palette=main_palette, ax=ax,
    cumulative=True, common_norm=False, legend=False, log_scale=True,
)
ax.set(xticks=np.logspace(2, 6, 5), ylabel='', xlim=xlim_cdf, title='CDF')
sns.despine(ax=ax)
ax.minorticks_off()
# Grid color: the base gray passed through get_light_color twice.
grid_color = base.get_light_color(base.get_light_color(base.colors['gray']))
ax.grid(zorder=-1, color=grid_color, which='both', alpha=0.7)

# % within 10x around median (groups kept in order of first appearance)
ax = axes[2]
plot_df = stats_neuron[stats_neuron['moi'] == 1]

for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(
        data=group, y='fraction_within_10x', ax=ax, marker=main_markers[construct],
        **strip_common, **scatter_kwargs,
    )
ax.set(ylabel='', title='Fraction\nw/in 10x', xlabel='', ylim=(0.75, 1))
sns.despine(ax=ax)
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)

# Write the whole figure to the output folder and to the manuscript links folder.
links_target = rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name
fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(links_target))

In [None]:
# Panel G: constructs whose name contains 'FXN' -- output vs. marker line plot,
# marginal output density, per-sample output geometric mean, and fitted slope.
subfig = subfigures['G']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.55, bottom=0.45, top=0.2, right=0.1)
axes = subfig.subplots(1,4, gridspec_kw=dict(width_ratios=(1,0.4,0.8,0.8), wspace=0.3))

# The line plot and density use a single biological replicate.
biorep = 2
# Sort into a new frame instead of in place: the boolean-mask selection is a
# derived frame, and sorting it in place raises pandas' SettingWithCopyWarning.
plot_df = (
    df_quantiles_therapeutics[(df_quantiles_therapeutics['biorep']==biorep) & (df_quantiles_therapeutics['name'].str.contains('FXN'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)

# line plot: geometric mean of output at each x value; the error band spans
# gmean / gstd to gmean * gstd
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=app_palette, 
             legend=False, dashes=False, style='construct', markers=app_markers, ax=ax, markersize=4, markeredgewidth=0.5,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
ax.set(xscale='log', yscale='log', xlabel='marker',)
sns.despine(ax=ax)
# Dotted reference line: geometric mean of output over the rows with
# group == 'marker' in the same replicate.
marker_baseline = sp.stats.gmean(df_quantiles_therapeutics.loc[(df_quantiles_therapeutics['group']=='marker') & (df_quantiles_therapeutics['biorep']==biorep), 'output'])
ax.axhline(marker_baseline, color='black', ls=':')

# marginal density (KDE) of output, drawn along y with the same y-limits as
# the line plot
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=app_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)

# Summary tables for the same constructs (not restricted to one replicate).
plot_df = (
    stats_therapeutics[(stats_therapeutics['name'].str.contains('FXN'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)
plot_df2 = (
    fits_therapeutics[(fits_therapeutics['name'].str.contains('FXN'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)

# stat gmean
# x-limits are set by hand to span every construct, since the points are added
# by one stripplot call per construct.
xlim = (-0.5, len(plot_df['construct'].unique())-0.5)

ax = axes[2]
# NOTE(review): groupby() iterates groups in sorted key order by default
# (sort=True), so the row order from the sort above is not what sets the group
# order here -- confirm that is intended.
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=app_palette,
                  legend=False, ax=ax, marker=app_markers[construct], **scatter_kwargs)
ax.set(title='Output mean', xlim=xlim, xlabel='', ylabel='', yscale='log',)
# Dotted reference line: arithmetic mean of output_gmean over group == 'marker'.
marker_baseline = stats_therapeutics.loc[(stats_therapeutics['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)

# slope
ax = axes[3]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=app_palette,
                  legend=False, ax=ax, marker=app_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='', ylabel='',)
# Dotted reference line: mean slope over group == 'marker'.
marker_baseline = fits_therapeutics.loc[(fits_therapeutics['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)

# Write the whole figure to the output folder and to the manuscript links folder.
fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Panel G2: constructs whose name contains 'FMRP' -- output vs. marker line plot,
# marginal output density, per-sample output geometric mean, and fitted slope.
subfig = subfigures['G2']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.3, bottom=0.45, top=0.2, right=0.1)
axes = subfig.subplots(1,4, gridspec_kw=dict(width_ratios=(1,0.4,0.8,0.8), wspace=0.3))

# The line plot and density use a single biological replicate.
biorep = 2
# Sort into a new frame instead of in place: the boolean-mask selection is a
# derived frame, and sorting it in place raises pandas' SettingWithCopyWarning.
plot_df = (
    df_quantiles_therapeutics[(df_quantiles_therapeutics['biorep']==biorep) & (df_quantiles_therapeutics['name'].str.contains('FMRP'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)

# line plot: geometric mean of output at each x value; the error band spans
# gmean / gstd to gmean * gstd
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=app_palette, 
             legend=False, dashes=False, style='construct', markers=app_markers, ax=ax, markersize=4, markeredgewidth=0.5,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
ax.set(xscale='log', yscale='log', xlabel='marker',)
sns.despine(ax=ax)
# Dotted reference line: geometric mean of output over the rows with
# group == 'marker' in the same replicate.
marker_baseline = sp.stats.gmean(df_quantiles_therapeutics.loc[(df_quantiles_therapeutics['group']=='marker') & (df_quantiles_therapeutics['biorep']==biorep), 'output'])
ax.axhline(marker_baseline, color='black', ls=':')

# marginal density (KDE) of output, drawn along y with the same y-limits as
# the line plot
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=app_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)

# Summary tables for the same constructs (not restricted to one replicate).
plot_df = (
    stats_therapeutics[(stats_therapeutics['name'].str.contains('FMRP'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)
plot_df2 = (
    fits_therapeutics[(fits_therapeutics['name'].str.contains('FMRP'))]
    .sort_values(['ts_num','ts_kind'], ascending=False)
)

# stat gmean
# x-limits are set by hand to span every construct, since the points are added
# by one stripplot call per construct.
xlim = (-0.5, len(plot_df['construct'].unique())-0.5)

ax = axes[2]
# NOTE(review): groupby() iterates groups in sorted key order by default
# (sort=True), so the row order from the sort above is not what sets the group
# order here -- confirm that is intended.
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=app_palette,
                  legend=False, ax=ax, marker=app_markers[construct], **scatter_kwargs)
ax.set(title='Output mean', xlim=xlim, xlabel='', ylabel='', yscale='log',)
# Dotted reference line: arithmetic mean of output_gmean over group == 'marker'.
marker_baseline = stats_therapeutics.loc[(stats_therapeutics['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# slope
ax = axes[3]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=app_palette,
                  legend=False, ax=ax, marker=app_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='', ylabel='',)
# Dotted reference line: mean slope over group == 'marker'.
marker_baseline = fits_therapeutics.loc[(fits_therapeutics['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# Minor ticks are switched off on every axis of this panel in one place.
for ax in axes: ax.minorticks_off()

# Write the whole figure to the output folder and to the manuscript links folder.
fig.savefig(rd.outfile(output_path/fig_name))
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/fig_name))

In [None]:
# Save to OneDrive: write the current figure into the manuscript links folder.
links_dir = rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'
fig.savefig(rd.outfile(links_dir/fig_name))

MOI analysis

In [None]:
# plot_df = stats_quantiles_lenti[(stats_quantiles_lenti['dox']==1000) & (stats_quantiles_lenti['cell']=='MEF')]
# g = sns.FacetGrid(data=plot_df, hue='virus_dilution', palette='flare', col='construct', row='biorep')
# g.map(sns.scatterplot, 'bin_marker_quantiles_median', 'output_gmean',)
# g.set(xscale='log', yscale='log')