In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

##### Load data from 293T, MEF lenti infections (`data_lenti`)

In [None]:
# Export folders of the two Attune experiments that make up the lenti dataset
attune_dir = rd.datadir/'instruments'/'data'/'attune'
base_path_1 = attune_dir/'kasey'/'2024.04.05_exp89'/'export'
base_path_2 = attune_dir/'chris'/'2024.06.02-exp95-lenti-miR-iFFL'/'export'

# Second experiment: folder names plate1..plate9, numbered while cycling the three
# suffixes within each of the three cell-line tags
plate_list = [
    f'plate{number}_{cell_line}_{suffix}'
    for number, (cell_line, suffix) in enumerate(
        [(cell_line, suffix) for cell_line in ('293T', 'MEF2A', 'MEF8A') for suffix in ('P9', 'P14', 'P15')],
        start=1)
]

# First experiment: (export folder, metadata yaml) pairs, yaml files live in 'kasey_yaml2'
exp89_yaml_dir = base_path_1/'kasey_yaml2'
exp89_plates = [
    ('293T_control', 'plate_control.yaml'),
    ('293T_plate1', 'plate01.yaml'),
    ('293T_plate2', 'plate02.yaml'),
    ('293T_plate3', 'plate03.yaml'),
    ('MEF_3_plate1', 'mef_3_plate01.yaml'),
    ('MEF_4-1_plate1', 'mef_4-1_plate01.yaml'),
    ('MEF_4-1_plate2', 'mef_4-1_plate02.yaml'),
    ('MEF_4-1_plate3', 'mef_4-1_plate03.yaml'),
]

# One row per plate: where its exported data lives and which yaml describes it
plates = pd.DataFrame({
    'data_path': [base_path_1/folder for folder, _ in exp89_plates] +
                 [base_path_2/name for name in plate_list],
    'yaml_path': [exp89_yaml_dir/yaml_name for _, yaml_name in exp89_plates] +
                 [base_path_2/(name+'_metadata.yaml') for name in plate_list],
})

# Cache file for the loaded lenti data, output folder for this figure, construct metadata sheet
output_dir = rd.rootdir/'output'
cache_path = output_dir/'fig_designs'/'data_lenti.gzip'
output_path = output_dir/'fig_applications'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Load the lenti data: read the parquet cache if it exists, otherwise build it from the raw exports
if cache_path.is_file():
    data_lenti = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data_lenti = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events whose value is > 0 in every loaded channel
    for channel_name in channel_list:
        is_positive = data_lenti[channel_name] > 0
        data_lenti = data_lenti[is_positive]

    # Drop events with any missing value, then write the cache for later runs
    data_lenti = data_lenti.dropna()
    data_lenti.to_parquet(rd.outfile(cache_path))

# Derived grouping columns (added after the cache is written, so they are recomputed on every run)
data_lenti = data_lenti.assign(
    cell=data_lenti['cell_type'].apply(lambda cell_type: cell_type.split('-')[0]),  # text before the first '-'
    exp=data_lenti['cell_type'] + '_' + data_lenti['virus_batch'],
    moi=data_lenti['virus_dilution'],
)

def map_biorep(df):
    """Return a copy of `df` with a `biorep` column numbering its experiments.

    Each distinct value of `df['exp']` receives an integer, starting at 0, in
    order of first appearance. The input frame is left unmodified.
    """
    experiments = df['exp'].unique()
    number_of = dict(zip(experiments, range(len(experiments))))
    return df.assign(biorep=df['exp'].map(number_of))

# Number the experiments separately within each `cell` group (map_biorep restarts at 0 for
# every group), then discard the group index so the result has a plain positional index.
data_lenti = data_lenti.groupby('cell')[data_lenti.columns].apply(map_biorep).reset_index(drop=True)
display(data_lenti)

In [None]:
# Gate cells: per experiment, the gate of each channel is the 99.9th percentile
# of that channel in the virus_dilution == 0 samples
channel_list = ['mGL-A', 'mCherry-A', 'mRuby2-A']
uninfected = data_lenti[(data_lenti['virus_dilution']==0)]
gates_lenti = pd.DataFrame()
for channel in channel_list:
    gates_lenti[channel] = uninfected.groupby(['exp'])[channel].apply(lambda values: values.quantile(0.999))
gates_lenti = gates_lenti.reset_index()
display(gates_lenti)

# Add missing gates: these experiments get the mean gate of existing rows instead.
# '293T' entries copy the '293T_na' row; 'MEF' entries average every row whose name
# contains 'MEF' (recomputed before each append, exactly as the rows are added one by one).
# NOTE(review): '293T_P14_' keeps its trailing underscore -- confirm it matches the `exp` values.
missing_gates = [
    ('293T_P10', '293T'),
    ('293T_P14_', '293T'),
    ('293T_P16', '293T'),
    ('MEF-3_P10', 'MEF'),
    ('MEF-4-1_P10', 'MEF'),
    ('MEF-4-1_P14', 'MEF'),
    ('MEF-4-1_P16', 'MEF'),
]
for exp_name, source in missing_gates:
    if source == '293T':
        donors = gates_lenti['exp']=='293T_na'
    else:
        donors = gates_lenti['exp'].str.contains('MEF')
    gates_lenti.loc[len(gates_lenti.index)] = [exp_name] + list(gates_lenti.loc[donors, channel_list].mean().values)

# Indicate which channels are relevant for each experiment
gates_lenti = gates_lenti.sort_values(['exp']).assign(marker='mGL-A', output='mRuby2-A')

# Gate data by marker expression, one (cell_type, virus_batch) group at a time
data_lenti = (
    data_lenti.groupby(['cell_type','virus_batch'])[data_lenti.columns]
    .apply(lambda events: base.gate_data(events, gates_lenti))
    .reset_index(drop=True)
)

# Keep expressing events from samples that received virus
received_virus = data_lenti['virus_dilution']!=0
df_lenti = data_lenti[(data_lenti['expressing']) & received_virus]

##### Load data from lenti infection of T cells (`data_tcell`)

In [None]:
# Load T cell lenti data (collaborator export), caching the cleaned table as parquet
tcell_path = rd.datadir/'instruments'/'data'/'collaborators'/'birnbaum_steph'/'2024-06-10 Galloway Exp 1'
cache_path = output_path/'data_tcell.gzip'

if cache_path.is_file():
    data_tcell = pd.read_parquet(cache_path)
else:
    channels = ['FSC-A', 'SSC-A', 'FITC-A', 'PE-A', 'APC-A750-A']
    data_tcell = rd.flow.load_csv_with_metadata(tcell_path/'export', tcell_path/'metadata.yaml', columns=channels)

    # Keep only events whose value is > 0 in every loaded channel
    for channel_name in channels:
        data_tcell = data_tcell[data_tcell[channel_name]>0]

    # Rename the instrument channels to the names used by the rest of the notebook
    channel_names = {'FITC-A': 'mGL-A', 'PE-A': 'mRuby2-A', 'APC-A750-A': 'livedead-A'}
    data_tcell = data_tcell.rename(columns=channel_names)
    data_tcell.to_parquet(rd.outfile(cache_path))

# Constant grouping columns for this dataset
data_tcell = data_tcell.assign(exp='steph', biorep=1, cell='tcell')
display(data_tcell)

In [None]:
# Gate cells: per experiment, the gate of each channel is a high quantile of the
# untransduced ('UT') sample
channel_list = ['mGL-A', 'mRuby2-A']
untransduced = data_tcell[data_tcell['construct']=='UT']
gates_tcell = pd.DataFrame()
for channel in channel_list:
    # adjust gate to better isolate marker+ pop
    gates_tcell[channel] = untransduced.groupby(['exp'])[channel].apply(lambda values: values.quantile(0.99995))
gates_tcell = gates_tcell.reset_index()
display(gates_tcell)

# Indicate which channels are relevant for each experiment
gates_tcell = gates_tcell.sort_values(['exp']).assign(marker='mGL-A', output='mRuby2-A')

# Gate data by marker expression and live/dead stain
data_tcell = (
    data_tcell.groupby('exp')[data_tcell.columns]
    .apply(lambda events: base.gate_data(events, gates_tcell))
    .reset_index(drop=True)
)
livedead_gate = 2000
not_untransduced = data_tcell['construct']!='UT'
below_livedead_gate = data_tcell['livedead-A']<livedead_gate
df_tcell = data_tcell[(data_tcell['expressing']) & not_untransduced & below_livedead_gate]

##### Load data from lenti infection of rat cortical neurons (`data_neuron`)

In [None]:
# Load rat cortical neuron data, caching the cleaned table as parquet
neuron_path = rd.datadir/'instruments'/'data'/'attune'/'chris'/'2024.06.15-rat-neurons'
cache_path = output_path/'data_neuron.gzip'

data_neuron = pd.DataFrame()
if cache_path.is_file(): data_neuron = pd.read_parquet(cache_path)
else: 
    channels = ['FSC-A', 'SSC-A', 'mGL-A', 'mRuby2-A', 'mCherry-A']
    data_neuron = rd.flow.load_csv_with_metadata(neuron_path/'export', neuron_path/'metadata.yaml', columns=channels)

    # Remove non-positive values in every loaded channel.
    # This iterates over `channels` (the list loaded just above), matching the other datasets;
    # it previously iterated over `channel_list`, a variable left over from an earlier gating
    # cell, so the filter depended on execution order and skipped FSC-A/SSC-A/mCherry-A.
    # NOTE(review): an existing cache file was written with the old filter -- delete it to rebuild.
    for c in channels: data_neuron = data_neuron[data_neuron[c]>0]
    
    data_neuron.to_parquet(rd.outfile(cache_path))

# Constant grouping columns for this dataset
data_neuron['biorep'] = 1
data_neuron['exp'] = 'cpj'
data_neuron['cell'] = 'neuron'
data_neuron['dox'] = 1000
display(data_neuron)

In [None]:
# Gate cells: per experiment, the gate of each channel is the 99.9th percentile
# of that channel in the untransduced ('UT') sample
gates_neuron = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates_neuron[channel] = data_neuron[data_neuron['construct']=='UT'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates_neuron.reset_index(inplace=True)
display(gates_neuron)

# Indicate which channels are relevant for each experiment
gates_neuron.sort_values(['exp'], inplace=True)
gates_neuron['marker'] = 'mGL-A'
gates_neuron['output'] = 'mRuby2-A'

# Adjust marker gate to better isolate infected population
# (this replaces the quantile-based mGL-A gate computed above with a fixed value,
# so it must stay after the loop and before the gating call)
gates_neuron['mGL-A'] = 1e3

# Gate data by marker expression
data_neuron = data_neuron.groupby('exp')[data_neuron.columns].apply(lambda x: base.gate_data(x,gates_neuron))
data_neuron.reset_index(inplace=True, drop=True)
# Keep expressing events from every sample except the untransduced one
df_neuron = data_neuron[(data_neuron['expressing']) & (data_neuron['construct']!='UT')]

# Since there is no marker-only condition, save the output expression stats for untransduced cells
# NOTE(review): `baseline_df_neuron` is assigned again in the "Combine & compute stats" section
# (grouped by cell/exp/biorep and renamed to 'output_gmean'), which overwrites this value.
baseline_df_neuron = data_neuron[(data_neuron['construct']=='UT')].groupby(['exp'])['output'].apply(sp.stats.gmean).reset_index()

##### Metadata, color palettes, etc.

In [None]:
# Define metadata: one row per construct, loaded through the shared helper
metadata = base.get_metadata(metadata_path, style='designs')

# Display name for each `ts_kind` value
ts_label = {
    'na': 'base',
    'NT': 'OL',
    'T': 'CL',
    'none': '–',
}
metadata['ts_label'] = metadata['ts_kind'].replace(ts_label)

# Create dicts to specify colors/markers, keyed by construct
by_construct = metadata.set_index('construct')
metadata_dict = by_construct.to_dict()
main_palette, main_markers = metadata_dict['color'], metadata_dict['markers']

##### Combine & compute stats

In [None]:
# Combine lenti data into a single df (data_comb).
# Any column holding a missing value after stacking is dropped, so only columns
# that are filled in for every event of all three datasets remain.
gated_datasets = [df_lenti, df_tcell, df_neuron]
data_comb = pd.concat(gated_datasets, ignore_index=True).dropna(axis='columns')
display(data_comb)

In [None]:
# Since there is no marker-only condition, save the output expression stats for untransduced cells
def _baseline_output_gmean(events):
    """Geometric mean of the 'output' column per (cell, exp, biorep), as column 'output_gmean'."""
    per_replicate = events.groupby(['cell','exp','biorep'])['output']
    return per_replicate.apply(sp.stats.gmean).rename('output_gmean').reset_index()

# Control samples: virus_dilution == 0 for the lenti set, construct 'UT' for T cells and neurons
baseline_df = _baseline_output_gmean(data_lenti[(data_lenti['virus_dilution']==0)])
baseline_df_tcell = _baseline_output_gmean(data_tcell[(data_tcell['construct']=='UT')])
baseline_df_neuron = _baseline_output_gmean(data_neuron[(data_neuron['construct']=='UT')])
baseline_df = pd.concat([baseline_df, baseline_df_tcell, baseline_df_neuron])
display(baseline_df)

In [None]:
# Bin data and calculate statistics, once with 20 bins (all four outputs kept)
# and once with 10 bins (only the binned table kept)
group_cols = ['cell','exp','biorep','construct','moi','dox']
df_quantiles, stats, stats_quantiles, fits = base.calculate_bins_stats(
    data_comb, by=list(group_cols), num_bins=20,
    stat_list=[sp.stats.gmean, np.std, sp.stats.variation])
df_quantiles_fewer_bins, _, _, _ = base.calculate_bins_stats(data_comb, by=list(group_cols), num_bins=10)

# Add metadata: left-join the construct metadata onto every result table
df_quantiles, stats, stats_quantiles, fits, df_quantiles_fewer_bins = (
    table.merge(metadata, how='left', on='construct')
    for table in (df_quantiles, stats, stats_quantiles, fits, df_quantiles_fewer_bins)
)

### Set up figure

In [None]:
# Font sizes shared across the manuscript figures
base_size = base.font_sizes['base_size']
smaller_size = base.font_sizes['smaller_size']

# seaborn theme: tick style, 'paper' context, Arial at the base size
font_rc = {'font.size': base_size, 'font.family': 'sans-serif', 'font.sans-serif':['Arial']}
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=font_rc)

# matplotlib overrides, grouped by what they control
text_rc = {'axes.titlesize': base_size, 'axes.labelsize': base_size,
           'xtick.labelsize': smaller_size, 'ytick.labelsize': smaller_size}
export_rc = {'pdf.fonttype': 42}
tick_rc = {'ytick.major.size': 3, 'xtick.major.size': 3, 'ytick.minor.size': 2,
           'ytick.major.pad': 2, 'xtick.major.pad': 2}
line_rc = {'lines.linewidth': 1}
axes_rc = {'axes.spines.right': False, 'axes.spines.top': False, 'axes.labelpad': 2}
plt.rcParams.update({**text_rc, **export_rc, **tick_rc, **line_rc, **axes_rc})

In [None]:
# Create the overall figure, gridspec, and add subfigure labels
fig = plt.figure(figsize=(6.8504, 1.5*6)) #4.48819, 6.8504
fig_gridspec = matplotlib.gridspec.GridSpec(
    6, 6, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=[1.5]*6, width_ratios=[1]*6,
)

# Panel label -> (grid row, column slice); the '' entry is an unlabeled panel
panel_cells = {
    'A': (0, slice(None)),
    'B': (1, slice(None, 4)),
    'C': (1, slice(4, None)),
    'D': (2, slice(None, 2)),
    'E': (2, slice(2, None)),
    'F': (3, slice(None, 4)),
    'G': (3, slice(4, None)),
    'H': (4, slice(None, 4)),
    '': (4, slice(4, None)),
    'I': (5, slice(None, 4)),
    'J': (5, slice(4, None)),
}
subfigures = {
    panel: fig.add_subfigure(fig_gridspec[row, cols])
    for panel, (row, cols) in panel_cells.items()
}

# Bold label in the top-left corner of each subfigure (labels containing '2' are skipped)
label_size = base.font_sizes['subpanel_label']
for label, subfig in subfigures.items():
    if '2' not in label:
        panel_text = matplotlib.text.Text(
            x=0, y=1, text=f'{label}', fontsize=label_size,
            fontweight='bold', verticalalignment='top', transform=subfig.transSubfigure)
        subfig.add_artist(panel_text)

# Shared styling for the strip plots in the panels below
scatter_kwargs = {'s': 4, 'jitter': 0.1, 'linewidth': 0.5, 'edgecolor': 'white'}

# Output locations for the assembled figure
fig_name = 'fig_lenti-supp.pdf'
fig_path = rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'
for destination in (output_path/fig_name,    # save locally
                    fig_path/fig_name):      # save to OneDrive
    fig.savefig(rd.outfile(destination))

In [None]:
# Panel B: mean, standard deviation and slope per construct in 293T cells
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.3))

# 293T data at dox == 1000 and moi == 1, excluding the 'controller' and 'marker' groups;
# rows are ordered by group (in `group_order`) and then by 'ts'
group_order = ['base','miR','ts3','ts5']
plot_df = stats[~stats['group'].isin(['controller','marker']) & (stats['cell']=='293T') &
                      (stats['dox']==1000) & (stats['moi']==1)].copy()
plot_df['group'] = plot_df['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df.sort_values(['group','ts'], inplace=True)

plot_df2 = fits[~fits['group'].isin(['controller','marker']) & (fits['cell']=='293T') & 
                      (fits['dox']==1000) & (fits['moi']==1)].copy()
plot_df2['group'] = plot_df2['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df2.sort_values(['group','ts'], inplace=True)

# shift xticks to add more space between promoter groups
# (constructs are assigned to these x positions in their sorted order of appearance)
buffer = 0.6
num_groups = 3
xtick_locs = [0, 1+buffer, 2+buffer] + [i+buffer*2 for i in range(3,7)]
construct_loc = {k:v for k,v in zip(plot_df['construct'].unique(), xtick_locs)}
plot_df['construct_loc'] = plot_df['construct'].replace(construct_loc)
construct_loc = {k:v for k,v in zip(plot_df2['construct'].unique(), xtick_locs)}
plot_df2['construct_loc'] = plot_df2['construct'].replace(construct_loc)
metadata['construct_loc'] = metadata['construct'].map(construct_loc)
# Work on an explicit copy: `m` is modified below, and assigning into the bare result of
# dropna() raises SettingWithCopyWarning
m = metadata.dropna().copy()
m['construct_loc'] = m['construct_loc'].astype(str)
# native_scale=True makes seaborn place points at the numeric construct_loc positions
scatter_kwargs2 = dict(s=4, jitter=0.3, linewidth=0.5, edgecolor='white', native_scale=True)
xlim = (-0.5, plot_df['construct_loc'].max()+0.5)

# adjust markers: constructs with the miR in the CDS or the target site in the 3' position use 'D'
m.loc[(m['miR_loc']=='CDS') | (m['ts_loc']=='3\''), 'markers'] = 'D'
m_dict = m.set_index('construct').to_dict('dict')
comb_markers = m_dict['markers']

# make xticklabels
def get_label(df):
    """Return a copy of `df` with a `label` column holding its tick-label text.

    The label is copied from the column selected by the first value found in
    `df['group']`: 'group' for 'base', 'miR' for 'miR', and 'ts' for 'ts3'/'ts5'.
    Raises KeyError for any other group value. The input frame is not modified.
    """
    label_source = {'base': 'group', 'miR': 'miR', 'ts3': 'ts', 'ts5': 'ts'}
    first_group = df['group'].unique()[0]
    return df.assign(label=df[label_source[first_group]])

# Attach the tick-label text to the metadata rows, one group at a time
m = m.groupby('group')[m.columns].apply(get_label).reset_index(drop=True)


# stat gmean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)
# Dotted reference line: mean of the 293T baseline values in baseline_df
baseline = baseline_df.loc[baseline_df['cell']=='293T', 'output_gmean'].mean()
ax.axhline(baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Standard deviation', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)

# slope (taken from the `fits` table; only every other y tick is labelled)
ax = axes[2]
for construct, group in plot_df2.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.2), ylabel='', xticks=xtick_locs,
       yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])

for ax in axes:
    sns.despine(ax=ax)

    # add shaded region for miR-only constructs
    # (spans from halfway between ticks 0 and 1 to halfway between ticks 2 and 3)
    span1 = (xtick_locs[0]+(xtick_locs[1]-xtick_locs[0])/2, xtick_locs[2]+(xtick_locs[3]-xtick_locs[2])/2,)
    ax.axvspan(*span1, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    # presumably swaps the numeric tick labels for the 'label' text of the matching metadata row -- confirm in rushd
    rd.plot.generate_xticklabels(m.drop_duplicates('construct_loc'), 'construct_loc', ['label'], annotate=False, ax=ax)
    # show '.' as '-' in the label text and slant the labels
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    # hide labels on minor y ticks
    ax.yaxis.set_minor_formatter(plt.NullFormatter())

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel C: mean and slope by design for 293T cells at dox == 0
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.5))

# 293T, dox == 0, moi == 1, restricted to the 'controller' and 'base' groups
plot_df = stats[(stats['group'].isin(['controller','base'])) & (stats['cell']=='293T') &
                      (stats['dox']==0) & (stats['moi']==1)].copy()
plot_df2 = fits[(fits['group'].isin(['controller','base'])) & (fits['cell']=='293T') &
                      (fits['dox']==0) & (fits['moi']==1)].copy()
# one categorical x position per distinct design, with half a unit of padding on each side
xlim = (-0.5, len(plot_df['design'].unique())-0.5)

# stat gmean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='design', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlim=xlim, xlabel='design', ylabel='', yscale='log', ylim=(2.5e1,6e1))
# Dotted reference line: mean of the 293T baseline values in baseline_df
baseline = baseline_df.loc[baseline_df['cell']=='293T', 'output_gmean'].mean()
ax.axhline(baseline, color='black', ls=':')

# slope
ax = axes[1]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='design', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='design', ylabel='', ylim=(-0.2,0.2))
# Dotted reference line at slope 0
ax.axhline(0, color='black', ls=':')

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel D: mean and standard deviation per construct in neurons
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.4))

# neuron rows of the summary statistics at moi == 1
plot_df = stats[(stats['cell']=='neuron') & (stats['moi']==1)]

# mean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', ylim=(6e2,1e4))
# presumably swaps the construct tick labels for the matching 'ts_label' text -- confirm in rushd
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_std', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', ylim=(2e2,3e3))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel F: mean, std, slope, CV and output CDF for MEF cells at dox == 1000
subfig = subfigures['F']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,5, gridspec_kw=dict(width_ratios=(1,1,1,1,1.5), wspace=0.5))

# MEF, moi == 1, dox == 1000, 'base'/'controller' groups, design <= 1
plot_df = stats[(stats['cell']=='MEF') & (stats['moi']==1) & (stats['dox']==1000) & (stats['group'].isin(['base','controller'])) &
                (stats['design']<=1)]
plot_df2 = fits[(fits['cell']=='MEF') & (fits['moi']==1) & (fits['dox']==1000) & (fits['group'].isin(['base','controller'])) &
                (fits['design']<=1)]

# mean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', ylim=(3e1,6e3))
# presumably swaps the construct tick labels for the matching 'ts_label' text -- confirm in rushd
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_std', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', ylim=(4e1,6e3))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# slope (taken from the `fits` table)
ax = axes[2]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='', ylim=(0,1))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# CV
# (plots 'output_variation'; presumably the sp.stats.variation statistic requested in stat_list -- confirm in base)
ax = axes[3]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_variation', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='CV', xlabel='', ylabel='', ylim=(0.5,1.8))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# CDF
# Same conditions as above on the per-event table; keeps biorep 1 for every construct,
# plus biorep 6 for constructs whose ts_kind is 'na'
ax = axes[4]
plot_df = df_quantiles[(df_quantiles['group'].isin(['controller','base'])) & (df_quantiles['dox']==1000) & 
                        (df_quantiles['moi']==1) & (df_quantiles['cell']=='MEF') & (df_quantiles['design']<=1) &
                        (((df_quantiles['ts_kind']=='na') & (df_quantiles['biorep']==6)) | ((df_quantiles['biorep']==1)))
                      ].copy()
plot_df.sort_values(['ts_num','ts_kind'], ascending=False, inplace=True)
# NOTE(review): plot_order is not used in the rest of this cell
plot_order = reversed(plot_df['construct'].unique())

# Cumulative KDE of the output on a log x axis; common_norm=False normalizes each construct separately
sns.kdeplot(data=plot_df, x='output', hue='construct', palette=main_palette, ax=ax,
            cumulative=True, common_norm=False, legend=False, log_scale=True)
ax.set(xticks=np.logspace(2,6,5), ylabel='', xlim=(2e1,2e4), title='CDF')
sns.despine(ax=ax)
ax.minorticks_off()
ax.grid(zorder=-1, color=base.get_light_color(base.get_light_color(base.colors['gray'])), which='both',alpha=0.7)

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel G: mean and slope by design for MEF cells at dox == 0
subfig = subfigures['G']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.5))

# MEF, dox == 0, moi == 1, restricted to the 'controller' and 'base' groups
plot_df = stats[(stats['group'].isin(['controller','base'])) & (stats['cell']=='MEF') &
                      (stats['dox']==0) & (stats['moi']==1)].copy()
plot_df2 = fits[(fits['group'].isin(['controller','base'])) & (fits['cell']=='MEF') &
                      (fits['dox']==0) & (fits['moi']==1)].copy()
# one categorical x position per distinct design, with half a unit of padding on each side
xlim = (-0.5, len(plot_df['design'].unique())-0.5)

# stat gmean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='design', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlim=xlim, xlabel='design', ylabel='', yscale='log', ylim=(2.5e1,6e1))
# Dotted reference line: mean of the MEF baseline values in baseline_df
baseline = baseline_df.loc[baseline_df['cell']=='MEF', 'output_gmean'].mean()
ax.axhline(baseline, color='black', ls=':')

# slope
ax = axes[1]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='design', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='design', ylabel='', ylim=(-0.2,0.2))
# Dotted reference line at slope 0
ax.axhline(0, color='black', ls=':')

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel H: mean, standard deviation and slope per construct in MEF cells
subfig = subfigures['H']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.3))

# MEF data at dox == 1000 and moi == 1, excluding the 'controller' and 'marker' groups;
# rows are ordered by group (in `group_order`) and then by 'ts'
group_order = ['base','miR','ts3','ts5']
plot_df = stats[~stats['group'].isin(['controller','marker']) & (stats['cell']=='MEF') &
                      (stats['dox']==1000) & (stats['moi']==1)].copy()
plot_df['group'] = plot_df['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df.sort_values(['group','ts'], inplace=True)

plot_df2 = fits[~fits['group'].isin(['controller','marker']) & (fits['cell']=='MEF') & 
                      (fits['dox']==1000) & (fits['moi']==1)].copy()
plot_df2['group'] = plot_df2['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df2.sort_values(['group','ts'], inplace=True)

# shift xticks to add more space between promoter groups
# (constructs are assigned to these x positions in their sorted order of appearance)
buffer = 0.6
num_groups = 3
xtick_locs = [0, 1+buffer, 2+buffer] + [i+buffer*2 for i in range(3,7)]
construct_loc = {k:v for k,v in zip(plot_df['construct'].unique(), xtick_locs)}
plot_df['construct_loc'] = plot_df['construct'].replace(construct_loc)
construct_loc = {k:v for k,v in zip(plot_df2['construct'].unique(), xtick_locs)}
plot_df2['construct_loc'] = plot_df2['construct'].replace(construct_loc)
metadata['construct_loc'] = metadata['construct'].map(construct_loc)
# Work on an explicit copy: `m` is modified below, and assigning into the bare result of
# dropna() raises SettingWithCopyWarning
m = metadata.dropna().copy()
m['construct_loc'] = m['construct_loc'].astype(str)
# native_scale=True makes seaborn place points at the numeric construct_loc positions
scatter_kwargs2 = dict(s=4, jitter=0.3, linewidth=0.5, edgecolor='white', native_scale=True)
xlim = (-0.5, plot_df['construct_loc'].max()+0.5)

# adjust markers: constructs with the miR in the CDS or the target site in the 3' position use 'D'
m.loc[(m['miR_loc']=='CDS') | (m['ts_loc']=='3\''), 'markers'] = 'D'
m_dict = m.set_index('construct').to_dict('dict')
comb_markers = m_dict['markers']

# make xticklabels
def get_label(df):
    """Return a copy of `df` with a `label` column holding its tick-label text.

    The label is copied from the column selected by the first value found in
    `df['group']`: 'group' for 'base', 'miR' for 'miR', and 'ts' for 'ts3'/'ts5'.
    Raises KeyError for any other group value. The input frame is not modified.
    """
    label_source = {'base': 'group', 'miR': 'miR', 'ts3': 'ts', 'ts5': 'ts'}
    first_group = df['group'].unique()[0]
    return df.assign(label=df[label_source[first_group]])

# Attach the tick-label text to the metadata rows, one group at a time
m = m.groupby('group')[m.columns].apply(get_label).reset_index(drop=True)


# stat gmean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)
# Dotted reference line: mean of the MEF baseline values. This panel plots MEF data, so the
# baseline must come from MEF as well (it previously selected the '293T' rows).
baseline = baseline_df.loc[baseline_df['cell']=='MEF', 'output_gmean'].mean()
ax.axhline(baseline, color='black', ls=':')

# stat std
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[1]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Standard deviation', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)

# slope (taken from the `fits` table; only every other y tick is labelled)
ax = axes[2]
for construct, group in plot_df2.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=comb_markers[construct], **scatter_kwargs2)
ax.set(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.2), ylabel='', xticks=xtick_locs,
       yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])

for ax in axes:
    sns.despine(ax=ax)

    # add shaded region for miR-only constructs
    # (spans from halfway between ticks 0 and 1 to halfway between ticks 2 and 3)
    span1 = (xtick_locs[0]+(xtick_locs[1]-xtick_locs[0])/2, xtick_locs[2]+(xtick_locs[3]-xtick_locs[2])/2,)
    ax.axvspan(*span1, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    # presumably swaps the numeric tick labels for the 'label' text of the matching metadata row -- confirm in rushd
    rd.plot.generate_xticklabels(m.drop_duplicates('construct_loc'), 'construct_loc', ['label'], annotate=False, ax=ax)
    # show '.' as '-' in the label text and slant the labels
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    # hide labels on minor y ticks
    ax.yaxis.set_minor_formatter(plt.NullFormatter())

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel I: mean, std, slope, CV and output CDF for T cells at dox == 1000
subfig = subfigures['I']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,5, gridspec_kw=dict(width_ratios=(1,1,1,1,1.5), wspace=0.5))

# T cell rows at moi == 1 and dox == 1000
plot_df = stats[(stats['cell']=='tcell') & (stats['moi']==1) & (stats['dox']==1000)]
plot_df2 = fits[(fits['cell']=='tcell') & (fits['moi']==1) & (fits['dox']==1000) ]

# mean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', ylim=(1e4,5e5))
# presumably swaps the construct tick labels for the matching 'ts_label' text -- confirm in rushd
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_std', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', ylim=(1e4,5e5))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# slope (taken from the `fits` table)
ax = axes[2]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='', ylim=(0.5,1.4))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# CV
# (plots 'output_variation'; presumably the sp.stats.variation statistic requested in stat_list -- confirm in base)
ax = axes[3]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_variation', hue='construct', palette=main_palette,
                  legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='CV', xlabel='', ylabel='', ylim=(0.5,1.4))
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
sns.despine(ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# CDF
# Same conditions as above, applied to the per-event table
ax = axes[4]
plot_df = df_quantiles[(df_quantiles['dox']==1000) & (df_quantiles['moi']==1) & (df_quantiles['cell']=='tcell')].copy()
plot_df.sort_values(['ts_num','ts_kind'], ascending=False, inplace=True)
# NOTE(review): plot_order is not used in the rest of this cell
plot_order = reversed(plot_df['construct'].unique())

# Cumulative KDE of the output on a log x axis; common_norm=False normalizes each construct separately
sns.kdeplot(data=plot_df, x='output', hue='construct', palette=main_palette, ax=ax,
            cumulative=True, common_norm=False, legend=False, log_scale=True)
ax.set(xticks=np.logspace(2,6,5), ylabel='', xlim=(2e2,6e6), title='CDF')
sns.despine(ax=ax)
ax.minorticks_off()
ax.grid(zorder=-1, color=base.get_light_color(base.get_light_color(base.colors['gray'])), which='both',alpha=0.7)

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive

In [None]:
# Panel J: mean and slope per construct for T cells at dox == 0
subfig = subfigures['J']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.5))

# T cell rows at dox == 0 and moi == 1
plot_df = stats[(stats['cell']=='tcell') & (stats['dox']==0) & (stats['moi']==1)].copy()
plot_df2 = fits[(fits['cell']=='tcell') & (fits['dox']==0) & (fits['moi']==1)].copy()

# stat gmean
# (one stripplot call per construct so that each construct can use its own marker)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', ylim=(5e1,1e3))
# Dotted reference line: mean of the T cell baseline values in baseline_df
baseline = baseline_df.loc[baseline_df['cell']=='tcell', 'output_gmean'].mean()
ax.axhline(baseline, color='black', ls=':')
# presumably swaps the construct tick labels for the matching 'ts_label' text -- confirm in rushd
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# slope (y limits are left to matplotlib's autoscaling here)
ax = axes[1]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='construct', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='',)
# Dotted reference line at slope 0
ax.axhline(0, color='black', ls=':')
rd.plot.generate_xticklabels(metadata, 'construct', ['ts_label'], ax=ax, annotate=False)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# Save the updated figure to the same two files as the cell that creates `fig`.
# The first call previously received `fig_path` (the OneDrive *folder*, no file name),
# so the local copy in `output_path` was never refreshed.
fig.savefig(rd.outfile(output_path/fig_name))   # save locally
fig.savefig(rd.outfile(fig_path/fig_name))      # save to OneDrive