In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

Circuit tuning data (`data`)

In [None]:
# Root folder of the Attune exports used for the circuit tuning experiments
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'

# Export folder of each experiment
exp90_path = base_path/'2024.03.31_exp90'/'export'
exp90_2_path = base_path/'2024.04.02_exp90.2'/'export'
exp90_3_path = base_path/'2024.04.02_exp90.3'/'export'
exp90_4_path = base_path/'2024.04.05_exp90.4'/'export'
exp91_path = base_path/'2024.04.08_exp91'/'export'
exp92_path = base_path/'2024.04.12_exp92'/'export'

# One row per plate (18 in total). The four lists are matched by position, so they
# must stay the same length and in the same plate order.
plates = pd.DataFrame({
    # Folder holding the exported events of each plate
    'data_path': [exp90_path/'plate1', exp90_path/'plate2', 
                  exp90_2_path, exp90_4_path,
                  exp90_3_path/'plate1', exp90_3_path/'plate2', 
                  exp91_path/'plate1.1', exp91_path/'plate1.2', exp91_path/'plate1.3', 
                  exp91_path/'plate2.1', exp91_path/'plate2.2', exp91_path/'plate2.3',
                  exp92_path/'plate1.1', exp92_path/'plate1.2', exp92_path/'plate1.3', 
                  exp92_path/'plate2.1', exp92_path/'plate2.2', exp92_path/'plate2.3',],
    
    # Well-layout YAML for each plate; the exp90.2, exp90.3 and exp90.4 plates
    # reuse the layout files stored in the exp90 export folder
    'yaml_path': ([exp90_path/'exp90_plate1_wells.yaml', exp90_path/'exp90_plate2_wells.yaml', 
                   exp90_path/'exp90_plate2_wells.yaml', exp90_path/'exp90_plate1_wells.yaml',
                   exp90_path/'exp90_plate1_wells.yaml', exp90_path/'exp90_plate2_wells.yaml', ] +
                  [exp91_path/'exp91_plate1_wells.yaml']*3 + 
                  [exp91_path/'exp91_plate2.1_wells.yaml', exp91_path/'exp91_plate2.2_wells.yaml', exp91_path/'exp91_plate2.3_wells.yaml'] +
                  [exp92_path/'exp92_plate1_wells.yaml', exp92_path/'exp92_plate1.2_wells.yaml', exp92_path/'exp92_plate1_wells.yaml',
                   exp92_path/'exp92_plate2_wells.yaml', exp92_path/'exp92_plate2.2_wells.yaml', exp92_path/'exp92_plate2_wells.yaml',]
                  ),
    
    # Biological replicate number of each plate
    'biorep': ([1, 1, 
                2, 2, 
                3, 3,] + 
                [1, 2, 3,]*4),
    
    # Experiment label of each plate (later used as the grouping key for gating)
    'exp': (['exp90', 'exp90', 
             'exp90.2', 'exp90.4', 
             'exp90.3', 'exp90.3',] + 
            ['exp91']*6 + 
            ['exp92']*6)
})

# Figure output folder, parquet cache of the loaded events, and construct metadata sheet
output_path = rd.rootdir/'output'/'fig_designs-supp'
cache_path = rd.rootdir/'output'/'fig_overview'/'data.gzip'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Use the cached events when the cache file exists; otherwise load the raw
# exports, clean them, and write the cache for next time
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel,
    # then discard rows with any missing value
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all_channels].dropna()
    data.to_parquet(rd.outfile(cache_path))

# Attach the per-construct metadata to every event
metadata = base.get_metadata(metadata_path, style='designs')
data = data.merge(metadata, how='left', on='construct')
display(data)

In [None]:
# Lookup tables keyed by construct: plotting color and marker shape
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
designs_palette, designs_markers = metadata_dict['color'], metadata_dict['markers']

In [None]:
# Gate cells
# Gate for each channel = 99.9th percentile of the untransfected ('UT') cells, per experiment
gates = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates[channel] = data[data['construct']=='UT'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates.reset_index(inplace=True)  # turn the 'exp' group index into a regular column

# Add missing gates
# exp90.4 first gets a placeholder row, which is then overwritten with the exp90.2 gate values
gates.loc[len(gates.index)] = ['exp90.4',0,0,]  
gates.loc[gates['exp']=='exp90.4', channel_list] = gates.loc[gates['exp']=='exp90.2', channel_list].values

# Indicate which channels are relevant for each experiment
gates.sort_values(['exp'], inplace=True)
gates['marker'] = 'mGL-A'
gates['output'] = 'mRuby2-A'

# Gate data by transfection marker expression
# NOTE(review): base.gate_data lives in base.py; presumably it adds the 'expressing',
# 'marker' and 'output' columns that later cells read — confirm against base.py
data = data.groupby('exp')[data.columns].apply(lambda x: base.gate_data(x,gates))
data.reset_index(inplace=True, drop=True)
# Keep cells flagged as expressing, excluding untransfected wells and the 'elp_exp61' experiment
df = data[(data['expressing']) & (data['construct']!='UT') & (data['exp']!='elp_exp61')]

In [None]:
# Bin data and calculate statistics
# base.calculate_bins_stats returns four objects; the third one is not used here
df_quantiles, stats, _, fits = base.calculate_bins_stats(df, num_bins=20)
# Re-attach construct metadata so the summaries can be filtered by group, miR, promoter, ...
stats = stats.merge(metadata, how='left', on='construct')
fits = fits.merge(metadata, how='left', on='construct')

In [None]:
# Display label for each target-site kind; values without an entry are left unchanged
ts_label = dict(na='base', NT='OL', T='CL', none='–')
metadata['ts_label'] = metadata['ts_kind'].replace(to_replace=ts_label)

Two-gene data (`data2`)

In [None]:
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
exp93_path = base_path/'2024.04.14_exp93'/'export'

# Three plates, one per biological replicate, all described by the same well layout
plates = pd.DataFrame(dict(
    data_path=[exp93_path/'plate1', exp93_path/'plate2', exp93_path/'plate3'],
    yaml_path=[exp93_path/'exp93_wells.yaml']*3,
    biorep=[1, 2, 3],
    exp=['exp93']*3,
))

cache_path = rd.rootdir/'output'/'fig_architecture'/'data.gzip'

# Use the cached events when the cache file exists; otherwise load the raw
# exports, clean them, and write the cache for next time
if cache_path.is_file():
    data2 = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A','SNAP-647-A']
    data2 = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel,
    # then discard rows with any missing value
    positive_in_all_channels = (data2[channel_list] > 0).all(axis=1)
    data2 = data2[positive_in_all_channels].dropna()
    data2.to_parquet(rd.outfile(cache_path))

In [None]:
# Attach metadata for both constructs of each condition
metadata_construct2 = pd.read_excel(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct2-metadata.xlsx')
data2 = (
    data2
    .merge(metadata, how='left', on='construct')
    .merge(metadata_construct2, how='left', on='construct2')
)
# A condition is the pair of constructs delivered together
data2['condition'] = data2['construct'] + '_' + data2['construct2']

# The far-red channel is referred to as iRFP-A from here on
data2 = data2.rename(columns={'SNAP-647-A': 'iRFP-A'})
display(data2)

In [None]:
# Gate cells
# Gate for each channel = 99.9th percentile of the 'GEEC555' wells, per experiment
gates2 = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates2[channel] = data2[data2['construct']=='GEEC555'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates2.reset_index(inplace=True)

# Add missing iRFP gate
# The fixed threshold is assigned as a scalar so it broadcasts to every row of
# gates2; a one-element list would only fit a table with exactly one experiment.
gate_iRFP = 2.5e2
gates2['iRFP-A'] = gate_iRFP

# Indicate which channels are relevant for each experiment
gates2.sort_values(['exp'], inplace=True)
gates2['marker'] = 'iRFP-A'
gates2['output'] = 'mRuby2-A'

# Gate data by transfection marker expression
# NOTE(review): base.gate_data lives in base.py; presumably it adds the 'expressing',
# 'marker' and 'output' columns that later cells read — confirm against base.py
data2 = data2.groupby('exp')[data2.columns].apply(lambda x: base.gate_data(x,gates2))
data2.reset_index(inplace=True, drop=True)
# Keep cells flagged as expressing, excluding untransfected ('UT') wells
df2 = data2[(data2['expressing']) & (data2['construct']!='UT')]

In [None]:
# Bin the two-gene data and compute summary statistics per condition and replicate
by = ['condition','construct','construct2','biorep','exp']
df_quantiles2, stats2, _, fits2 = base.calculate_bins_stats(df2, by=by)

# Every table is ordered by design, then target-site kind
sort_columns = ['design','ts_kind']
df_quantiles2 = df_quantiles2.sort_values(sort_columns)

# Attach metadata of the first construct, sort, then attach metadata of the second construct
stats2 = (
    stats2
    .merge(metadata, how='left', on='construct')
    .sort_values(sort_columns)
    .merge(metadata_construct2, how='left', on='construct2')
)
fits2 = (
    fits2
    .merge(metadata, how='left', on='construct')
    .sort_values(sort_columns)
    .merge(metadata_construct2, how='left', on='construct2')
)


In [None]:
# Spread of the output within each group: 5th and 95th percentiles and their distance
grouped_output = df2.groupby(by)[['output']]
p_5 = grouped_output.apply(lambda x: np.percentile(x, 5)).rename('output_5th').reset_index()
p_95 = grouped_output.apply(lambda x: np.percentile(x, 95)).rename('output_95th').reset_index()
stats2 = stats2.merge(p_5, how='left').merge(p_95, how='left')

# Range on the linear scale and in decades (difference of log10 values)
stats2['output_range'] = stats2['output_95th'] - stats2['output_5th']
stats2['output_range_log'] = np.log10(stats2['output_95th']) - np.log10(stats2['output_5th'])

# fraction within 1 order of magnitude (10x) around median
def get_high_low(df):
    """Return the fraction of 'output' values within a 10x window centered on the median.

    The window spans half a decade on each side of the median, i.e. the open
    interval (median * 10**-0.5, median * 10**0.5). Both the numerator and the
    denominator count non-missing values only.
    """
    output = df['output']
    median = output.median()
    lower_bound = median * 10**(-0.5)
    upper_bound = median * 10**0.5
    inside_window = (output > lower_bound) & (output < upper_bound)
    return output[inside_window].count() / output.count()

# One value per group: share of cells within the 10x window around the group median
fraction = (
    df2.groupby(by)[df2.columns]
    .apply(get_high_low)
    .rename('fraction_within_10x')
    .reset_index()
)
stats2 = stats2.merge(fraction, how='left')

In [None]:
# One row per condition (construct + construct2 pair), with the metadata of both constructs
metadata_comb = data2.drop_duplicates('condition')[['construct','construct2','condition']]
metadata_comb = metadata_comb.merge(metadata, how='left', on='construct')
metadata_comb = metadata_comb.merge(metadata_construct2, how='left', on='construct2')

# Create color palette by architecture
metadata_comb.loc[metadata_comb['gene']=='1T', 'color'] = base.colors['teal']
metadata_comb.loc[metadata_comb['gene']=='2T', 'color'] = base.colors['green']
metadata_comb.loc[metadata_comb['gene']=='2V', 'color'] = base.colors['purple']

# markers
# 'X' is the fallback for any architecture not listed below
metadata_comb['markers'] = 'X'
metadata_comb.loc[metadata_comb['gene']=='1T', 'markers'] = 'o'
metadata_comb.loc[metadata_comb['gene']=='2T', 'markers'] = 'D'
metadata_comb.loc[metadata_comb['gene']=='2V', 'markers'] = 's'

# Note: this mapping has no entry for 'none', so that value is left as-is by replace()
ts_label = {'na': 'base', 'NT': 'OL', 'T': 'CL'}
metadata_comb['ts_label'] = metadata_comb['ts_kind'].replace(ts_label)

# Snapshot of the colors/markers at this point, keyed by architecture.
# The palettes below are snapshots taken after further color overrides, so the
# order of these statements matters.
metadata_dict = metadata_comb.set_index('gene').to_dict('dict')
gene_palette = metadata_dict['color']
gene_markers = metadata_dict['markers']

# Create color palette by kind (design)
metadata_comb.loc[(metadata_comb['gene']=='1T') & (metadata_comb['design']==2), 'color'] = base.colors['orange']
metadata_comb.loc[(metadata_comb['gene']=='1T') & (metadata_comb['design']==3), 'color'] = base.colors['red']

# 'kind' = architecture + design number, e.g. '<gene>_<design>'
metadata_comb['kind'] = metadata_comb['gene'] + '_' + metadata_comb['design'].astype(str)
metadata_dict = metadata_comb.set_index('kind').to_dict('dict')
kind_palette = metadata_dict['color']
kind_markers = metadata_dict['markers']

# Create color palette by condition (design)
metadata_comb.loc[(metadata_comb['gene']=='2V') & (metadata_comb['construct2_promoter']=='U6'), 'color'] = base.colors['blue']
# All 'NT' target-site conditions are gray ...
metadata_comb.loc[(metadata_comb['ts_kind']=='NT'), 'color'] = base.colors['gray']
# ... except 1T design 3, whose (just assigned) color is passed through base.get_dark_color
metadata_comb.loc[(metadata_comb['ts_kind']=='NT') & (metadata_comb['gene']=='1T') & (metadata_comb['design']==3), 
                  'color'] = metadata_comb.loc[(metadata_comb['ts_kind']=='NT') & (metadata_comb['gene']=='1T') & 
                                               (metadata_comb['design']==3), 'color'].apply(base.get_dark_color)
metadata_comb.loc[metadata_comb['group'].isin(['base','marker']), 'color'] = 'black'
# Rebuild the condition key (same formula as used for data2['condition'])
metadata_comb['condition'] = metadata_comb['construct'] + '_' + metadata_comb['construct2']
metadata_dict = metadata_comb.set_index('condition').to_dict('dict')
condition_palette = metadata_dict['color']
condition_markers = metadata_dict['markers']

# Left-to-right order of the architectures in the plots
architecture_order = ['1T', '2T', '2V']

In [None]:
# Second set of per-construct colors/markers, taken from the 'tuning' metadata style
metadata2 = base.get_metadata(metadata_path, style='tuning')
metadata_dict2 = metadata2.set_index('construct').to_dict(orient='dict')
main_palette, main_markers = metadata_dict2['color'], metadata_dict2['markers']

Load PiggyBac data (`data_pb`)

In [None]:
pb_path = base_path/'2023.07.18_exp63.3-RC'/'export'

# A single plate with a single biological replicate
plates = pd.DataFrame(dict(
    data_path=[pb_path],
    yaml_path=[pb_path/'exp63.3_wells2.yaml'],
    biorep=[1],
    exp=['exp63.3-RC'],
))

cache_path = rd.rootdir/'output'/'fig_designs'/'data_pb.gzip'

# Use the cached events when the cache file exists; otherwise load the raw
# exports, clean them, and write the cache for next time
if cache_path.is_file():
    data_pb = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data_pb = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel,
    # then discard rows with any missing value
    positive_in_all_channels = (data_pb[channel_list] > 0).all(axis=1)
    data_pb = data_pb[positive_in_all_channels].dropna()
    data_pb.to_parquet(rd.outfile(cache_path))

# Attach the per-construct metadata to every event
data_pb = data_pb.merge(metadata, how='left', on='construct')
display(data_pb)

In [None]:
# Gate cells
# Add missing gates (use gates from tuning exp)
# The PiggyBac experiment gets the mean of the existing gates. The row is only
# appended when it is not already present, so re-running this cell does not
# keep adding duplicate 'exp63.3-RC' rows to the shared gates table.
pb_exp = 'exp63.3-RC'
channel_list = ['mGL-A', 'mRuby2-A']
if not (gates['exp']==pb_exp).any():
    mean_gates = gates.loc[gates['exp']!=pb_exp, channel_list].mean()
    # Row layout follows the gates columns: exp, mGL-A gate, mRuby2-A gate, marker, output
    gates.loc[len(gates.index)] = [pb_exp] + list(mean_gates.values) + ['mGL-A','mRuby2-A']

# Gate data by transfection marker expression
# NOTE(review): base.gate_data lives in base.py; presumably it adds the 'expressing',
# 'marker' and 'output' columns that later cells read — confirm against base.py
data_pb = data_pb.groupby('exp')[data_pb.columns].apply(lambda x: base.gate_data(x,gates))
data_pb.reset_index(inplace=True, drop=True)
# Keep cells flagged as expressing, excluding untransfected ('UT') wells
df_pb = data_pb[(data_pb['expressing']) & (data_pb['construct']!='UT')]

In [None]:
# Bin data and calculate statistics
# base.calculate_bins_stats returns four objects; the third one is not used here
df_quantiles_pb, stats_pb, _, fits_pb = base.calculate_bins_stats(df_pb, num_bins=20)
# Re-attach construct metadata so the summaries can be filtered and colored by construct
stats_pb = stats_pb.merge(metadata, how='left', on='construct')
fits_pb = fits_pb.merge(metadata, how='left', on='construct')

Load data from lentiviral infections of 293T cells and MEFs (`data_lenti`)

In [None]:
base_path_1 = rd.datadir/'instruments'/'data'/'attune'/'kasey'/'2024.04.05_exp89'/'export'
base_path_2 = rd.datadir/'instruments'/'data'/'attune'/'chris'/'2024.06.02-exp95-lenti-miR-iFFL'/'export'

# Plate folder names of the second experiment: plate<N>_<cell line>_<virus batch>, N = 1..9
plate_cells = ['293T']*3 + ['MEF2A']*3 + ['MEF8A']*3
plate_batches = ['P9','P14','P15']*3
plate_list = [f'plate{number}_{cell}_{batch}'
              for number, (cell, batch) in enumerate(zip(plate_cells, plate_batches), start=1)]

# First experiment: plate folders and their well-layout files, matched by position
folders_1 = ['293T_control',
             '293T_plate1', '293T_plate2', '293T_plate3',
             'MEF_3_plate1',
             'MEF_4-1_plate1', 'MEF_4-1_plate2', 'MEF_4-1_plate3']
layouts_1 = ['plate_control.yaml',
             'plate01.yaml', 'plate02.yaml', 'plate03.yaml',
             'mef_3_plate01.yaml',
             'mef_4-1_plate01.yaml', 'mef_4-1_plate02.yaml', 'mef_4-1_plate03.yaml']
yaml_dir_1 = base_path_1/'kasey_yaml2'

# One row per plate; in the second experiment each plate has its own '<plate>_metadata.yaml'
plates = pd.DataFrame({
    'data_path': [base_path_1/folder for folder in folders_1] +
                 [base_path_2/p for p in plate_list],
    'yaml_path': [yaml_dir_1/layout for layout in layouts_1] +
                 [base_path_2/(p+'_metadata.yaml') for p in plate_list],
})

cache_path = rd.rootdir/'output'/'fig_designs'/'data_lenti.gzip'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Use the cached events when the cache file exists; otherwise load the raw
# exports, clean them, and write the cache for next time
if cache_path.is_file():
    data_lenti = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data_lenti = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel.
    # Rows with missing values are NOT dropped in this loader
    # NOTE(review): the dropna step is disabled here — confirm this is intentional
    positive_in_all_channels = (data_lenti[channel_list] > 0).all(axis=1)
    data_lenti = data_lenti[positive_in_all_channels]
    data_lenti.to_parquet(rd.outfile(cache_path))

# Attach the per-construct metadata to every event
data_lenti = data_lenti.merge(metadata, how='left', on='construct')
# 'cell' is the part of cell_type before the first '-'
data_lenti['cell'] = data_lenti['cell_type'].apply(lambda x: x.split('-')[0])
# An experiment is one cell type infected with one virus batch
data_lenti['exp'] = data_lenti['cell_type'] + '_' + data_lenti['virus_batch']

def map_biorep(df):
    """Return a copy of `df` with a 'biorep' column numbering its experiments.

    Each distinct value of df['exp'] gets an integer starting at 0, in order of
    first appearance. The input frame is not modified.
    """
    exp_to_biorep = {exp: number for number, exp in enumerate(df['exp'].unique())}
    return df.assign(biorep=df['exp'].map(exp_to_biorep))

# Number the experiments (cell type + virus batch) from 0 separately within each cell line
data_lenti = data_lenti.groupby('cell')[data_lenti.columns].apply(map_biorep).reset_index(drop=True)
display(data_lenti)

In [None]:
# Gate cells
# Gate for each channel = 99.9th percentile of the uninfected wells (virus_dilution == 0), per experiment
gates_lenti = pd.DataFrame()
channel_list = ['mGL-A', 'mCherry-A', 'mRuby2-A']
for channel in channel_list:
    uninfected = data_lenti[(data_lenti['virus_dilution']==0)]
    gates_lenti[channel] = uninfected.groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates_lenti = gates_lenti.reset_index()

# Add missing gates
# 293T experiments without their own gate reuse the '293T_na' gate
for exp_name in ['293T_P10', '293T_P14_', '293T_P16']:
    reference = gates_lenti.loc[gates_lenti['exp']=='293T_na', channel_list]
    gates_lenti.loc[len(gates_lenti.index)] = [exp_name] + list(reference.mean().values)
# MEF experiments without their own gate use the mean over all rows whose name contains 'MEF'
# (recomputed for every appended row, which by then includes the rows appended before it)
for exp_name in ['MEF-3_P10', 'MEF-4-1_P10', 'MEF-4-1_P14', 'MEF-4-1_P16']:
    reference = gates_lenti.loc[gates_lenti['exp'].str.contains('MEF'), channel_list]
    gates_lenti.loc[len(gates_lenti.index)] = [exp_name] + list(reference.mean().values)

# Indicate which channels are relevant for each experiment
gates_lenti = gates_lenti.sort_values(['exp'])
gates_lenti['marker'] = 'mGL-A'
gates_lenti['output'] = 'mRuby2-A'

# Gate data by marker expression, one (cell type, virus batch) group at a time
data_lenti = data_lenti.groupby(['cell_type','virus_batch'])[data_lenti.columns].apply(lambda x: base.gate_data(x,gates_lenti))
data_lenti = data_lenti.reset_index(drop=True)
is_infected = data_lenti['virus_dilution']!=0
df_lenti = data_lenti[(data_lenti['expressing']) & is_infected]

# Since there is no marker-only condition, save the output expression stats for untransduced cells
untransduced = data_lenti[(data_lenti['virus_dilution']==0)]
baseline_df = untransduced.groupby(['exp','cell'])['output'].apply(sp.stats.gmean).reset_index()

In [None]:
# Bin data and calculate statistics
# Cast the gated marker/output columns to float. assign() returns a new frame,
# so nothing is written into df_lenti while it is still a filtered slice of
# data_lenti (column-wise assignment there triggers SettingWithCopyWarning).
df_lenti = df_lenti.assign(
    output=df_lenti['output'].astype(float),
    marker=df_lenti['marker'].astype(float),
)
# base.calculate_bins_stats returns four objects; the third one is not used here
df_quantiles_lenti, stats_lenti, _, fits_lenti = base.calculate_bins_stats(df_lenti, by=['exp','cell','biorep','construct','dox','virus_dilution'], num_bins=20)
# Re-attach construct metadata so the summaries can be filtered by group and design
stats_lenti = stats_lenti.merge(metadata, how='left', on='construct')
fits_lenti = fits_lenti.merge(metadata, how='left', on='construct')

### Set up figure

In [None]:
# Font sizes shared by every panel (defined in base.py)
base_size = base.font_sizes['base_size']
smaller_size = base.font_sizes['smaller_size']

# seaborn theme and font family
sns.set_style('ticks')
font_rc = {'font.size': base_size, 'font.family': 'sans-serif', 'font.sans-serif':['Arial']}
sns.set_context('paper', font_scale=1.0, rc=font_rc)

# matplotlib overrides applied on top of the seaborn context
rc_overrides = {
    # text sizes
    'axes.titlesize': base_size, 'axes.labelsize': base_size,
    'xtick.labelsize': smaller_size, 'ytick.labelsize': smaller_size,
    # font type used when saving PDFs
    'pdf.fonttype': 42,
    # tick geometry
    'ytick.major.size': 3, 'xtick.major.size': 3, 'ytick.minor.size': 2,
    'ytick.major.pad': 2, 'xtick.major.pad': 2,
    # lines and spines
    'lines.linewidth': 1,
    'axes.spines.right': False, 'axes.spines.top': False, 'axes.labelpad': 2,
}
plt.rcParams.update(rc_overrides)

In [None]:
# Create the overall figure, gridspec, and add subfigure labels
fig = plt.figure(figsize=(6.8504,9.75))
fig_gridspec = matplotlib.gridspec.GridSpec(
    6, 6, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=(1.75,1.5,1.5,2,1.5,1.5),
)

# Panel letter -> (grid row, column span) on the 6x6 grid
panel_layout = {
    'A': (0, slice(None, 3)), 'B': (0, slice(3, None)),
    'C': (1, slice(None, 3)), 'D': (1, slice(3, None)),
    'E': (2, slice(None, 3)), 'F': (2, slice(3, None)),
    'G': (3, slice(None, 2)), 'H': (3, slice(2, None)),
    'I': (4, slice(None)),
    'J': (5, slice(None, 4)), 'K': (5, slice(4, None)),
}
subfigures = {
    label: fig.add_subfigure(fig_gridspec[row, cols])
    for label, (row, cols) in panel_layout.items()
}

# Bold panel letter anchored at the top-left corner of every subfigure
for label, subfig in subfigures.items():
    panel_letter = matplotlib.text.Text(
        x=0, y=1, text=f'{label}',
        fontsize=base.font_sizes['subpanel_label'], fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure,
    )
    subfig.add_artist(panel_letter)

# Defaults shared by the panel cells
scatter_kwargs = dict(s=4, jitter=0.2, linewidth=0.5, edgecolor='white')
xlim = (2e2,6e4)
ylim = (2e1,1e5)

# Save the (still empty) layout; every panel cell re-saves to the same file
fig_path = output_path/'fig_designs-supp.pdf'
fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel A: mean, standard deviation and slope for the miR-only controls
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.5, top=0.65, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.35))

# EF1a miR controls
# Rows kept: (group 'miR' with the miR in the UTR) or group 'base', restricted to the EF1a promoter
miR_order = ['none', 'miR.FF5', 'miR.FF4', 'miRE.FF5', 'miRE.FF4',]
plot_df = stats[((((stats['group']=='miR')) & (stats['miR_loc']=='UTR')) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# Ordered categorical so the x-axis follows miR_order
plot_df['miR'] = plot_df['miR'].astype(pd.api.types.CategoricalDtype(categories=miR_order, ordered=True))
plot_df2 = fits[((((fits['group']=='miR')) & (fits['miR_loc']=='UTR')) | (fits['group']=='base')) &
                (fits['promoter']=='EF1a')].copy()
plot_df2['miR'] = plot_df2['miR'].astype(pd.api.types.CategoricalDtype(categories=miR_order, ordered=True))
# x-range sized by the number of constructs; these values overwrite the notebook-level xlim/ylim
xlim = (-0.5, len(plot_df['construct'].unique())-0.5)
ylim = (2e1,8e4)

# stat gmean
# One stripplot call per construct so each construct can use its own marker shape
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='miR', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)
# Dotted reference line: mean of the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='miR', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)

# slope
# Uses explicit point settings (jitter=0.1) instead of scatter_kwargs
ax = axes[2]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='miR', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], s=4, jitter=0.1, linewidth=0.5, edgecolor='white')
ax.set(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.2), ylabel='',
       yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])
marker_baseline = fits.loc[(fits['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

for ax in axes:
    # Show '.' in the miR names as '-' and rotate the tick labels
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    sns.despine(ax=ax)
    ax.minorticks_off()

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel B: mean, standard deviation and slope for the single target-site controls
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.5, top=0.65, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.35))

# EF1a target-site controls
# Rows kept: (group 'ts5' with a single target site) or group 'base', restricted to the EF1a promoter
ts_order = ['none','FF3x1','FF4x1','FF5x1','FF6x1']
plot_df = stats[((((stats['group']=='ts5')) & (stats['ts_num']==1)) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# Ordered categorical so the x-axis follows ts_order
plot_df['ts'] = plot_df['ts'].astype(pd.api.types.CategoricalDtype(categories=ts_order, ordered=True))
plot_df2 = fits[((((fits['group']=='ts5')) & (fits['ts_num']==1)) | (fits['group']=='base')) &
                (fits['promoter']=='EF1a')].copy()
plot_df2['ts'] = plot_df2['ts'].astype(pd.api.types.CategoricalDtype(categories=ts_order, ordered=True))
xlim = (-0.5, len(plot_df['ts'].unique())-0.5)
# Same y-range as panel A, set here so this cell does not depend on panel A's cell having run
ylim = (2e1,8e4)

# stat gmean
# One stripplot call per construct so each construct can use its own marker shape
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)
# Dotted reference line: mean of the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)

# slope
ax = axes[2]
for construct, group in plot_df2.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.2), ylabel='',
       yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])
marker_baseline = fits.loc[(fits['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# Tick-label rotation is applied once here for all three axes
for ax in axes:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    sns.despine(ax=ax)
    ax.minorticks_off()

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel C: output standard deviation per design for three delivery methods
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.7, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.45))

# Transfection: miR.FF4 controllers with a single target site under EF1a
plot_df1 = stats[(stats['miR']=='miR.FF4') & (stats['group']=='controller') & (stats['ts_num']==1) &
                (stats['promoter']=='EF1a')]
# PiggyBac: all summarized conditions
plot_df2 = stats_pb
# Lentivirus: 293T cells at dox 1000 and virus dilution 1, controllers and base only;
# design 3 points from bioreps below 4 are excluded
plot_df3 = stats_lenti[stats_lenti['group'].isin(['controller','base']) & (stats_lenti['dox']==1000) & (stats_lenti['virus_dilution']==1) & 
                      (stats_lenti['cell']=='293T') & ~((stats_lenti['biorep']<4) & (stats_lenti['design']==3))]
# Despite the name this is a dict: axis title -> data frame, in left-to-right order
plot_df_list = {'Transfection': plot_df1, 'PiggyBac': plot_df2, 'Lentivirus': plot_df3,}

for ax, (name, plot_df) in zip(axes, plot_df_list.items()):
    # One stripplot call per construct so each construct can use its own marker shape
    for construct, group in plot_df.groupby('construct'):
        sns.stripplot(data=group, x='design', y='output_std', hue='construct', palette=designs_palette,
                        legend=False, ax=ax, marker=designs_markers[construct], **scatter_kwargs)
    ax.set(title=name, xlim=(-0.5, len(plot_df['design'].unique())-0.5), xlabel='', ylabel='', yscale='log',)

# Axis labels only on the leftmost plot
axes[0].set(xlabel='design',ylabel='Standard deviation')

fig.savefig(rd.outfile(fig_path))

In [None]:
def designs_stats_plots(axes, plot_df, plot_df2):
    """Draw the three summary plots (mean, std., slope) for one set of designs.

    Args:
        axes: sequence of three matplotlib axes, filled in the order
            mean, standard deviation, slope.
        plot_df: summary statistics; the code reads the columns 'construct',
            'design', 'output_gmean' and 'output_std'.
        plot_df2: fit results; the code reads the columns 'construct',
            'design' and 'slope'.

    Returns:
        None. The axes are modified in place.

    Relies on the notebook-level names `designs_palette`, `designs_markers`,
    `scatter_kwargs`, `stats` and `fits`. The dotted horizontal lines on the
    mean and slope plots are the mean of the 'marker' group in `stats` and
    `fits` respectively, regardless of which subset is passed in.
    """

    # x-range sized by the number of distinct designs in plot_df
    xlim = (-0.5, len(plot_df['design'].unique())-0.5)

    # stat gmean
    # One stripplot call per construct so each construct can use its own marker shape
    ax = axes[0]
    for construct, group in plot_df.groupby('construct'):
        sns.stripplot(data=group, x='design', y='output_gmean', hue='construct', palette=designs_palette,
                        legend=False, ax=ax, marker=designs_markers[construct], **scatter_kwargs)
    ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log',)
    marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
    ax.axhline(marker_baseline, color='black', ls=':')
    ax.minorticks_off()  # only the mean plot has its minor ticks switched off

    # stat std
    # The middle plot carries the shared 'design' x-label
    ax = axes[1]
    for construct, group in plot_df.groupby('construct'):
        sns.stripplot(data=group, x='design', y='output_std', hue='construct', palette=designs_palette,
                        legend=False, ax=ax, marker=designs_markers[construct], **scatter_kwargs)
    ax.set(title='Std.', xlim=xlim, xlabel='design', ylabel='', yscale='log',)

    # slope
    ax = axes[2]
    for construct, group in plot_df2.groupby('construct'):
        sns.stripplot(data=group, x='design', y='slope', hue='construct', palette=designs_palette,
                        legend=False, ax=ax, marker=designs_markers[construct], **scatter_kwargs)
    ax.set(title='Slope', xlim=xlim, xlabel='', ylabel='', ylim=(0,1.2),
        yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])
    marker_baseline = fits.loc[(fits['group']=='marker'), 'slope'].mean()
    ax.axhline(marker_baseline, color='black', ls=':')

    for ax in axes: sns.despine(ax=ax)

In [None]:
# Panel D: design comparison for the miR.FF4 controllers
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.7, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.45))

# Controllers with miR.FF4 and a single target site under the EF1a promoter;
# the same filter is applied to the statistics and to the fits
plot_df = stats[(stats['miR']=='miR.FF4') & (stats['group']=='controller') & (stats['ts_num']==1) &
                       (stats['promoter']=='EF1a')]
plot_df2 = fits[(fits['miR']=='miR.FF4') & (fits['group']=='controller') & (fits['ts_num']==1) &
                       (fits['promoter']=='EF1a')]

designs_stats_plots(axes, plot_df, plot_df2)

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel E: design comparison for the miR.FF5 controllers
subfig = subfigures['E']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.7, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.45))

# Controllers with miR.FF5 and a single target site under the EF1a promoter,
# limited to the FF4x1, FF5x1 and FF6x1 target sites;
# the same filter is applied to the statistics and to the fits
plot_df = stats[(stats['miR']=='miR.FF5') & (stats['group']=='controller') & (stats['ts_num']==1) & 
                       (stats['ts'].isin(['FF4x1','FF5x1','FF6x1'])) & (stats['promoter']=='EF1a')]
plot_df2 = fits[(fits['miR']=='miR.FF5') & (fits['group']=='controller') & (fits['ts_num']==1) &
                       (fits['ts'].isin(['FF4x1','FF5x1','FF6x1'])) & (fits['promoter']=='EF1a')]

designs_stats_plots(axes, plot_df, plot_df2)

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel F: design comparison for the miRE.FF5 controllers
subfig = subfigures['F']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.7, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.45))

# Controllers with miRE.FF5 and a single target site under the EF1a promoter;
# the same filter is applied to the statistics and to the fits
plot_df = stats[(stats['miR']=='miRE.FF5') & (stats['group']=='controller') & (stats['ts_num']==1) &
                       (stats['promoter']=='EF1a')]
plot_df2 = fits[(fits['miR']=='miRE.FF5') & (fits['group']=='controller') & (fits['ts_num']==1) &
                       (fits['promoter']=='EF1a')]

designs_stats_plots(axes, plot_df, plot_df2)

fig.savefig(rd.outfile(fig_path))

In [None]:
# two-gene architectures with 5'UTR target sites
# Rows kept in each table:
#   - 1T controllers with design > 1
#   - 2T 'dual' conditions
#   - 2V 'ts5' conditions
#   - all 'base' conditions
# Each subset is then sorted so conditions appear grouped by architecture and promoter.
stats_subset = stats2[((stats2['gene']=='1T') & (stats2['design']>1) & (stats2['group']=='controller')) |
                ((stats2['gene']=='2T') & (stats2['group']=='dual') & (stats2['ts_loc']=='5\'')) |
                ((stats2['gene']=='2V') & (stats2['group']=='ts5')) |
                (stats2['group']=='base')].copy()
stats_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)
fits_subset = fits2[((fits2['gene']=='1T') & (fits2['design']>1) & (fits2['group']=='controller')) |
                ((fits2['gene']=='2T') & (fits2['group']=='dual') & (fits2['ts_loc']=='5\'')) |
                ((fits2['gene']=='2V') & (fits2['group']=='ts5')) |
                (fits2['group']=='base')].copy()
fits_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)
# NOTE(review): unlike the two filters above, the 2T clause here does not require
# ts_loc == "5'", so this subset may contain 2T conditions the others exclude — confirm intended
df_quantiles_subset = df_quantiles2[(((df_quantiles2['gene']=='1T') & (df_quantiles2['design']>1) & (df_quantiles2['group']=='controller')) |
                        ((df_quantiles2['gene']=='2T') & (df_quantiles2['group']=='dual')) |
                        ((df_quantiles2['gene']=='2V') & (df_quantiles2['group']=='ts5')) |
                        (df_quantiles2['group']=='base'))].copy()
df_quantiles_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)

# shift xticks to add more space between architecture groups
# 13 positions in four clusters (5, 2, 3 and 3 conditions), each cluster shifted
# right by one extra `buffer` relative to the previous one
buffer = 0.6
gene_order = ['1T', '2T', '2V']
xtick_locs = [0,1,2,3,4, 5+buffer,6+buffer, 7+buffer*2,8+buffer*2,9+buffer*2, 10+buffer*3,11+buffer*3,12+buffer*3,]

# Assign each condition, in its sorted order of appearance, to an x position.
# Series.map is used (as for metadata_comb below) so the new column is numeric
# right away instead of relying on Series.replace to turn a string column into floats.
condition_loc = dict(zip(stats_subset['condition'].unique(), xtick_locs))
stats_subset['condition_loc'] = stats_subset['condition'].map(condition_loc)

condition_loc = dict(zip(fits_subset['condition'].unique(), xtick_locs))
fits_subset['condition_loc'] = fits_subset['condition'].map(condition_loc)

# Conditions that are not plotted get NaN here and are removed by dropna() below
metadata_comb['condition_loc'] = metadata_comb['condition'].map(condition_loc)
m = metadata_comb.dropna()
# assign() returns a new frame, so the dropna() result is not modified in place
m = m.assign(condition_loc=m['condition_loc'].astype(str))

# Right limit = last tick position + 0.5
xlim_adjusted = (-0.5, len(stats_subset['condition'].unique())-0.5+buffer*(len(gene_order)))
# native_scale=True makes stripplot place points at the numeric condition_loc values
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

In [None]:
# Panel H: mean, standard deviation and slope for the two-gene architectures
subfig = subfigures['H']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.55, top=0.35, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.3))

plot_df = stats_subset
plot_df2 = fits_subset

# stat gmean
# Groups are conditions here (the loop variable is still called `construct`);
# sort=False keeps the order in which the subset was sorted
ax = axes[0]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.stripplot(data=group, x='condition_loc', y='output_gmean', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs2)
ax.set(title='Mean', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs)
# Dotted reference line: mean of the 'marker' group in the two-gene data
marker_baseline = stats2.loc[(stats2['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.stripplot(data=group, x='condition_loc', y='output_std', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs2)
ax.set(title='Standard deviation', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs)

# slope
ax = axes[2]
for construct, group in plot_df2.groupby('condition', sort=False):
    sns.stripplot(data=group, x='condition_loc', y='slope', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs2)

ax.set(title='Slope', xlim=xlim_adjusted, xlabel='', ylabel='', xticks=xtick_locs,)
marker_baseline = fits2.loc[(fits2['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

for i,ax in enumerate(axes):
    # Vertical offset of the group labels, in points below the axes
    yloc = -6
    # Shade the second and fourth condition clusters to separate the groups visually
    ax.axvspan(4.5+buffer/2, 6.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    ax.axvspan(9.5+buffer*2.5, 12.5+buffer*3.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    # Group labels centered under each cluster (x in data units, y in axes points)
    ax.annotate(architecture_order[0], (2,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    ax.annotate(architecture_order[1], (5.5+buffer,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    ax.annotate('2V\nEF1a', (8+buffer*2,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    ax.annotate('2V\nU6', (11+buffer*3,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    # Keep the ticks but blank their labels; the annotations above replace them
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    sns.despine(ax=ax)

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel J: mean, standard deviation and slope for the lentiviral controls in 293T cells
subfig = subfigures['J']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.3))

# 293T lentivirus controls
# Rows kept: everything except the 'controller' and 'marker' groups, in 293T cells,
# at dox 1000 and virus dilution 1
group_order = ['base','miR','ts3','ts5']
plot_df = stats_lenti[~stats_lenti['group'].isin(['controller','marker']) & (stats_lenti['cell']=='293T') &
                      (stats_lenti['dox']==1000) & (stats_lenti['virus_dilution']==1)].copy()
# Ordered categorical so sorting follows group_order
plot_df['group'] = plot_df['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df.sort_values(['group','ts'], inplace=True)

plot_df2 = fits_lenti[~fits_lenti['group'].isin(['controller','marker']) & (fits_lenti['cell']=='293T') & 
                      (fits_lenti['dox']==1000) & (fits_lenti['virus_dilution']==1)].copy()
plot_df2['group'] = plot_df2['group'].astype(pd.api.types.CategoricalDtype(categories=group_order, ordered=True))
plot_df2.sort_values(['group','ts'], inplace=True)

# shift xticks to add more space between promoter groups
# 7 positions in three clusters (1, 2 and 4 constructs), each shifted right by one extra `buffer`
buffer = 0.6
num_groups = 3
xtick_locs = [0, 1+buffer, 2+buffer] + [i+buffer*2 for i in range(3,7)]
# Assign each construct, in its sorted order of appearance, to an x position.
# Series.map is used (as for metadata below) so the new column is numeric right
# away instead of relying on Series.replace to turn a string column into floats.
construct_loc = dict(zip(plot_df['construct'].unique(), xtick_locs))
plot_df['construct_loc'] = plot_df['construct'].map(construct_loc)
construct_loc = dict(zip(plot_df2['construct'].unique(), xtick_locs))
plot_df2['construct_loc'] = plot_df2['construct'].map(construct_loc)
# Constructs that are not plotted get NaN here and are removed by dropna() below
metadata['construct_loc'] = metadata['construct'].map(construct_loc)
m = metadata.dropna()
# assign() returns a new frame, so the later edits to `m` do not write into the dropna() result
m = m.assign(construct_loc=m['construct_loc'].astype(str))
# native_scale=True makes stripplot place points at the numeric construct_loc values
scatter_kwargs2 = dict(s=4, jitter=0.3, linewidth=0.5, edgecolor='white', native_scale=True)
xlim = (-0.5, plot_df['construct_loc'].max()+0.5)

# adjust markers
# Constructs with the miR in the CDS or with 3' target sites are drawn as diamonds
m.loc[(m['miR_loc']=='CDS') | (m['ts_loc']=='3\''), 'markers'] = 'D'
m_dict = m.set_index('construct').to_dict('dict')
comb_markers = m_dict['markers']

# make xticklabels
def get_label(df):
    """Return a copy of ``df`` with a 'label' column picked according to its group.

    The group is the first unique value of ``df['group']``. For 'base' the
    'group' column is copied into 'label', for 'miR' the 'miR' column, and for
    'ts3' / 'ts5' the 'ts' column. The input frame is left unmodified; any
    other group value raises ``KeyError``.
    """
    label_source = {'base': 'group', 'miR': 'miR', 'ts3': 'ts', 'ts5': 'ts'}
    first_group = df['group'].unique()[0]
    return df.assign(label=df[label_source[first_group]])

# attach the tick label text to every metadata row, one group at a time
m = m.groupby('group')[m.columns].apply(get_label).reset_index(drop=True)

# One entry per panel: (axes, source frame, y column, keyword arguments for Axes.set).
# Panels are: stat gmean, stat std, slope.
panel_specs = [
    (axes[0], plot_df, 'output_gmean',
     dict(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)),
    (axes[1], plot_df, 'output_std',
     dict(title='Standard deviation', xlim=xlim, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)),
    (axes[2], plot_df2, 'slope',
     dict(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.2), ylabel='', xticks=xtick_locs,
          yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])),
]
for panel_ax, panel_df, y_col, set_kwargs in panel_specs:
    # one stripplot call per construct so that each construct can be drawn with its own marker
    for construct_name, construct_rows in panel_df.groupby('construct', sort=False):
        sns.stripplot(data=construct_rows, x='construct_loc', y=y_col, hue='construct', palette=main_palette,
                      legend=False, ax=panel_ax, marker=comb_markers[construct_name], **scatter_kwargs2)
    panel_ax.set(**set_kwargs)

# dotted reference line on the mean panel: mean of baseline_df 'output' over the 293T rows
baseline = baseline_df.loc[baseline_df['cell']=='293T', 'output'].mean()
axes[0].axhline(baseline, color='black', ls=':')

# add shaded region for miR-only constructs:
# from halfway between ticks 0 and 1 to halfway between ticks 2 and 3
span_start = xtick_locs[0]+(xtick_locs[1]-xtick_locs[0])/2
span_stop = xtick_locs[2]+(xtick_locs[3]-xtick_locs[2])/2
span1 = (span_start, span_stop,)

for ax in axes:
    sns.despine(ax=ax)
    ax.axvspan(*span1, color=base.get_light_color(base.colors['gray']), alpha=0.2,)

    # label each tick from the metadata 'label' column, then swap '.' for '-' in the label text
    rd.plot.generate_xticklabels(m.drop_duplicates('construct_loc'), 'construct_loc', ['label'], annotate=False, ax=ax)
    relabeled = [tick.get_text().replace('.','-') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(relabeled, rotation=45, ha='right')

    # no text on the minor y ticks
    ax.yaxis.set_minor_formatter(plt.NullFormatter())

fig.savefig(rd.outfile(fig_path))

In [None]:
subfig = subfigures['K']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.5))

# 'controller' and 'base' groups in 293T cells at dox=0 and virus_dilution=1
stats_rows = ((stats_lenti['group'].isin(['controller','base'])) & (stats_lenti['cell']=='293T') &
              (stats_lenti['dox']==0) & (stats_lenti['virus_dilution']==1))
plot_df = stats_lenti[stats_rows].copy()
fits_rows = ((fits_lenti['group'].isin(['controller','base'])) & (fits_lenti['cell']=='293T') &
             (fits_lenti['dox']==0) & (fits_lenti['virus_dilution']==1))
plot_df2 = fits_lenti[fits_rows].copy()

# one x slot per distinct design found in the summary-stat frame
xlim = (-0.5, len(plot_df['design'].unique())-0.5)

# mean of baseline_df 'output' over the 293T rows, drawn as the reference line on the mean panel
baseline = baseline_df.loc[baseline_df['cell']=='293T', 'output'].mean()

# One entry per panel: (axes, source frame, y column, keyword arguments for Axes.set, reference line height).
# Panels are: stat gmean, slope.
panel_specs = [
    (axes[0], plot_df, 'output_gmean',
     dict(title='Mean', xlim=xlim, xlabel='design', ylabel='', yscale='log', ylim=(2e1,6e1)),
     baseline),
    (axes[1], plot_df2, 'slope',
     dict(title='Slope', xlim=xlim, xlabel='design', ylabel='', ylim=(-0.2,0.2)),
     0),
]
for ax, panel_df, y_col, set_kwargs, reference_y in panel_specs:
    # one stripplot call per construct so that each construct can be drawn with its own marker
    for construct_name, construct_rows in panel_df.groupby('construct'):
        sns.stripplot(data=construct_rows, x='design', y=y_col, hue='construct', palette=designs_palette,
                      legend=False, ax=ax, marker=designs_markers[construct_name], **scatter_kwargs)
    ax.set(**set_kwargs)
    ax.axhline(reference_y, color='black', ls=':')

fig.savefig(rd.outfile(fig_path))