In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py: re-import the already-imported local
# `base` module so that edits made to it are picked up without a kernel restart
from importlib import reload
reload(base)

### Load data

Two-gene data (`data`)

In [None]:
# Locations of the exported flow cytometry files for exp93
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
exp93_path = base_path/'2024.04.14_exp93'/'export'

# One row per plate: its data folder, the shared well-metadata YAML, and its labels
plates = pd.DataFrame({
    'data_path': [exp93_path/'plate1', exp93_path/'plate2', exp93_path/'plate3',],
    'yaml_path': [exp93_path/'exp93_wells.yaml']*3,
    'biorep': [1, 2, 3],
    'exp': ['exp93']*3,
})

cache_path = rd.rootdir/'output'/'fig_architecture'/'data.gzip'

# Load data: reuse the parquet cache when present, otherwise load the raw
# plates, clean them, and write the cache for the next run
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A','SNAP-647-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Remove non-positive channel values: keep only events that are > 0 in
    # every channel (one vectorized mask instead of one filtered copy per channel)
    data = data[(data[channel_list] > 0).all(axis=1)]

    # Drop rows with any missing value before caching
    data = data.dropna()
    data.to_parquet(rd.outfile(cache_path))

In [None]:
# Add metadata for constructs
plasmid_dir = rd.datadir/'projects'/'miR-iFFL'/'plasmids'
metadata_path = plasmid_dir/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_path)
metadata_construct2 = pd.read_excel(plasmid_dir/'construct2-metadata.xlsx')

# Left-join both metadata tables so every event keeps its row even when a
# construct has no metadata entry
data = (
    data
    .merge(metadata, how='left', on='construct')
    .merge(metadata_construct2, how='left', on='construct2')
)

# A condition is the pair (construct, construct2)
data['condition'] = data['construct'] + '_' + data['construct2']

# Rename far-red channel
data = data.rename(columns={'SNAP-647-A': 'iRFP-A'})
display(data)

In [None]:
# Gate cells: for each experiment, the gate of each listed channel is the
# 99.9th percentile of that channel among events whose construct is GEEC555
channel_list = ['mGL-A', 'mRuby2-A']
gates = (
    data[data['construct']=='GEEC555']
    .groupby('exp')[channel_list]
    .quantile(0.999)
    .reset_index()
)

# Add missing iRFP gate (fixed value). Assigning the scalar broadcasts it to
# every experiment row, so this keeps working if more experiments are added.
gate_iRFP = 2.5e2
gates['iRFP-A'] = gate_iRFP

# Indicate which channels are relevant for each experiment
gates = gates.sort_values(['exp'])
gates['marker'] = 'iRFP-A'
gates['output'] = 'mRuby2-A'

# Gate data by transfection marker expression (applied per experiment)
data = (
    data
    .groupby('exp')[data.columns]
    .apply(lambda x: base.gate_data(x,gates))
    .reset_index(drop=True)
)

# Keep expressing cells only, excluding the 'UT' construct
df = data[(data['expressing']) & (data['construct']!='UT')]

In [None]:
# Bin data and calculate statistics
by = ['condition','construct','construct2','biorep','exp']
df_quantiles, stats, _, fits = base.calculate_bins_stats(df, by=by)
df_quantiles.sort_values(['design','ts_kind'], inplace=True)

def _annotate_with_metadata(summary):
    """Attach construct metadata, order by design/ts_kind, then attach construct2 metadata."""
    return (
        summary
        .merge(metadata, how='left', on='construct')
        .sort_values(['design','ts_kind'])
        .merge(metadata_construct2, how='left', on='construct2')
    )

# Summary tables only carry the grouping keys, so metadata is re-attached here
stats = _annotate_with_metadata(stats)
fits = _annotate_with_metadata(fits)


In [None]:
# output range of 5-95 percentile
def _output_percentile(q, column_name):
    """Percentile `q` of `output` within each `by` group, as a flat frame with column `column_name`."""
    per_group = df.groupby(by)[['output']].apply(lambda x: np.percentile(x, q))
    return per_group.rename(column_name).reset_index()

p_5 = _output_percentile(5, 'output_5th')
p_95 = _output_percentile(95, 'output_95th')
stats = stats.merge(p_5, how='left').merge(p_95, how='left')

# Width of the 5th-95th percentile window, in linear units and in decades
stats['output_range'] = stats['output_95th'] - stats['output_5th']
log_p95 = stats['output_95th'].apply(np.log10)
log_p5 = stats['output_5th'].apply(np.log10)
stats['output_range_log'] = log_p95 - log_p5

# fraction within 1 order of magnitude (10x) around median
def get_high_low(df):
    """Fraction of non-null `output` values close to the median `output`.

    "Close" means strictly between median * 10**(-0.5) and median * 10**0.5,
    i.e. a window spanning one decade in total, centred (in log space) on the
    median.

    Parameters
    ----------
    df : DataFrame with an `output` column.

    Returns
    -------
    Count of in-window `output` values divided by the count of non-null
    `output` values.
    """
    output = df['output']
    center = output.median()
    lower = center * 10**(-0.5)
    upper = center * 10**0.5
    in_window = (output > lower) & (output < upper)
    return output[in_window].count() / output.count()

# One fraction per `by` group, flattened so it can be joined onto `stats`
fraction_by_group = df.groupby(by)[df.columns].apply(get_high_low)
fraction = fraction_by_group.rename('fraction_within_10x').reset_index()
stats = stats.merge(fraction, how='left')

In [None]:
# Build one metadata row per unique condition (construct/construct2 pair), then
# derive three colour/marker lookups from it: by gene, by kind, and by condition.
# NOTE: the `color` column is overridden in place between the three lookups, so
# each palette below is a snapshot of `color` at that point; statement order matters.
metadata_comb = data.drop_duplicates('condition')[['construct','construct2','condition']]
metadata_comb = metadata_comb.merge(metadata, how='left', on='construct')
metadata_comb = metadata_comb.merge(metadata_construct2, how='left', on='construct2')

# Create color palette by architecture
metadata_comb.loc[metadata_comb['gene']=='1T', 'color'] = base.colors['teal']
metadata_comb.loc[metadata_comb['gene']=='2T', 'color'] = base.colors['green']
metadata_comb.loc[metadata_comb['gene']=='2V', 'color'] = base.colors['purple']

# markers ('X' is the fallback for rows whose gene is none of 1T/2T/2V)
metadata_comb['markers'] = 'X'
metadata_comb.loc[metadata_comb['gene']=='1T', 'markers'] = 'o'
metadata_comb.loc[metadata_comb['gene']=='2T', 'markers'] = 'D'
metadata_comb.loc[metadata_comb['gene']=='2V', 'markers'] = 's'

# Short display labels for each target-site kind
ts_label = {'na': 'base', 'NT': 'OL', 'T': 'CL'}
metadata_comb['ts_label'] = metadata_comb['ts_kind'].replace(ts_label)

# Snapshot 1: lookups keyed by gene.
# NOTE(review): `gene` repeats across rows, so several rows collapse onto one key;
# at this point colour/marker are assigned per gene, so the collapsed value should
# be the same whichever row is kept - confirm if per-gene assignments ever diverge.
metadata_dict = metadata_comb.set_index('gene').to_dict('dict')
gene_palette = metadata_dict['color']
gene_markers = metadata_dict['markers']

# Create color palette by kind (design)
metadata_comb.loc[(metadata_comb['gene']=='1T') & (metadata_comb['design']==2), 'color'] = base.colors['orange']
metadata_comb.loc[(metadata_comb['gene']=='1T') & (metadata_comb['design']==3), 'color'] = base.colors['red']

# Snapshot 2: lookups keyed by kind = '<gene>_<design>'
metadata_comb['kind'] = metadata_comb['gene'] + '_' + metadata_comb['design'].astype(str)
metadata_dict = metadata_comb.set_index('kind').to_dict('dict')
kind_palette = metadata_dict['color']
kind_markers = metadata_dict['markers']

# Create color palette by condition (design)
metadata_comb.loc[(metadata_comb['gene']=='2V') & (metadata_comb['construct2_promoter']=='U6'), 'color'] = base.colors['blue']
metadata_comb.loc[(metadata_comb['ts_kind']=='NT'), 'color'] = base.colors['gray']
# For NT rows of 1T design 3, pass the colour just assigned above through base.get_dark_color
metadata_comb.loc[(metadata_comb['ts_kind']=='NT') & (metadata_comb['gene']=='1T') & (metadata_comb['design']==3), 
                  'color'] = metadata_comb.loc[(metadata_comb['ts_kind']=='NT') & (metadata_comb['gene']=='1T') & 
                                               (metadata_comb['design']==3), 'color'].apply(base.get_dark_color)
# 'base' and 'marker' groups are drawn in black; this override is applied last
metadata_comb.loc[metadata_comb['group'].isin(['base','marker']), 'color'] = 'black'
# Recomputes `condition` with the same construct + '_' + construct2 expression used on `data`
metadata_comb['condition'] = metadata_comb['construct'] + '_' + metadata_comb['construct2']
# Snapshot 3: lookups keyed by condition
metadata_dict = metadata_comb.set_index('condition').to_dict('dict')
condition_palette = metadata_dict['color']
condition_markers = metadata_dict['markers']

# Display order of the architectures along the x axis
architecture_order = ['1T', '2T', '2V']

Load stochastic simulations (`data_sim`)

In [None]:
cache_path = rd.datadir/'projects'/'miR-iFFL'/'modeling'/'julia_stochastic_simulations'/'stochastic_sims.gzip'
data_sim = pd.read_parquet(cache_path)

# Rename
data_sim = data_sim.rename(columns={'copynum': 'copy_num', 'reg_gene': 'output', 'unreg_gene': 'marker'})

# Split the simulation's design label into an architecture code (`gene`) and a
# numeric design index (`design`); both are derived from the original labels
design_to_gene = {
    'Design 1': '1T', 'Design 2': '1T', 'Design 3': '1T',
    'Dual Vector': '2V', 'Dual Transcript': '2T',
}
design_to_index = {
    'Design 1': 1, 'Design 2': 2, 'Design 3': 3,
    'Dual Vector': 0, 'Dual Transcript': 0,
}
design_labels = data_sim['design']
data_sim['gene'] = design_labels.map(design_to_gene)
data_sim['design'] = design_labels.map(design_to_index)
display(data_sim)

In [None]:
# Bin data and calculate statistics
by = ['design','moi','risc','gene']
_, stats_sim, _, fits_sim = base.calculate_bins_stats(data_sim, by=by)

# Add the same two composite labels to the raw, stats and fits tables:
#   kind      = '<gene>_<design>'
#   condition = '<gene>_<moi>'
for sim_frame in (data_sim, stats_sim, fits_sim):
    sim_frame['kind'] = sim_frame['gene'] + '_' + sim_frame['design'].astype(str)
    sim_frame['condition'] = sim_frame['gene'] + '_' + sim_frame['moi'].astype(str)

### Set up figure

In [None]:
# Font sizes shared by every panel
base_size = base.font_sizes['base_size']
smaller_size = base.font_sizes['smaller_size']

sns.set_style('ticks')
context_rc = {'font.size': base_size, 'font.family': 'sans-serif', 'font.sans-serif':['Arial']}
sns.set_context('paper', font_scale=1.0, rc=context_rc)

# matplotlib overrides applied on top of the seaborn style/context
rc_overrides = {
    'axes.titlesize': base_size,
    'axes.labelsize': base_size,
    'xtick.labelsize': smaller_size,
    'ytick.labelsize': smaller_size,
    'pdf.fonttype': 42,
    'ytick.major.size': 3,
    'xtick.major.size': 3,
    'ytick.minor.size': 2,
    'ytick.major.pad': 2,
    'xtick.major.pad': 2,
    'lines.linewidth': 1,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.labelpad': 2,
}
plt.rcParams.update(rc_overrides)

In [None]:
# Create the overall figure, gridspec, and add subfigure labels
fig = plt.figure(figsize=(6.8504,3))
fig_gridspec = matplotlib.gridspec.GridSpec(
    2, 6, figure=fig,
    wspace=0.4, hspace=0.4, height_ratios=[1,1], width_ratios=[1]*6,
)

# Grid cells occupied by each lettered panel (2 rows x 6 columns)
panel_cells = {
    'A': fig_gridspec[0,:2],
    'B': fig_gridspec[0,2:],
    'C': fig_gridspec[1,:2],
    'D': fig_gridspec[1,2:],
}
subfigures = {}
for label, cell in panel_cells.items():
    subfig = fig.add_subfigure(cell)
    # Bold panel letter anchored at the top-left corner of the subfigure
    panel_letter = matplotlib.text.Text(
        x=0, y=1, text=f'{label}',
        fontsize=base.font_sizes['subpanel_label'], fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure,
    )
    subfig.add_artist(panel_letter)
    subfigures[label] = subfig

# Shared styling for categorical strip plots
scatter_kwargs = {'s': 4, 'jitter': 0.2, 'linewidth': 0.5, 'edgecolor': 'white'}

output_path = rd.rootdir/'output'/'fig_architecture-supp'
fig_path = output_path/'fig_architecture-supp.pdf'
fig.savefig(rd.outfile(fig_path))

In [None]:
# two-gene architectures with 3'UTR target sites
def _select_utr3_architectures(frame):
    """Return a copy of the rows of `frame` shown in the architecture comparison.

    A row is kept when it matches any of:
      * gene '1T', design 1, group 'controller'
      * gene '2T', group 'dual', ts_loc "3'"
      * gene '2V', group 'ts3', construct2_promoter 'EF1a'
      * group 'base' with a construct2_promoter other than 'U6'

    `frame` must provide the columns gene, design, group, ts_loc and
    construct2_promoter. A copy is returned so callers can add columns freely.
    """
    single_transcript = (frame['gene']=='1T') & (frame['design']==1) & (frame['group']=='controller')
    dual_transcript = (frame['gene']=='2T') & (frame['group']=='dual') & (frame['ts_loc']=='3\'')
    dual_vector = (frame['gene']=='2V') & (frame['group']=='ts3') & (frame['construct2_promoter']=='EF1a')
    base_controls = (frame['group']=='base') & (frame['construct2_promoter']!='U6')
    return frame[single_transcript | dual_transcript | dual_vector | base_controls].copy()

# The same selection is applied to all three tables so they stay in step
stats_subset = _select_utr3_architectures(stats)
stats_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)
fits_subset = _select_utr3_architectures(fits)
fits_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)
df_quantiles_subset = _select_utr3_architectures(df_quantiles)
df_quantiles_subset.sort_values(['gene','construct2_promoter'], inplace=True)

# shift xticks to add more space between architecture groups
buffer = 0.6
gene_order = ['1T', '2T', '2V']
xtick_locs = [0,1,2, 3+buffer,4+buffer, 5+buffer*2,6+buffer*2,7+buffer*2,]

# Assign each condition, in sorted order of appearance, its numeric x position.
# `.map` yields a float column directly (str -> float lookup).
condition_loc = dict(zip(stats_subset['condition'].unique(), xtick_locs))
stats_subset['condition_loc'] = stats_subset['condition'].map(condition_loc)

condition_loc = dict(zip(fits_subset['condition'].unique(), xtick_locs))
fits_subset['condition_loc'] = fits_subset['condition'].map(condition_loc)

# Uses the mapping derived from `fits_subset`; conditions outside the subset get NaN
metadata_comb['condition_loc'] = metadata_comb['condition'].map(condition_loc)
# `.assign` builds a new frame, so no column is written into a dropna() result
m = metadata_comb.dropna().assign(condition_loc=lambda d: d['condition_loc'].astype(str))

# Right limit = number of conditions plus one `buffer` gap between each pair of groups
xlim_adjusted = (-0.5, len(stats_subset['condition'].unique())-0.5+buffer*(len(gene_order)-1))
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

In [None]:
# Panel A: output standard deviation for each selected condition, grouped by architecture
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,1,)

plot_df = stats_subset
plot_df2 = fits_subset

# stat std
ax = axes
# One stripplot call per condition so that each can use its own marker shape
for construct, group in plot_df.groupby('condition', sort=False):
    strip_style = dict(scatter_kwargs2, marker=condition_markers[construct])
    sns.stripplot(data=group, x='condition_loc', y='output_std', hue='condition', palette=condition_palette,
                  legend=False, ax=ax, **strip_style)
ax.set(title='Standard deviation', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs)

# NOTE(review): the reference line is the mean `output_gmean` of the 'marker'
# group, drawn here on the standard-deviation axis - confirm this is intended.
is_marker_group = stats['group']=='marker'
marker_baseline = stats.loc[is_marker_group, 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# Shade the middle architecture group and write the architecture names under the axis
yloc = -6
shade_color = base.get_light_color(base.colors['gray'])
ax.axvspan(2.5+buffer/2, 4.5+buffer*1.5, color=shade_color, alpha=0.2,)
group_centers = (1, 3.5+buffer, 6+buffer*2)
for arch_name, x_center in zip(architecture_order, group_centers):
    ax.annotate(arch_name, (x_center,yloc), xycoords=('data','axes points'),
                ha='center', va='top', ma='center', fontsize=smaller_size)

# Blank the per-condition tick labels; the group annotations replace them
blank_labels = [''] * len(ax.get_xticklabels())
ax.set_xticklabels(blank_labels)
ax.set_xlabel(ax.get_xlabel(), labelpad=10)
sns.despine(ax=ax)
ax.minorticks_off()

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel B: mean, standard deviation and slope for the U6-promoter conditions
# (excluding 5' target sites), split by target-site kind
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.5, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.4))

plot_df = stats[(stats['construct2_promoter']=='U6') & (stats['ts_loc']!='5\'')].copy()
plot_df2 = fits[(fits['construct2_promoter']=='U6') & (fits['ts_loc']!='5\'')].copy()

# Ordered categorical so the x axis follows the key order of `ts_label`
ts_kind_dtype = pd.api.types.CategoricalDtype(categories=list(ts_label.keys()), ordered=True)
plot_df['ts_kind'] = plot_df['ts_kind'].astype(ts_kind_dtype)
plot_df2['ts_kind'] = plot_df2['ts_kind'].astype(ts_kind_dtype)

# stat gmean
ax = axes[0]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.stripplot(data=group, x='ts_kind', y='output_gmean', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log',)
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.stripplot(data=group, x='ts_kind', y='output_std', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log',)

# slope
ax = axes[2]
for construct, group in plot_df2.groupby('condition', sort=False):
    sns.stripplot(data=group, x='ts_kind', y='slope', hue='condition', palette=condition_palette,
                    legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs)
marker_baseline = fits.loc[(fits['group']=='marker'), 'slope'].mean()
ax.axhline(marker_baseline, color='black', ls=':')
ax.set(title='Slope', xlabel='', ylabel='', ylim=(ax.get_ylim()[0],1))
# One-decimal tick labels via a formatter: labels are derived from the actual
# tick values, so they stay correct after the ylim change above, and no
# backslash escape is needed inside an f-string (a SyntaxError before Python 3.12).
ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.1f'))

# Replace raw ts_kind tick labels with their display names
for ax in axes:
    ax.set_xticklabels([ts_label[l.get_text()] for l in ax.get_xticklabels()], ha='right', rotation=45)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel C: one copy-number distribution per simulated MOI
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.2, bottom=0.3, top=0.6, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(wspace=0.2,))

plot_df = data_sim[(data_sim['risc']==10000) & (data_sim['design']<=1)]
xticks = [0,1,5,10,15,20]
xticklabels = [str(x) for x in xticks]
xticklabels[0] = ''  # leave the tick at 0 unlabeled

# Copy numbers at which the distribution is evaluated (same for every MOI)
x_range = range(1,20)

for i,moi in enumerate(plot_df['moi'].unique()):

    # Poisson distribution
    ax = axes[i]
    # The '-o' format string already sets a solid line with circle markers;
    # also passing ls='-' would make matplotlib warn about a redundant linestyle.
    ax.plot(x_range, base.truncated_poisson(x_range, moi), '-o', ms=2, color=base.colors['gray'])
    ax.set(yticks=[], yticklabels=[], xticks=xticks, xticklabels=xticklabels, title=str(moi))
    # Only the middle axis carries the x label; only the first spells out 'MOI = '
    if i==1: ax.set(xlabel='copy #, $c$')
    elif i==0: ax.set(title='MOI = '+ax.get_title())
    sns.despine(ax=ax, left=True)

for ax in axes.flatten(): ax.minorticks_off()

fig.savefig(rd.outfile(fig_path))

In [None]:
# Panel D: simulated mean, standard deviation and slope per architecture, grouped by MOI
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.3, top=0.35, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.3))

# Work on sorted copies: columns are added below, and writing into a filtered
# slice of stats_sim / fits_sim is chained assignment (SettingWithCopyWarning).
plot_df = stats_sim[(stats_sim['design']<=1) & (stats_sim['risc']==10000)].sort_values(['moi','gene']).copy()
plot_df2 = fits_sim[(fits_sim['design']<=1) & (fits_sim['risc']==10000)].sort_values(['moi','gene']).copy()

buffer = 0.6
moi_list = plot_df['moi'].unique()
# Three positions per MOI group, with an extra `buffer` gap between groups
xtick_locs = np.concatenate([[x+i*(3+buffer) for x in range(3)] for i in range(len(moi_list))])
# Groups along x are MOIs here, so the number of inter-group gaps comes from
# `moi_list` rather than from `gene_order` defined for another panel
xlim_adjusted = (-0.5, len(plot_df['condition'].unique())-0.5+buffer*(len(moi_list)-1))

# Numeric x position for each condition, in sorted order of appearance
condition_loc = dict(zip(plot_df['condition'].unique(), xtick_locs))
plot_df['condition_loc'] = plot_df['condition'].map(condition_loc)
condition_loc = dict(zip(plot_df2['condition'].unique(), xtick_locs))
plot_df2['condition_loc'] = plot_df2['condition'].map(condition_loc)

# stat gmean (gray = marker gene, colored = output gene)
ax = axes[0]
for (gene, moi), group in plot_df.groupby(['gene','moi'], sort=False):
    sns.stripplot(data=group, x='condition_loc', y='marker_gmean', color=base.colors['gray'],
                    legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
    sns.stripplot(data=group, x='condition_loc', y='output_gmean', hue='gene', palette=gene_palette,
                    legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
ax.set(title='Mean', xlim=xlim_adjusted, xlabel='MOI', ylabel='', yscale='log', xticks=xtick_locs)

# stat std (gray = marker gene, colored = output gene)
ax = axes[1]
for (gene, moi), group in plot_df.groupby(['gene','moi'], sort=False):
    sns.stripplot(data=group, x='condition_loc', y='marker_std', color=base.colors['gray'],
                    legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
    sns.stripplot(data=group, x='condition_loc', y='output_std', hue='gene', palette=gene_palette,
                    legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
ax.set(title='Standard deviation', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)
ax.yaxis.set_minor_formatter(plt.NullFormatter())

# slope
ax = axes[2]
for (gene, moi), group in plot_df2.groupby(['gene','moi'], sort=False):
    sns.stripplot(data=group, x='condition_loc', y='slope', hue='gene', palette=gene_palette,
                    legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
ax.set(title='Slope', xlim=xlim_adjusted, xlabel='', ylabel='', xticks=xtick_locs, yticks=[0.6,0.7,0.8,0.9,1])

# Shade the middle MOI group and write each MOI value under its group
yloc = -6
group_centers = (1, 4+buffer, 7+buffer*2)
for ax in axes:
    ax.axvspan(2.5+buffer/2, 5.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    for moi_value, x_center in zip(moi_list, group_centers):
        ax.annotate(moi_value, (x_center,yloc), xycoords=('data','axes points'),
                    ha='center', va='top', ma='center', fontsize=smaller_size)
    # Blank the per-condition tick labels; the MOI annotations replace them
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(fig_path))