In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import rushd as rd
import scipy as sp
import seaborn as sns
from statannotations.Annotator import Annotator

# enables concurrent editing of base.py:
# re-import the already-imported local `base` module so that edits made to it
# while the kernel is running are picked up when this cell is re-executed
from importlib import reload
reload(base)

### Load data

Two-gene data (`data`)

In [None]:
# Locations of the plasmid metadata and of the instrument data, both under rd.datadir
metadata_path = rd.datadir / 'projects' / 'miR-iFFL' / 'plasmids'
base_path = rd.datadir / 'instruments' / 'data' / 'attune'

# Load the 'two_gene' dataset; the loader returns the per-event data,
# quantiles, summary stats, and the metadata table (in that order)
data, quantiles, stats, metadata = base.load_data(
    base_path,
    metadata_path,
    'two_gene',
)

In [None]:
# Extract palette dicts from dataframes.
# NOTE: the gene- and kind-level palettes are read *before* the per-condition
# colour overrides below are written into `metadata`, so statement order matters.
metadata_dict = metadata.set_index('gene').to_dict('dict')
gene_palette, gene_markers = metadata_dict['color'], metadata_dict['markers']

metadata_dict = metadata.set_index('kind').to_dict('dict')
kind_palette, kind_markers = metadata_dict['color'], metadata_dict['markers']

# Create color palette by condition (later overrides win over earlier ones)
color_overrides = [
    ((metadata['gene']=='2V') & (metadata['construct2_promoter']=='U6'), base.colors['blue']),
    ((metadata['ts_kind']=='NT'), base.colors['gray']),
    (metadata['group'].isin(['base','marker']), 'black'),
]
for override_mask, override_color in color_overrides:
    metadata.loc[override_mask, 'color'] = override_color

metadata_dict = metadata.set_index('condition').to_dict('dict')
condition_palette, condition_markers = metadata_dict['color'], metadata_dict['markers']

# Additional palettes for missplicing models
model_palette = dict(
    base=base.colors['teal'],
    more_mRNA=base.colors['orange'],
    more_miRNA=base.colors['red'],
)

model_markers = dict(base='o', more_mRNA='D', more_miRNA='s')

# Short tick labels used for each model
model_label = dict(base='orig.', more_mRNA='A', more_miRNA='B')

In [None]:
# Data for two-gene architectures with 3'UTR target sites
stats_subset = stats[((stats['gene']=='1T') & (stats['design']==1) & (stats['group']=='controller')) |
                ((stats['gene']=='2T') & (stats['group']=='dual') & (stats['ts_loc']=='3\'')) |
                ((stats['gene']=='2V') & (stats['group']=='ts3') & (stats['construct2_promoter']=='EF1a')) |
                ((stats['group']=='base') & (stats['construct2_promoter']!='U6'))].copy()
stats_subset.sort_values(['gene','construct2_promoter','group','ts_kind'], inplace=True)

# Remove outliers (1T base biorep1, 2V EF1a base/NT biorep4 -- see `flow/outliers.ipynb`)
stats_subset = stats_subset[~((stats_subset['gene']=='1T') & (stats_subset['group']=='base') & (stats_subset['biorep']==1))]
stats_subset = stats_subset[~((stats_subset['gene']=='2V') & (stats_subset['construct2_promoter']=='EF1a') & (stats_subset['ts_kind'].isin(['na','NT'])) & (stats_subset['biorep']==4))]

Load stochastic simulations (`sim_data`)

In [None]:
# Project folder under rd.datadir that holds the modeling output
simulation_path = rd.datadir / 'projects' / 'miR-iFFL'

# Load the 'stochastic_sims' results: per-simulation data and their summary stats
sim_data, sim_stats = base.load_modeling(
    simulation_path,
    'stochastic_sims',
)

In [None]:
# Load the 'missplicing' model results from this repository's output folder
sim_data2, sim_stats2 = base.load_modeling(
    rd.rootdir / 'output',
    'missplicing',
)

In [None]:
# Calculate stats, binning on copy number.
# The aggregations are passed as strings ('mean', 'std') rather than np.mean/np.std:
# pandas >= 2.1 emits a FutureWarning for NumPy callables in groupby.agg and will
# change their meaning in a future release; the strings keep the current result and
# produce the same column labels.
selected_sims = sim_data[(sim_data['risc']==10000) & (sim_data['design']<2)]
copy_num_stats = (selected_sims
                  .groupby(['kind','copy_num'])[['marker','output']]
                  .agg(['mean', 'std'])
                  .reset_index())
# Flatten the two-level column index using the project helper
copy_num_stats.columns = copy_num_stats.columns.map(base.rename_multilevel_cols)

copy_num_stats2 = (sim_data2
                   .groupby(['model','copy_num'])[['marker','output']]
                   .agg(['mean', 'std'])
                   .reset_index())
copy_num_stats2.columns = copy_num_stats2.columns.map(base.rename_multilevel_cols)

# Compute slope of output_mean vs. marker_mean per kind/model, using only bins above
# a copy-number threshold (8 for the base simulations, 12 for the missplicing models).
# Columns are selected explicitly before .apply so the grouping column is passed through.
fits = (copy_num_stats[copy_num_stats['copy_num']>8]
        .groupby('kind')[copy_num_stats.columns]
        .apply(lambda x: base.get_slope(x, 'marker_mean', 'output_mean'))
        .reset_index())
fits2 = (copy_num_stats2[copy_num_stats2['copy_num']>12]
         .groupby('model')[copy_num_stats2.columns]
         .apply(lambda x: base.get_slope(x, 'marker_mean', 'output_mean'))
         .reset_index())

### Set up figure

In [None]:
# Set plotting context
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=base.rc_context)
plt.rcParams.update(base.rc_params)
annotate_kwargs = base.annotate_kwargs
scatter_kwargs = base.scatter_kwargs

# Create the overall figure and a 4-row x 6-column gridspec
fig = plt.figure(figsize=(base.figure_width['full'], 1.5*4))
fig_gridspec = matplotlib.gridspec.GridSpec(
    4, 6, figure=fig,
    wspace=0.4, hspace=0.4,
    width_ratios=[1.6,0.4,0.6,1,1.1,1.3],
)

# Gridspec cell(s) occupied by each lettered panel
panel_cells = {
    'A': fig_gridspec[0,:3],
    'B': fig_gridspec[0,3:],
    'C': fig_gridspec[1,:3],
    'D': fig_gridspec[1,3:],
    'E': fig_gridspec[2,:3],
    'F': fig_gridspec[2,3:],
    'G': fig_gridspec[2:,5:],
    'H': fig_gridspec[3,:3],
}
subfigures = {panel: fig.add_subfigure(cell) for panel, cell in panel_cells.items()}

# Add subpanel labels in the top-left corner of each subfigure
# (keys containing '2' are skipped)
for label, subfig in subfigures.items():
    if '2' in label:
        continue
    panel_text = matplotlib.text.Text(
        x=0, y=1, text=f'{label}',
        fontsize=base.font_sizes['subpanel_label'], fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure,
    )
    subfig.add_artist(panel_text)

# Save to output folder
output_path = rd.rootdir/'output'/'fig_architecture-supp'/'fig_architecture-supp.pdf'
fig.savefig(rd.outfile(output_path))

In [None]:
# Define values to shift xticks in plotting
# to add more space between architecture groups
buffer = 0.6
gene_order = ['1T', '2T', '2V']
xtick_locs = [0,1,2, 3+buffer,4+buffer, 5+buffer*2,6+buffer*2,7+buffer*2,]

# Assign each condition (in order of first appearance) to one x location.
# zip() stops at the shorter input, so conditions beyond len(xtick_locs) get no location.
condition_loc = dict(zip(stats_subset['condition'].unique(), xtick_locs))

# Use .map (as for `metadata` below) rather than .replace: it yields a numeric column
# directly instead of relying on the deprecated downcasting done by Series.replace.
# .assign rebinds to a new frame, which avoids writing into a filtered slice.
stats_subset = stats_subset.assign(condition_loc=stats_subset['condition'].map(condition_loc))

metadata['condition_loc'] = metadata['condition'].map(condition_loc)
# Rows with any missing value are dropped; locations are stored as strings in `m`
m = metadata.dropna().assign(condition_loc=lambda df: df['condition_loc'].astype(str))

# Right limit: number of conditions plus one `buffer` per gap between architecture groups
xlim_adjusted = (-0.5, len(stats_subset['condition'].unique())-0.5+buffer*(len(gene_order)-1))

In [None]:
# Panel A: output standard deviation and CV per condition, grouped by architecture
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.35, bottom=0.4, top=0.35, right=0.12)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.2))

plot_df = stats_subset
display(plot_df['condition'].unique())

# (column, title, panel-specific axis settings) for the two panels, left to right
panel_specs = [
    ('output_std', 'Std.', dict(yscale='log', ylim=(1.5e2,7e4))),       # stat std
    ('output_variation', 'CV', dict(ylim=(0,2.7))),                     # stat CV
]
for ax, (stat_col, panel_title, panel_kwargs) in zip(axes, panel_specs):
    # One pointplot call per condition so that each gets its own marker
    for construct, group in plot_df.groupby('condition', sort=False):
        sns.pointplot(data=group, x='condition_loc', y=stat_col, hue='condition', palette=condition_palette,
                      ax=ax, marker=condition_markers[construct], native_scale=True, **scatter_kwargs)
    ax.set(title=panel_title, xlim=xlim_adjusted, xlabel='', ylabel='', xticks=xtick_locs, **panel_kwargs)

# Shade the middle architecture group and write the group names below the axis
yloc = -6
group_centers = [1, 3.5+buffer, 6+buffer*2]
group_label_kwargs = dict(xycoords=('data','axes points'), ha='center', va='top', ma='center',
                          fontsize=base.font_sizes['smaller_size'])
for ax in axes:
    ax.axvspan(2.5+buffer/2, 4.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    for group_name, x_center in zip(gene_order[:3], group_centers):
        ax.annotate(group_name, (x_center,yloc), **group_label_kwargs)
    # Blank out the per-condition tick labels; only the group names are shown
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on separate scratch figures, one per statistic)
pairs = [('base','CL'), ('OL','CL')]
for stat in ['output_std', 'output_variation']:
    f, axes = plt.subplots(1,len(gene_order), figsize=(5,2), gridspec_kw=dict(wspace=0.5))
    for ax, gene in zip(axes, gene_order):
        print('Circuit type: '+gene)
        test_df = plot_df[(plot_df['gene']==gene)]
        sns.stripplot(data=test_df, x='ts_label', y=stat, ax=ax, hue='condition', palette=condition_palette, legend=False)
        ax.set(ylabel='', xlabel='', title=gene, yscale='log')

        # For 2T only the ('OL','CL') comparison is made
        ps = pairs[1:] if gene=='2T' else pairs
        annotator = Annotator(ax, ps, data=test_df, x='ts_label', y=stat)
        annotator.configure(**annotate_kwargs,).apply_and_annotate()
        print('\n')
    axes[0].set(ylabel=stat)

In [None]:
# Panel B: summary statistics for the constructs whose second promoter is U6
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.4, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,4, gridspec_kw=dict(wspace=0.75))

plot_df = stats[(stats['construct2_promoter']=='U6') & (stats['ts_loc']!='5\'') & (stats['ts_kind']!='none')].copy()
plot_df = plot_df.sort_values(by='ts_label', ascending=False)
display(plot_df['condition'].unique())

# stat gmean
ax = axes[0]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.pointplot(data=group, x='ts_label', y='output_gmean', hue='condition', palette=condition_palette,
                    ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', ylim=(7e1,3e4))
# Dotted reference line: mean output_gmean over all rows in the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')
ax.minorticks_off()

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.pointplot(data=group, x='ts_label', y='output_std', hue='condition', palette=condition_palette,
                    ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', ylim=(1e2,8e4))
ax.minorticks_off()

# slope
ax = axes[2]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.pointplot(data=group, x='ts_label', y='slope', hue='condition', palette=condition_palette,
                    ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlabel='', ylabel='', ylim=(0.4,1.15))
# Show y ticks with one decimal place. A formatter is used instead of rewriting the
# tick-label strings: the old code parsed label text captured before `ylim` was applied
# and pinned it with set_yticklabels (labels can drift from the ticks), and it put a
# backslash escape inside an f-string expression, a SyntaxError before Python 3.12.
ax.yaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.1f'))

# stat CV
ax = axes[3]
for construct, group in plot_df.groupby('condition', sort=False):
    sns.pointplot(data=group, x='ts_label', y='output_variation', hue='condition', palette=condition_palette,
                    ax=ax, marker=condition_markers[construct], **scatter_kwargs)
ax.set(title='CV', xlabel='', ylabel='', ylim=(0,3.1), yticks=[0,1,2])

# Rotate the category labels on every panel
for i,ax in enumerate(axes):
    ax.set_xticklabels(ax.get_xticklabels(), ha='right', rotation=45)

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on a separate scratch figure)
f, axes = plt.subplots(1,4, figsize=(7,2))
pairs = [('base','CL'), ('OL','CL')]
stat_list = ['output_gmean', 'output_std', 'slope', 'output_variation']
for ax, stat in zip(axes, stat_list):
    sns.stripplot(data=plot_df, x='ts_label', y=stat, ax=ax, hue='condition', palette=condition_palette, legend=False,)
    # Mean and std are shown on a log axis; slope and CV stay linear
    if stat not in ('slope', 'output_variation'): ax.set(yscale='log')
    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y=stat,)
    annotator.configure(**annotate_kwargs, line_offset=2).apply_and_annotate()

In [None]:
# Panel C: copy-number distribution (truncated Poisson) for each simulated MOI
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.2, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(wspace=0.2,))

plot_df = sim_data[(sim_data['risc']==10000) & (sim_data['design']<=1)]

# Copy numbers at which the distribution is evaluated (same for every MOI)
x_range = range(1,20)

for i,moi in enumerate(plot_df['moi'].unique()):

    # Poisson distribution
    ax = axes[i]
    # The '-o' format string already requests a solid line; the extra ls='-' keyword
    # was redundant and made matplotlib warn about a doubly defined linestyle.
    ax.plot(x_range, base.truncated_poisson(x_range, moi), '-o', ms=2, color=base.colors['gray'])
    ax.set(yticks=[], yticklabels=[], xticks=[1,5,10,15,20], title=str(moi))
    # Only the middle panel carries the x label; only the first title gets the 'MOI = ' prefix
    if i==1: ax.set(xlabel='copy number, $c$')
    elif i==0: ax.set(title='MOI = '+ax.get_title())
    sns.despine(ax=ax, left=True)

for ax in axes.flatten(): ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel D: simulated marker/output mean and std for each gene, grouped by MOI
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.3, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.3))

# sort_values returns a new frame, so the filtered slice of `sim_stats` is never
# modified in place (the old in-place sort/assignment acted on a slice).
plot_df = sim_stats[(sim_stats['design']<=1) & (sim_stats['risc']==10000)].sort_values(['moi','gene'])

buffer = 0.6
moi_list = plot_df['moi'].unique()
# Three positions per MOI group, with an extra `buffer` between consecutive groups
xtick_locs = np.concatenate([[x+i*(3+buffer) for x in range(3)] for i in range(len(moi_list))])
# One `buffer` per gap between MOI groups (previously counted via `gene_order`,
# a list defined in another cell that only matched by coincidence)
xlim_adjusted = (-0.5, len(plot_df['condition'].unique())-0.5+buffer*(len(moi_list)-1))

condition_loc = dict(zip(plot_df['condition'].unique(), xtick_locs))
# .map gives a numeric column directly; .assign avoids writing into a slice
plot_df = plot_df.assign(condition_loc=plot_df['condition'].map(condition_loc))

# stat gmean: marker in gray, output in the gene colour
ax = axes[0]
for (gene, moi), group in plot_df.groupby(['gene','moi'], sort=False):
    sns.pointplot(data=group, x='condition_loc', y='marker_gmean', color=base.colors['gray'],
                    ax=ax, marker=gene_markers[gene], native_scale=True, **scatter_kwargs)
    sns.pointplot(data=group, x='condition_loc', y='output_gmean', hue='gene', palette=gene_palette,
                    ax=ax, marker=gene_markers[gene], native_scale=True, **scatter_kwargs)
ax.set(title='Mean', xlim=xlim_adjusted, xlabel='MOI', ylabel='', yscale='log', xticks=xtick_locs)

# stat std
ax = axes[1]
for (gene, moi), group in plot_df.groupby(['gene','moi'], sort=False):
    sns.pointplot(data=group, x='condition_loc', y='marker_std', color=base.colors['gray'],
                    ax=ax, marker=gene_markers[gene], native_scale=True, **scatter_kwargs)
    sns.pointplot(data=group, x='condition_loc', y='output_std', hue='gene', palette=gene_palette,
                    ax=ax, marker=gene_markers[gene], native_scale=True, **scatter_kwargs)
ax.set(title='Std.', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)
ax.yaxis.set_minor_formatter(plt.NullFormatter())

for i,ax in enumerate(axes):
    yloc = -6
    # Shade the second MOI group
    ax.axvspan(2.5+buffer/2, 5.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    # Write each MOI value under the centre of its group (centres at 1, 4+buffer, 7+2*buffer, ...)
    for group_idx, moi_value in enumerate(moi_list):
        ax.annotate(moi_value, (1+group_idx*(3+buffer),yloc), xycoords=('data','axes points'),
                    ha='center', va='top', ma='center', fontsize=base.font_sizes['smaller_size'])
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel E: simulated output vs. marker binned by copy number, plus fitted slopes
subfig = subfigures['E']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.3, top=0.35, right=0.12)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.3, width_ratios=[1,0.3]))

plot_df = copy_num_stats

def _thousands_label(value, pos):
    """Format a tick value as thousands, e.g. 4000 -> '4k'."""
    return f'{value/1000:.0f}k'

# scatter marker vs output, binned by copy number
ax = axes[0]
for kind, group in plot_df.groupby('kind'):
    sns.scatterplot(data=group, x='marker_mean', y='output_mean', hue='kind', palette=kind_palette, 
                    marker=kind_markers[kind], ax=ax, legend=False)
    ax.errorbar(group['marker_mean'], group['output_mean'], xerr=group['marker_std'], yerr=group['output_std'],
                color=base.get_dark_color(kind_palette[kind]), fmt='none')
ax.set(xlabel='marker (#)', ylabel='output (#)', xlim=[0,15e3], ylim=[0,7e3])
# Label ticks in thousands with a formatter instead of parsing the current tick-label
# text and pinning it with set_*ticklabels: fixed labels on an automatic locator can go
# out of sync with the ticks, and parsing the text fails on empty or non-ASCII labels.
ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_thousands_label))
ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_thousands_label))
# Shade from the median marker_mean of the copy_num 8 and 9 bins to the right axis limit
# (the slope fit uses copy_num > 8)
ax.axvspan(plot_df.loc[(plot_df['copy_num']==8) | (plot_df['copy_num']==9), 'marker_mean'].median(), ax.get_xlim()[1],
           color=base.get_light_color(base.colors['gray']), alpha=0.2,)

# slope
ax = axes[1]
for construct, group in fits.groupby('kind', sort=False):
    sns.pointplot(data=group, x='kind', y='slope', hue='kind', palette=kind_palette,
                    ax=ax, marker=kind_markers[construct], native_scale=True, **scatter_kwargs)
# Tick labels keep only the part of each kind name before the first underscore
ax.set(title='Slope', xlabel='model', ylabel='', xticklabels=[l.get_text().split('_')[0] for l in ax.get_xticklabels()],
       yticks=[0.2,0.3,0.4,0.5])

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel G: marker/output mean and std for the missplicing models, grouped by MOI
subfig = subfigures['G']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.31, bottom=0.3, top=0.85, right=0.1)
axes = subfig.subplots(2,1, gridspec_kw=dict(hspace=0.4))

# Work on a sorted copy: the old code aliased `sim_stats2` and then sorted it and added
# a column in place, silently changing the shared frame on every run of this cell.
plot_df = sim_stats2.sort_values(['moi','model'])

buffer = 0.6
moi_list = plot_df['moi'].unique()
model_order = ['base', 'more_mRNA', 'more_miRNA']
# Three positions per MOI group, with an extra `buffer` between consecutive groups
xtick_locs = np.concatenate([[x+i*(3+buffer) for x in range(3)] for i in range(len(moi_list))])
# One `buffer` per gap between MOI groups
xlim_adjusted = (-0.5, len(plot_df['condition'].unique())-0.5+buffer*(len(moi_list)-1))

condition_loc = dict(zip(plot_df['condition'].unique(), xtick_locs))
# .map gives a numeric column directly instead of relying on Series.replace downcasting
plot_df = plot_df.assign(condition_loc=plot_df['condition'].map(condition_loc))

# stat gmean: marker in gray, output in the model colour
ax = axes[0]
for (model, moi), group in plot_df.groupby(['model','moi'], sort=False):
    sns.pointplot(data=group, x='condition_loc', y='marker_gmean', color=base.colors['gray'],
                    ax=ax, marker=model_markers[model], native_scale=True, **scatter_kwargs)
    sns.pointplot(data=group, x='condition_loc', y='output_gmean', hue='model', palette=model_palette,
                    ax=ax, marker=model_markers[model], native_scale=True, **scatter_kwargs)
ax.set(xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs, title='Mean')

# stat std
ax = axes[1]
for (model, moi), group in plot_df.groupby(['model','moi'], sort=False):
    sns.pointplot(data=group, x='condition_loc', y='marker_std', color=base.colors['gray'],
                    ax=ax, marker=model_markers[model], native_scale=True, **scatter_kwargs)
    sns.pointplot(data=group, x='condition_loc', y='output_std', hue='model', palette=model_palette,
                    ax=ax, marker=model_markers[model], native_scale=True, **scatter_kwargs)
ax.set(xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs, title='Std.')
ax.yaxis.set_minor_formatter(plt.NullFormatter())

yloc = -6
for i,ax in enumerate(axes):
    # Shade the second MOI group and blank the per-condition tick labels
    ax.axvspan(2.5+buffer/2, 5.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))

# MOI values are written only under the bottom panel, centred on each group
# (centres at 1, 4+buffer, 7+2*buffer, ...)
ax = axes[1]
for group_idx, moi_value in enumerate(moi_list):
    ax.annotate(moi_value, (1+group_idx*(3+buffer),yloc), xycoords=('data','axes points'),
                ha='center', va='top', ma='center', fontsize=base.font_sizes['smaller_size'])
ax.set_xlabel('MOI', labelpad=10)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel H: missplicing-model output vs. marker binned by copy number, plus fitted slopes
subfig = subfigures['H']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.3, top=0.35, right=0.12)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.3, width_ratios=[1,0.3]))

plot_df = copy_num_stats2

def _thousands_label(value, pos):
    """Format a tick value as thousands, e.g. 4000 -> '4k'."""
    return f'{value/1000:.0f}k'

# scatter marker vs output, binned by copy number
ax = axes[0]
for model, group in plot_df.groupby('model'):
    sns.scatterplot(data=group, x='marker_mean', y='output_mean', hue='model', palette=model_palette, 
                    marker=model_markers[model], ax=ax, legend=False)
    ax.errorbar(group['marker_mean'], group['output_mean'], xerr=group['marker_std'], yerr=group['output_std'],
                color=base.get_dark_color(model_palette[model]), fmt='none')
ax.set(xlabel='marker (#)', ylabel='output (#)', xlim=[0,15e3], ylim=[0,5.5e3])
# Label ticks in thousands with a formatter instead of parsing the current tick-label
# text and pinning it with set_*ticklabels: fixed labels on an automatic locator can go
# out of sync with the ticks, and parsing the text fails on empty or non-ASCII labels.
ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_thousands_label))
ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_thousands_label))
# Shade from the median marker_mean of the copy_num 12 and 13 bins to the right axis limit
# (the slope fit uses copy_num > 12)
ax.axvspan(plot_df.loc[(plot_df['copy_num']==12) | (plot_df['copy_num']==13), 'marker_mean'].median(), ax.get_xlim()[1],
           color=base.get_light_color(base.colors['gray']), alpha=0.2,)

# slope
ax = axes[1]
for model, group in fits2.groupby('model', sort=False):
    sns.pointplot(data=group, x='model', y='slope', hue='model', palette=model_palette,
                    ax=ax, marker=model_markers[model], native_scale=True, **scatter_kwargs)
# Replace the model names on the x axis with their short labels from `model_label`
ax.set(title='Slope', xlabel='model', ylabel='', xticklabels=[model_label[l.get_text()] for l in ax.get_xticklabels()],
       yticks=[0.1,0.2,0.3,0.4])

fig.savefig(rd.outfile(output_path))

In [None]:
# Save to OneDrive: write a second copy of the finished figure into the
# manuscript's linked-figures folder under rd.datadir
fig.savefig(
    rd.outfile(
        rd.datadir / 'manuscripts' / '2024_miR-iFFL' / 'figures' / 'links' / 'fig_architecture-supp.pdf'
    )
)