In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

Load two-gene data (`data`, `quantiles`, `stats`, `metadata`)

In [None]:
# Locations of the instrument data and of the plasmid metadata
base_path = rd.datadir/'instruments'/'data'/'attune'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'

# base.load_data returns four objects for the 'two_gene' dataset; unpack them by name
two_gene_loaded = base.load_data(base_path, metadata_path, 'two_gene')
data, quantiles, stats, metadata = two_gene_loaded

In [None]:
# Build color/marker lookups keyed by gene, by kind, and by condition.
#
# The gene and kind lookups read the 'color' column exactly as loaded.
# The condition lookup needs a few color overrides; these are applied to a
# copy of `metadata` so that the loaded colors are left untouched and
# re-running this cell always yields the same gene/kind palettes.

# Extract palette dicts from dataframes
metadata_dict = metadata.set_index('gene').to_dict('dict')
gene_palette = metadata_dict['color']
gene_markers = metadata_dict['markers']

metadata_dict = metadata.set_index('kind').to_dict('dict')
kind_palette = metadata_dict['color']
kind_markers = metadata_dict['markers']

# Create color palette by condition (overrides go on a copy, not on `metadata`)
condition_metadata = metadata.copy()
is_2V_U6 = (condition_metadata['gene']=='2V') & (condition_metadata['construct2_promoter']=='U6')
condition_metadata.loc[is_2V_U6, 'color'] = base.colors['blue']
condition_metadata.loc[condition_metadata['ts_kind']=='NT', 'color'] = base.colors['gray']
# Applied last, so base/marker rows end up black even if an earlier override matched them
condition_metadata.loc[condition_metadata['group'].isin(['base','marker']), 'color'] = 'black'

metadata_dict = condition_metadata.set_index('condition').to_dict('dict')
condition_palette = metadata_dict['color']
condition_markers = metadata_dict['markers']

architecture_order = ['1T', '2T', '2V']

In [None]:
# Data for two-gene architectures with 3'UTR target sites.
# Each mask below selects the rows kept for one architecture (or the base controls).
keep_1T = (stats['gene']=='1T') & (stats['design']==1) & (stats['group']=='controller')
keep_2T = (stats['gene']=='2T') & (stats['group']=='dual') & (stats['ts_loc']=='3\'')
keep_2V = (stats['gene']=='2V') & (stats['group']=='ts3') & (stats['construct2_promoter']=='EF1a')
keep_base = (stats['group']=='base') & (stats['construct2_promoter']!='U6')

stats_subset = stats[keep_1T | keep_2T | keep_2V | keep_base].copy()
stats_subset = stats_subset.sort_values(['gene','construct2_promoter','group','ts_kind'])

# Remove outliers (1T base biorep1, 2V EF1a base/NT biorep4 -- see `flow/outliers.ipynb`)
outlier_1T = (
    (stats_subset['gene']=='1T')
    & (stats_subset['group']=='base')
    & (stats_subset['biorep']==1)
)
outlier_2V = (
    (stats_subset['gene']=='2V')
    & (stats_subset['construct2_promoter']=='EF1a')
    & (stats_subset['ts_kind'].isin(['na','NT']))
    & (stats_subset['biorep']==4)
)
stats_subset = stats_subset[~(outlier_1T | outlier_2V)]

Load stochastic simulations (`sim_data`, `sim_stats`)

In [None]:
# Folder passed to base.load_modeling for the 'stochastic_sims' results
simulation_path = rd.datadir/'projects'/'miR-iFFL'

# load_modeling returns two objects; unpack them as the per-simulation data and its summary stats
sim_loaded = base.load_modeling(simulation_path, 'stochastic_sims')
sim_data, sim_stats = sim_loaded

### Set up figure

In [None]:
# Font sizes shared by every panel (taken from base.font_sizes)
base_size = base.font_sizes['base_size']
smaller_size = base.font_sizes['smaller_size']

# Seaborn theme: 'ticks' style, 'paper' context, Arial as the sans-serif face
font_rc = {
    'font.size': base_size,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
}
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=font_rc)

# Matplotlib overrides, grouped by what they control
text_rc = {
    'axes.titlesize': base_size,
    'axes.labelsize': base_size,
    'xtick.labelsize': smaller_size,
    'ytick.labelsize': smaller_size,
    'pdf.fonttype': 42,
}
tick_rc = {
    'ytick.major.size': 3,
    'xtick.major.size': 3,
    'ytick.minor.size': 2,
    'ytick.major.pad': 2,
    'xtick.major.pad': 2,
}
axes_rc = {
    'lines.linewidth': 1,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.labelpad': 2,
}
plt.rcParams.update({**text_rc, **tick_rc, **axes_rc})

In [None]:
# Create the overall figure, gridspec, and add subfigure labels.
# The figure is two rows tall; its height is the sum of the two row heights (inches).
row_heights = [1.5, 1.75]
fig = plt.figure(figsize=(6.8504, sum(row_heights)))
fig_gridspec = matplotlib.gridspec.GridSpec(
    2, 6, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=row_heights, width_ratios=[1]*6,
)

# Each panel letter maps to the gridspec region it occupies:
# A and C take the first two columns, B and D take the remaining four.
panel_cells = {
    'A': fig_gridspec[0,:2],
    'B': fig_gridspec[0,2:],
    'C': fig_gridspec[1,:2],
    'D': fig_gridspec[1,2:],
}
subfigures = {label: fig.add_subfigure(cell) for label, cell in panel_cells.items()}

# Bold panel letter in the top-left corner of every subfigure
label_style = dict(fontsize=base.font_sizes['subpanel_label'], fontweight='bold', verticalalignment='top')
for label, subfig in subfigures.items():
    panel_label = matplotlib.text.Text(x=0, y=1, text=f'{label}', transform=subfig.transSubfigure, **label_style)
    subfig.add_artist(panel_label)

# Shared marker styling for strip plots on categorical axes
scatter_kwargs = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white')

# Write the (still empty) figure once so the output file exists from the start
output_path = rd.rootdir/'output'/'fig_architecture-supp'/'fig_architecture-supp.pdf'
fig.savefig(rd.outfile(output_path))

In [None]:
# Define values to shift xticks in plotting
# to add more space between architecture groups.
# Layout: three positions, a gap of `buffer`, two positions, another gap, three positions.
buffer = 0.6
gene_order = ['1T', '2T', '2V']
xtick_locs = [0,1,2, 3+buffer,4+buffer, 5+buffer*2,6+buffer*2,7+buffer*2,]

# Pair each condition (in order of first appearance in stats_subset) with an x position
condition_loc = dict(zip(stats_subset['condition'].unique(), xtick_locs))

# Use .map (a pure lookup, as for `metadata` below) so the new column is numeric,
# and .assign so the cell rebinds stats_subset instead of writing into a filtered frame.
stats_subset = stats_subset.assign(condition_loc=stats_subset['condition'].map(condition_loc))

metadata['condition_loc'] = metadata['condition'].map(condition_loc)
# Explicit copy so the dtype change below modifies `m` only
m = metadata.dropna().copy()
m['condition_loc'] = m['condition_loc'].astype(str)

# Right limit = number of conditions plus one `buffer` per gap between architecture groups
xlim_adjusted = (-0.5, len(stats_subset['condition'].unique())-0.5+buffer*(len(gene_order)-1))
# Same marker styling as scatter_kwargs, but x is placed on a numeric scale
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

# Display names for the ts_kind values
ts_label = {'na': 'base', 'NT': 'OL', 'T': 'CL'}

In [None]:
# Panel A: output standard deviation and CV per condition, grouped by architecture
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.4))

plot_df = stats_subset

# One entry per axis: the column to plot and the axis settings applied afterwards
panel_specs = [
    # stat std
    ('output_std', dict(title='Std.', xlim=xlim_adjusted, xlabel='', ylabel='', yscale='log', xticks=xtick_locs,)),
    # stat CV
    ('output_variation', dict(title='CV', xlim=xlim_adjusted, xlabel='', ylabel='', xticks=xtick_locs, ylim=(0.8,2))),
]
for ax, (stat_column, axis_settings) in zip(axes, panel_specs):
    # Plot each condition separately so it can carry its own marker
    for construct, group in plot_df.groupby('condition', sort=False):
        sns.stripplot(data=group, x='condition_loc', y=stat_column, hue='condition', palette=condition_palette,
                      legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs2)
    ax.set(**axis_settings)

# x position of each architecture label, centered under its group of conditions
architecture_label_x = [1, 3.5+buffer, 6+buffer*2]
yloc = -6
for ax in axes:
    # Shade the middle architecture group
    ax.axvspan(2.5+buffer/2, 4.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    for architecture, x_center in zip(architecture_order, architecture_label_x):
        ax.annotate(architecture, (x_center,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    # Blank the per-condition tick labels; the architecture annotations replace them
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)
    sns.despine(ax=ax)
    ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel B: mean, slope, std and CV for the U6-promoter constructs, split by ts_kind
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.5, bottom=0.4, top=0.35, right=0.1)
axes = subfig.subplots(1,4, gridspec_kw=dict(wspace=0.4))

# NOTE(review): the rest of this notebook spells the "no target site" ts_kind as 'na'
# (see ts_label and the outlier filter); confirm that 'none' is the intended value here.
plot_df = stats[(stats['construct2_promoter']=='U6') & (stats['ts_loc']!='5\'') & (stats['ts_kind']!='none')].copy()
# Ordered categorical so the x axis follows the key order of ts_label
plot_df['ts_kind'] = plot_df['ts_kind'].astype(pd.api.types.CategoricalDtype(categories=list(ts_label.keys()), ordered=True))


def _strip_by_condition(ax, stat_column):
    """Draw `stat_column` against ts_kind on `ax`, one stripplot per condition so each keeps its own marker."""
    for construct, group in plot_df.groupby('condition', sort=False):
        sns.stripplot(data=group, x='ts_kind', y=stat_column, hue='condition', palette=condition_palette,
                      legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs)


# stat gmean
ax = axes[0]
_strip_by_condition(ax, 'output_gmean')
ax.set(title='Mean', xlabel='', ylabel='', yscale='log',)
# Dotted reference line at the mean output_gmean of the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# slope
ax = axes[1]
_strip_by_condition(ax, 'slope')
ax.set(title='Slope', xlabel='', ylabel='', ylim=(ax.get_ylim()[0],1))
# Format ticks to one decimal with a formatter rather than freezing label strings:
# the labels then always match the tick values in effect after ylim is applied.
# FormatStrFormatter writes negative numbers with an ASCII hyphen.
ax.yaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.1f'))

# stat std
ax = axes[2]
_strip_by_condition(ax, 'output_std')
ax.set(title='Std.', xlabel='', ylabel='', yscale='log',)

# stat CV
ax = axes[3]
_strip_by_condition(ax, 'output_variation')
ax.set(title='CV', xlabel='', ylabel='',)

for ax in axes:
    # Replace raw ts_kind values with their display names
    ax.set_xticklabels([ts_label[l.get_text()] for l in ax.get_xticklabels()], ha='right', rotation=45)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel C: one small plot per simulated MOI showing base.truncated_poisson over copy number
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.2, bottom=0.3, top=0.6, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(wspace=0.2,))

# Only used here to get the list of MOI values present in the selected simulations
plot_df = sim_data[(sim_data['risc']==10000) & (sim_data['design']<=1)]
xticks = [0,1,5,10,15,20]
# Keep the tick at 0 but leave it unlabeled
xticklabels = ['' if x == 0 else str(x) for x in xticks]

for i,moi in enumerate(plot_df['moi'].unique()):

    # Poisson distribution
    ax = axes[i]
    x_range = range(1,20)
    # The '-o' format string already sets a solid line; passing ls='-' as well
    # makes matplotlib warn about a redundantly defined linestyle.
    ax.plot(x_range, base.truncated_poisson(x_range, moi), '-o', ms=2, color=base.colors['gray'])
    ax.set(yticks=[], yticklabels=[], xticks=xticks, xticklabels=xticklabels, title=str(moi))
    # Axis label only on the middle plot; 'MOI = ' prefix only on the first title
    if i==1: ax.set(xlabel='copy #, $c$')
    elif i==0: ax.set(title='MOI = '+ax.get_title())
    sns.despine(ax=ax, left=True)

for ax in axes.flatten(): ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel D: simulated mean, slope, std and CV per architecture, grouped by MOI.
# Layout variables carry a `sim_` prefix so this cell does not overwrite the
# `buffer` / `xtick_locs` / `xlim_adjusted` / `condition_loc` values that panel A reads.
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.3, top=0.55, right=0.1)
axes = subfig.subplots(1,4, gridspec_kw=dict(wspace=0.4))

# Filter and sort in one chain (no in-place operation on a filtered frame)
sim_subset = sim_stats[(sim_stats['design']<=1) & (sim_stats['risc']==10000)].sort_values(['moi','gene'])

sim_buffer = 0.6
group_width = 3   # x positions per MOI group
moi_list = sim_subset['moi'].unique()
sim_conditions = sim_subset['condition'].unique()
sim_xtick_locs = np.concatenate([[x+i*(group_width+sim_buffer) for x in range(group_width)] for i in range(len(moi_list))])
# One `sim_buffer` gap between consecutive MOI groups
sim_xlim = (-0.5, len(sim_conditions)-0.5+sim_buffer*(len(moi_list)-1))

# Pair each condition (in sorted order of appearance) with an x position; .map keeps the column numeric
sim_condition_loc = dict(zip(sim_conditions, sim_xtick_locs))
plot_df = sim_subset.assign(condition_loc=sim_subset['condition'].map(sim_condition_loc))


def _strip_sim_stat(ax, df, output_col, marker_col=None):
    """Draw one simulated statistic on `ax`, one stripplot per (gene, moi) group.

    If `marker_col` is given it is drawn first in gray, then `output_col` is drawn
    on top colored by gene. Both use the gene's marker shape.
    """
    for (gene, _moi), group in df.groupby(['gene','moi'], sort=False):
        if marker_col is not None:
            sns.stripplot(data=group, x='condition_loc', y=marker_col, color=base.colors['gray'],
                          legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)
        sns.stripplot(data=group, x='condition_loc', y=output_col, hue='gene', palette=gene_palette,
                      legend=False, ax=ax, marker=gene_markers[gene], **scatter_kwargs2)


# stat gmean
ax = axes[0]
_strip_sim_stat(ax, plot_df, 'output_gmean', marker_col='marker_gmean')
ax.set(title='Mean', xlim=sim_xlim, xlabel='MOI', ylabel='', yscale='log', xticks=sim_xtick_locs)

# slope
ax = axes[1]
_strip_sim_stat(ax, plot_df, 'slope')
ax.set(title='Slope', xlim=sim_xlim, xlabel='', ylabel='', xticks=sim_xtick_locs, yticks=[0.6,0.7,0.8,0.9,1])

# stat std
ax = axes[2]
_strip_sim_stat(ax, plot_df, 'output_std', marker_col='marker_std')
ax.set(title='Std.', xlim=sim_xlim, xlabel='', ylabel='', yscale='log', xticks=sim_xtick_locs,)
ax.yaxis.set_minor_formatter(plt.NullFormatter())

# stat CV
ax = axes[3]
_strip_sim_stat(ax, plot_df, 'output_variation', marker_col='marker_variation')
ax.set(title='CV', xlim=sim_xlim, xlabel='', ylabel='', xticks=sim_xtick_locs,)
ax.yaxis.set_minor_formatter(plt.NullFormatter())


yloc = -6
for ax in axes:
    # Shade the middle MOI group
    ax.axvspan(2.5+sim_buffer/2, 5.5+sim_buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    # Label each MOI group under its middle position
    for i, moi in enumerate(moi_list):
        ax.annotate(moi, (1+i*(group_width+sim_buffer),yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center', fontsize=smaller_size)
    # Blank the per-condition tick labels; the MOI annotations replace them
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path))

In [None]:
# Save to OneDrive: write a second copy of the finished figure under the manuscript's figure links
linked_figure_path = rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/'fig_architecture-supp.pdf'
fig.savefig(rd.outfile(linked_figure_path))