In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns
from statannotations.Annotator import Annotator

# Re-execute the already-imported `base` module so that edits made to base.py
# while this kernel is running take effect without a kernel restart
from importlib import reload
reload(base)

### Load data

Two-gene data (`data`)

In [None]:
# Both input locations are resolved relative to the rushd data directory
base_path = rd.datadir/'instruments'/'data'/'attune'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'
# base.load_data is called with the 'two_gene' key and its result is unpacked into four objects;
# `stats` and `metadata` are the two that the palette and subsetting cells below read
data, quantiles, stats, metadata = base.load_data(base_path, metadata_path, 'two_gene')

In [None]:
def _lookups_by(column):
    """Index `metadata` by `column` and return its nested {column_name: {key: value}} dict."""
    return metadata.set_index(column).to_dict('dict')

# Gene- and kind-level lookups are extracted *before* the color overrides below,
# so they keep the colors exactly as they were loaded
_by_gene = _lookups_by('gene')
gene_palette, gene_markers = _by_gene['color'], _by_gene['markers']

_by_kind = _lookups_by('kind')
kind_palette, kind_markers = _by_kind['color'], _by_kind['markers']

# Color overrides used for the per-condition palette.
# They are applied top to bottom, so a later rule wins where rows overlap.
_color_overrides = (
    ((metadata['gene']=='2V') & (metadata['construct2_promoter']=='U6'), base.colors['blue']),
    (metadata['ts_kind']=='NT', base.colors['gray']),
    (metadata['group'].isin(['base','marker']), 'black'),
)
for _rows, _color in _color_overrides:
    metadata.loc[_rows, 'color'] = _color

# Condition-level lookups see the overridden colors
metadata_dict = _lookups_by('condition')
condition_palette, condition_markers = metadata_dict['color'], metadata_dict['markers']

# Left-to-right order of the architecture groups in the plots
architecture_order = ['1T', '2T', '2V']

Load stochastic simulations (`sim_data`)

In [None]:
# Project folder, resolved relative to the rushd data directory
simulation_path = rd.datadir/'projects'/'miR-iFFL'
# base.load_modeling is called with the 'stochastic_sims' key and returns two objects;
# only the first is kept, the second is discarded
sim_data, _ = base.load_modeling(simulation_path, 'stochastic_sims')

In [None]:
# Data for two-gene architectures with 3'UTR target sites.
# One boolean mask per family of rows that is kept:
keep_1T = (stats['gene']=='1T') & (stats['design']==1) & (stats['group']=='controller')
keep_2T = (stats['gene']=='2T') & (stats['group']=='dual') & (stats['ts_loc']=='3\'')
keep_2V = (stats['gene']=='2V') & (stats['group']=='ts3') & (stats['construct2_promoter']=='EF1a')
keep_base = (stats['group']=='base') & (stats['construct2_promoter']!='U6')

stats_subset = (
    stats[keep_1T | keep_2T | keep_2V | keep_base]
    .sort_values(['gene','construct2_promoter','group','ts_kind'])
)

# Remove outliers (1T base biorep1, 2V EF1a base/NT biorep4 -- see `flow/outliers.ipynb`),
# and additionally drop design 2 / ts_kind 'T' / biorep 1 rows
drop_1T_base = (stats_subset['gene']=='1T') & (stats_subset['group']=='base') & (stats_subset['biorep']==1)
drop_2V_EF1a = ((stats_subset['gene']=='2V') & (stats_subset['construct2_promoter']=='EF1a')
                & (stats_subset['ts_kind'].isin(['na','NT'])) & (stats_subset['biorep']==4))
drop_design2 = (stats_subset['design']==2) & (stats_subset['ts_kind']=='T') & (stats_subset['biorep']==1)

# Copy *after* the last filter: a boolean-indexed frame is a slice of its parent,
# and later cells add columns to `stats_subset` (which would otherwise raise
# SettingWithCopyWarning)
stats_subset = stats_subset[~(drop_1T_base | drop_2V_EF1a | drop_design2)].copy()

### Set up figure

In [None]:
# Set plotting context
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=base.rc_context)
plt.rcParams.update(base.rc_params)
# Build a new dict instead of aliasing base.scatter_kwargs: assigning a key on the
# alias would silently change the dict owned by `base` for every other user of it
scatter_kwargs = {**base.scatter_kwargs, 'jitter': 0.1}
annotate_kwargs = base.annotate_kwargs  # read-only here, so sharing the object is fine

# Create the overall figure and a 2x4 gridspec
fig = plt.figure(figsize=(base.figure_width['full'],5.375))
fig_gridspec = matplotlib.gridspec.GridSpec(2, 4, figure=fig,
    wspace=0.4, hspace=0.4, height_ratios=[2,3.25], width_ratios=[0.27,0.23,0.1,0.4])
# One subfigure per panel; columns 1-2 of the top row are not assigned to any subfigure
subfigures = {
    'A': fig.add_subfigure(fig_gridspec[0,0]),
    'B': fig.add_subfigure(fig_gridspec[0,3:]),
    'C': fig.add_subfigure(fig_gridspec[1,:2]),
    'D': fig.add_subfigure(fig_gridspec[1,2:]),
}

# Add subpanel labels in the top-left corner of each subfigure
for label, subfig in subfigures.items():
    # Keys containing '2' get no label (none of the keys above match)
    if '2' in label: continue
    subfig.add_artist(matplotlib.text.Text(x=0, y=1, text=f'{label}', fontsize=base.font_sizes['subpanel_label'],
                                           fontweight='bold', verticalalignment='top',transform=subfig.transSubfigure))

# Save to output folder (the same file is re-saved by later cells as panels are filled in)
output_path = rd.rootdir/'output'/'fig_architecture'/'fig_architecture.pdf'
fig.savefig(rd.outfile(output_path))

In [None]:
# Define values to shift xticks in plotting
# to add more space between architecture groups
buffer = 0.6
gene_order = ['1T', '2T', '2V']
# Eight positions in groups of 3, 2 and 3, each group offset by one more `buffer`
xtick_locs = [0,1,2, 3+buffer,4+buffer, 5+buffer*2,6+buffer*2,7+buffer*2,]

# Conditions are paired with positions in their order of appearance in `stats_subset`
# (zip stops at the shorter of the two sequences)
condition_loc = dict(zip(stats_subset['condition'].unique(), xtick_locs))

# Use .map (a lookup that yields a float column) rather than .replace, and .assign
# (which returns a new frame) rather than setting a column on a filtered frame
stats_subset = stats_subset.assign(condition_loc=stats_subset['condition'].map(condition_loc))

metadata['condition_loc'] = metadata['condition'].map(condition_loc)
# Rows of `metadata` without any missing value, with the position stored as a string
m = metadata.dropna().assign(condition_loc=lambda df: df['condition_loc'].astype(str))

# Half a unit of padding on each side, plus one `buffer` per gap between architecture groups
xlim_adjusted = (-0.5, len(stats_subset['condition'].unique())-0.5+buffer*(len(gene_order)-1))
# native_scale=True makes seaborn place points at the numeric `condition_loc` values
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

In [None]:
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.35, bottom=0.45, top=0.35, right=0.1)
axes = subfig.subplots(1,2, gridspec_kw=dict(width_ratios=(1,1), wspace=0.3))

plot_df = stats_subset

# (column to plot, panel-specific Axes settings) for the left and right panels
panels = (
    ('output_gmean', dict(title='Output mean', yscale='log')),
    ('slope', dict(title='Slope')),
)
for ax, (metric, panel_settings) in zip(axes, panels):
    # One stripplot call per condition so that each condition can use its own marker
    for construct, group in plot_df.groupby('condition', sort=False):
        sns.stripplot(data=group, x='condition_loc', y=metric, hue='condition', palette=condition_palette,
                      legend=False, ax=ax, marker=condition_markers[construct], **scatter_kwargs2)
    ax.set(xlim=xlim_adjusted, xlabel='', ylabel='', xticks=xtick_locs, **panel_settings)

# Dotted reference line on the output-mean panel: mean of `output_gmean` over the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
axes[0].axhline(marker_baseline, color='black', ls=':')

# Architecture names below the axis: x in data coordinates, y in points relative to the axes
label_y = -6
label_x = (1, 3.5+buffer, 6+buffer*2)
for ax in axes:
    # Light gray band behind the middle architecture group
    ax.axvspan(2.5+buffer/2, 4.5+buffer*1.5, color=base.get_light_color(base.colors['gray']), alpha=0.2,)
    for name, x_center in zip(architecture_order, label_x):
        ax.annotate(name, (x_center,label_y), xycoords=('data','axes points'), ha='center', va='top',
                    ma='center', fontsize=base.font_sizes['smaller_size'])
    # Keep the ticks but blank out their labels
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))
    ax.set_xlabel(ax.get_xlabel(), labelpad=10)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path))

# perform statistical tests
pairs = [('base','CL'), ('OL','CL')]

def _plot_and_test(metric, ylabel, header, log_scale=False):
    """Plot `metric` per circuit type on a throwaway figure and annotate the pairwise tests.

    One panel is drawn per entry of `gene_order`, with `ts_label` on the x axis.
    The comparisons in `pairs` are annotated with statannotations, configured by
    `annotate_kwargs`. The figure is separate from the manuscript figure `fig`,
    and uses local names so the notebook-level `axes` is left untouched.

    Parameters
    ----------
    metric : str
        Column of `plot_df` to plot and test.
    ylabel : str
        y-axis label placed on the first panel only.
    header : str
        Line printed before the per-gene output.
    log_scale : bool
        If True, the y axis of every panel is set to log scale before annotating.
    """
    test_fig, test_axes = plt.subplots(1,len(gene_order), figsize=(5,2), gridspec_kw=dict(wspace=0.5))
    print(header)
    scale_settings = dict(yscale='log') if log_scale else {}
    for ax, gene in zip(test_axes, gene_order):
        print('Circuit type: '+gene)
        test_df = plot_df[(plot_df['gene']==gene)]
        sns.stripplot(data=test_df, x='ts_label', y=metric, ax=ax, hue='condition', palette=condition_palette, legend=False)
        ax.set(ylabel='', xlabel='', title=gene, **scale_settings)

        # For '2T' only the ('OL','CL') comparison is run
        # (presumably there is no 'base' label for that architecture -- TODO confirm)
        ps = pairs[1:] if gene=='2T' else pairs
        annotator = Annotator(ax, ps, data=test_df, x='ts_label', y=metric)
        annotator.configure(**annotate_kwargs,).apply_and_annotate()
        print('\n')
    test_axes[0].set(ylabel=ylabel)

_plot_and_test('output_gmean', 'Output mean', '-------- OUTPUT MEAN --------\n', log_scale=True)
_plot_and_test('slope', 'Slope', '-------- SLOPE --------\n')

In [None]:
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=1, bottom=0.3, top=0.3, right=0.1)
# One row per `moi` value: line plot on the left, density plot on the right
axes = subfig.subplots(3,2, gridspec_kw=dict(width_ratios=(0.8,1), wspace=0.2, hspace=0.4))

plot_df = sim_data[(sim_data['risc']==10000) & (sim_data['design']<=1)]
# Per-row tick positions, matched by index to the order in which `moi` values appear
yticks = [[0,1e3,2e3], [0,2e3,4e3,6e3], [0,2e3,4e3,6e3]]
hist_xticks = [[0,300,600,900,1200,1500], [0,1e3,2e3,3e3], [0,1e3,2e3,3e3,4e3,5e3]]

def _gmean_gstd_interval(x):
    """Error-bar bounds: geometric mean divided / multiplied by the geometric standard deviation."""
    center, spread = sp.stats.gmean(x), sp.stats.gstd(x)
    return (center / spread, center * spread)

# Settings shared by the two calls of each plot type (everything except '1T' vs. '1T' only)
line_kwargs = dict(x='copy_num', y='output', hue='kind', palette=kind_palette,
                   legend=False, dashes=False, style='kind', markers=kind_markers,
                   markersize=4, markeredgewidth=0.5,
                   estimator=sp.stats.gmean, errorbar=_gmean_gstd_interval)
kde_kwargs = dict(x='output', hue='kind', palette=kind_palette, legend=False,
                  common_norm=False, bw_adjust=3)

for i,moi in enumerate(plot_df['moi'].unique()):

    # drop data with only one point at a given copy number (for this plot):
    # at moi 3, '2V' rows are kept only up to copy number 10; other genes are kept in full
    d = plot_df[(plot_df['moi']==moi)]
    if moi==3: d = d[((d['copy_num']<=10) & (d['gene']=='2V')) | (d['gene']!='2V')]
    is_1T = d['gene']=='1T'

    # line plot ('1T' is drawn dashed)
    ax = axes[i,0]
    sns.lineplot(data=d[~is_1T], ax=ax, **line_kwargs)
    sns.lineplot(data=d[is_1T], ax=ax, ls='--', **line_kwargs)
    sns.despine(ax=ax)
    ax.minorticks_off()
    ax.set(xlabel='', yticks=yticks[i], yticklabels=[f'{float(l)/1000:.0f}k' for l in yticks[i]], ylabel='output (#)')

    if i==2: ax.set(xlabel='copy number')

    # histogram ('1T' is drawn dashed)
    ax = axes[i,1]
    sns.kdeplot(data=d[~is_1T], ax=ax, **kde_kwargs)
    sns.kdeplot(data=d[is_1T], ax=ax, ls='--', **kde_kwargs)
    sns.despine(ax=ax, left=True)

    ax.set(xlabel='', xlim=(hist_xticks[i][0], hist_xticks[i][-1]), xticks=hist_xticks[i], ylabel='', yticklabels=[],)
    # Label ticks in thousands, computed from the tick values themselves rather than
    # parsed back from rendered tick text; the first row needs one decimal because
    # its ticks are not multiples of 1000. Only the last label carries the 'k' suffix.
    decimals = 1 if i==0 else 0
    xticklabels_k = [f'{tick/1000:.{decimals}f}' for tick in hist_xticks[i]]
    xticklabels_k[-1] += 'k'
    ax.set(xticklabels=xticklabels_k)
    ax.get_yaxis().set_visible(False)
    if i==2: ax.set(xlabel='protein (#)')

for ax in axes.flatten(): ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Save to OneDrive
# (a second copy of the finished figure, written under the rushd data directory's
# 'manuscripts' folder -- presumably the OneDrive-synced location; verify locally)
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/'fig_architecture.pdf'))