In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns
from statannotations.Annotator import Annotator

# enables concurrent editing of base.py: re-import the already-imported `base`
# module so changes made to it while this kernel is running take effect here
from importlib import reload
reload(base)

### Load data

Circuit tuning data (`data`)

In [None]:
# Input locations, both resolved relative to the rushd data directory
base_path = rd.datadir / 'instruments' / 'data' / 'attune'
metadata_path = rd.datadir / 'projects' / 'miR-iFFL' / 'plasmids'

# Load the 'tuning' dataset; the loader hands back four objects that are
# unpacked into the un-suffixed names used by the single-gene panels below
tuning_outputs = base.load_data(base_path, metadata_path, 'tuning', 'designs')
data, quantiles, stats, metadata = tuning_outputs

In [None]:
# Create dicts to specify colors/markers: index the metadata by construct so
# each column becomes a {construct: value} lookup, then pull out the two
# columns used for styling
metadata_dict = metadata.set_index('construct').to_dict('dict')
designs_palette, designs_markers = (metadata_dict[column] for column in ('color', 'markers'))

Two-gene data (`data2`)

In [None]:
# Load the 'two_gene' dataset with the same loader and paths as the tuning data;
# results are kept under `2`-suffixed names so both datasets stay available
data2, quantiles2, stats2, metadata2 = base.load_data(base_path, metadata_path, 'two_gene')

In [None]:
# Create palette for two-gene data.
# The masks only read columns that are never modified here, so they can be
# computed up front; the color assignments are applied in order, and later
# rules overwrite earlier ones for rows matched more than once.
is_2V_U6 = (metadata2['gene'] == '2V') & (metadata2['construct2_promoter'] == 'U6')
is_NT = metadata2['ts_kind'] == 'NT'
is_NT_design3 = is_NT & (metadata2['design'] == 3)
is_base_or_marker = metadata2['group'].isin(['base', 'marker'])

metadata2.loc[is_2V_U6, 'color'] = base.colors['blue']
metadata2.loc[is_NT, 'color'] = base.colors['gray']
# design-3 NT rows get a darkened version of the color they were just assigned
metadata2.loc[is_NT_design3, 'color'] = metadata2.loc[is_NT_design3, 'color'].apply(base.get_dark_color)
metadata2.loc[is_base_or_marker, 'color'] = 'black'

# {condition: value} lookups for plotting
metadata_dict2 = metadata2.set_index('condition').to_dict('dict')
condition_palette, condition_markers = (metadata_dict2[column] for column in ('color', 'markers'))

gene_order = ['1T', '2T', '2V']

In [None]:
# Create a second palette for regular tuning: read the 'tuning' metadata
# directly and build {construct: value} lookups for color and marker
metadata3 = base.get_metadata(metadata_path / 'construct-metadata.xlsx', 'tuning')
metadata_dict3 = metadata3.set_index('construct').to_dict('dict')
main_palette, main_markers = (metadata_dict3[column] for column in ('color', 'markers'))

In [None]:
# Data for two-gene architectures with 5'UTR target sites
# Keep four kinds of rows from the two-gene stats:
#   - 1T controllers with design > 1
#   - 2T 'dual' constructs whose target site location is 5'
#   - 2V constructs in the 'ts5' group
#   - every 'base' row
selected = (
    ((stats2['gene']=='1T') & (stats2['design']>1) & (stats2['group']=='controller')) |
    ((stats2['gene']=='2T') & (stats2['group']=='dual') & (stats2['ts_loc']=='5\'')) |
    ((stats2['gene']=='2V') & (stats2['group']=='ts5')) |
    (stats2['group']=='base')
)
# Sort without `inplace` so the result is a fresh frame; the row order defines
# the order in which conditions are later laid out along the x-axis
stats_subset = stats2[selected].sort_values(['gene','construct2_promoter','group','ts_kind'])

# Remove outliers (1T base biorep1, 2V EF1a base/NT biorep4 -- see `flow/outliers.ipynb`)
is_outlier = (
    ((stats_subset['gene']=='1T') & (stats_subset['group']=='base') & (stats_subset['biorep']==1)) |
    ((stats_subset['gene']=='2V') & (stats_subset['construct2_promoter']=='EF1a') &
     (stats_subset['ts_kind'].isin(['na','NT'])) & (stats_subset['biorep']==4)) |
    # NOTE(review): this rule (design 2, ts_kind 'T', biorep 1, any gene) is not
    # mentioned in the outlier note above -- confirm it is intended
    ((stats_subset['design']==2) & (stats_subset['ts_kind']=='T') & (stats_subset['biorep']==1))
)
# Explicit copy: a later cell adds a column to this frame, which would otherwise
# raise SettingWithCopyWarning on a boolean-indexed slice
stats_subset = stats_subset[~is_outlier].copy()

### Set up figure

In [None]:
# Set plotting context
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=base.rc_context)
plt.rcParams.update(base.rc_params)
scatter_kwargs = base.scatter_kwargs
annotate_kwargs = base.annotate_kwargs

# Create the overall figure and a 5-row x 6-column gridspec
fig = plt.figure(figsize=(base.figure_width['full'], 8))
fig_gridspec = matplotlib.gridspec.GridSpec(
    5, 6, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=(1.75, 1.75, 1.65, 0.85, 2),
)

# Panel layout as {label: (gridspec row, column slice)}
left_half, right_half, full_width = slice(None, 3), slice(3, None), slice(None)
panel_layout = {
    'A': (0, left_half),
    'B': (0, right_half),
    'C': (1, left_half),
    'D': (1, right_half),
    'E': (2, left_half),
    'F': (2, right_half),
    'G': (3, full_width),
    'H': (4, full_width),
    # NOTE(review): 'I' occupies the same gridspec slot as 'H', so its label is
    # drawn on top of the 'H' label, and no later cell plots into it -- confirm
    # whether 'I' should be removed or given its own slot
    'I': (4, full_width),
}
subfigures = {
    label: fig.add_subfigure(fig_gridspec[row, columns])
    for label, (row, columns) in panel_layout.items()
}

# Add subpanel labels in the top-left corner of each subfigure
label_style = dict(
    x=0, y=1,
    fontsize=base.font_sizes['subpanel_label'],
    fontweight='bold',
    verticalalignment='top',
)
for label, subfig in subfigures.items():
    # labels containing '2' are skipped (none of the current labels do)
    if '2' in label:
        continue
    subfig.add_artist(matplotlib.text.Text(text=f'{label}', transform=subfig.transSubfigure, **label_style))

# Save to output folder
output_path = rd.rootdir / 'output' / 'fig_designs-supp' / 'fig_designs-supp.pdf'
fig.savefig(rd.outfile(output_path))

In [None]:
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=1.95, bottom=0.5, top=0.4, right=0.15)
axes = subfig.subplots(1,2, gridspec_kw=dict(wspace=0.4))

# Rows for this panel: miRE.FF4 controllers with ts_num == 1, plus 'base' rows,
# all restricted to the EF1a promoter
is_miRE_FF4_controller = (stats['miR']=='miRE.FF4') & (stats['group']=='controller') & (stats['ts_num']==1)
is_base_group = stats['group']=='base'
plot_df = stats[(is_miRE_FF4_controller | is_base_group) & (stats['promoter']=='EF1a')]
display(plot_df['construct'].unique())

# One entry per axis: (statistic column, axis settings applied after plotting)
design_xlim = (-0.5, len(plot_df['design'].unique())-0.5)
panel_specs = [
    # stat std
    ('output_std', dict(title='Std.', xlim=design_xlim, xlabel='', ylabel='',
                        yscale='log', ylim=(1e2,5e4))),
    # stat CV
    ('output_variation', dict(title='CV', xlim=design_xlim, xlabel='', ylabel='',
                              ylim=(0,5), yticks=(0,1,2,3,4))),
]
for ax, (stat, ax_settings) in zip(axes, panel_specs):
    # each construct is drawn separately so it can carry its own marker
    for construct, group in plot_df.groupby('construct'):
        sns.pointplot(data=group, x='design', y=stat, hue='construct', palette=designs_palette,
                      ax=ax, marker=designs_markers[construct], **scatter_kwargs)
    ax.set(**ax_settings)
    # the tick labelled '0' is shown as 'base'
    ax.set(xticklabels=['base' if tick.get_text()=='0' else tick.get_text() for tick in ax.get_xticklabels()])

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on separate throwaway figures, one per statistic)
pairs = [('base','CL'), ('OL','CL')]
designs = plot_df['design'].unique()

for stat in ['output_std', 'output_variation']:
    f, axes = plt.subplots(1, len(designs)-1, figsize=(5,2), gridspec_kw=dict(wspace=0.5))
    # the first entry of `designs` is skipped; every other design is compared
    # against the 'base' rows
    for ax, design in zip(axes, designs[1:]):
        print('Design: ' + str(design))
        in_test = (plot_df['design']==design) | (plot_df['group']=='base')
        test_df = plot_df[in_test]
        sns.stripplot(data=test_df, x='ts_label', y=stat, ax=ax, hue='construct', palette=designs_palette, legend=False)
        ax.set(ylabel='', xlabel='', title=str(design), yscale='log')

        annotator = Annotator(ax, pairs, data=test_df, x='ts_label', y=stat)
        annotator.configure(**annotate_kwargs,).apply_and_annotate()
        print('\n')
    axes[0].set(ylabel=stat)

In [None]:
subfig = subfigures['B']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.5, top=0.65, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.35))

# EF1a miR controls: rows in group 'miR' with miR_loc 'UTR', plus 'base' rows,
# restricted to the EF1a promoter
miR_order = ['none', 'miR.FF5', 'miR.FF4', 'miRE.FF5', 'miRE.FF4',]
plot_df = stats[((((stats['group']=='miR')) & (stats['miR_loc']=='UTR')) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# ordered categorical so the x-axis follows `miR_order`
plot_df['miR'] = plot_df['miR'].astype(pd.api.types.CategoricalDtype(categories=miR_order, ordered=True))
xlim = (-0.5, len(plot_df['construct'].unique())-0.5)
display(plot_df['construct'].unique())

# One entry per axis: (statistic column, axis settings applied after plotting)
panel_specs = [
    # stat gmean
    ('output_gmean', dict(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=(2e1,4e4))),
    # stat std
    ('output_std', dict(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=(1e3,3e4))),
    # slope
    ('slope', dict(title='Slope', xlim=xlim, xlabel='', ylabel='', ylim=(0,1.3),
                   yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])),
]
for ax, (stat, ax_settings) in zip(axes, panel_specs):
    # each construct is drawn separately so it can carry its own marker
    for construct, group in plot_df.groupby('construct'):
        sns.pointplot(data=group, x='miR', y=stat, hue='construct', palette=main_palette,
                      ax=ax, marker=main_markers[construct], **scatter_kwargs)
    ax.set(**ax_settings)

# Dotted reference line at the mean `output_gmean` of the 'marker' group.
# It is drawn on the mean axis only: the value is in output_gmean units, so it
# does not belong on the slope axis (where it was previously also drawn, far
# outside the 0-1.3 y-range).
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
axes[0].axhline(marker_baseline, color='black', ls=':')

for ax in axes:
    # show e.g. 'miR.FF4' as 'miR-FF4'
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on a separate throwaway figure):
# every miR condition is compared against 'none'
f, axes = plt.subplots(1,3, figsize=(5,2))
pairs = [('none',miR) for miR in miR_order[1:]]
stat_list = ['output_gmean', 'output_std', 'slope']
for ax, stat in zip(axes, stat_list):
    sns.stripplot(data=plot_df, x='miR', y=stat, ax=ax, hue='construct', palette=main_palette, legend=False,)
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    annotator = Annotator(ax, pairs, data=plot_df, x='miR', y=stat,)
    annotator.configure(**annotate_kwargs, line_offset=2).apply_and_annotate()
axes[0].set(yscale='log')
axes[1].set(yscale='log')

In [None]:
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.5, top=0.65, right=0.15)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.35))

# EF1a target-site controls: rows in group 'ts5' with ts_num == 1, plus 'base'
# rows, restricted to the EF1a promoter
ts_order = ['none','FF3x1','FF4x1','FF5x1','FF6x1']
plot_df = stats[((((stats['group']=='ts5')) & (stats['ts_num']==1)) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# ordered categorical so the x-axis follows `ts_order`
plot_df['ts'] = plot_df['ts'].astype(pd.api.types.CategoricalDtype(categories=ts_order, ordered=True))
xlim = (-0.5, len(plot_df['ts'].unique())-0.5)
display(plot_df['construct'].unique())

# One entry per axis: (statistic column, axis settings applied after plotting)
panel_specs = [
    # stat gmean
    ('output_gmean', dict(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=(2e1,4e4))),
    # stat std
    ('output_std', dict(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=(1e3,3e4))),
    # slope
    ('slope', dict(title='Slope', xlim=xlim, xlabel='', ylabel='', ylim=(0,1.3),
                   yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])),
]
for ax, (stat, ax_settings) in zip(axes, panel_specs):
    # each construct is drawn separately so it can carry its own marker
    for construct, group in plot_df.groupby('construct'):
        sns.pointplot(data=group, x='ts', y=stat, hue='construct', palette=main_palette,
                      ax=ax, marker=main_markers[construct], **scatter_kwargs)
    ax.set(**ax_settings)

# Dotted reference line at the mean `output_gmean` of the 'marker' group.
# It is drawn on the mean axis only: the value is in output_gmean units, so it
# does not belong on the slope axis (where it was previously also drawn, far
# outside the 0-1.3 y-range).
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
axes[0].axhline(marker_baseline, color='black', ls=':')

# rotate tick labels on every axis (this covers the mean axis too, so no
# separate call is needed for it)
for ax in axes:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on a separate throwaway figure):
# every target-site condition is compared against 'none'
f, axes = plt.subplots(1,3, figsize=(5,2))
pairs = [('none',ts) for ts in ts_order[1:]]
stat_list = ['output_gmean', 'output_std', 'slope']
for ax, stat in zip(axes, stat_list):
    sns.stripplot(data=plot_df, x='ts', y=stat, ax=ax, hue='construct', palette=main_palette, legend=False,)
    ax.set_xticklabels([l.get_text().replace('.','-') for l in ax.get_xticklabels()], rotation=45, ha='right')
    annotator = Annotator(ax, pairs, data=plot_df, x='ts', y=stat,)
    annotator.configure(**annotate_kwargs, line_offset=2).apply_and_annotate()
axes[0].set(yscale='log')
axes[1].set(yscale='log')

In [None]:
def designs_stats_plots(axes, plot_df):
    """Plot mean, std, slope and CV against design, then run OL-vs-CL tests.

    Parameters
    ----------
    axes : indexable collection of at least four matplotlib Axes
        ``axes[0]`` to ``axes[3]`` receive, in order, `output_gmean` ('Mean'),
        `output_std` ('Std.'), `slope` ('Slope') and `output_variation` ('CV').
    plot_df : pandas.DataFrame
        Rows to plot. Columns read: `construct`, `design`, `ts_label`,
        `output_gmean`, `output_std`, `slope`, `output_variation`.

    Returns
    -------
    None

    Notes
    -----
    Reads the notebook globals `stats` (for the 'marker' baseline),
    `designs_palette`, `designs_markers`, `scatter_kwargs` and
    `annotate_kwargs`. For each statistic a separate figure is created with one
    strip plot per design, annotated with the ('OL', 'CL') comparison; progress
    is printed along the way.
    """
    xlim = (-0.5, len(plot_df['design'].unique())-0.5)

    # One entry per axis: (statistic column, axis settings applied after plotting)
    panel_specs = [
        # stat gmean
        ('output_gmean', dict(title='Mean', xlim=xlim, xlabel='design', ylabel='', yscale='log', ylim=(2e1,3e4))),
        # stat std
        ('output_std', dict(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=(2e2,4e4))),
        # slope
        ('slope', dict(title='Slope', xlim=xlim, xlabel='', ylabel='', ylim=(0,1.4),
                       yticks=[0,0.25,0.5,0.75,1,1.25], yticklabels=['0.0','','0.5','','1.0',''])),
        # CV
        ('output_variation', dict(title='CV', xlim=xlim, xlabel='', ylabel='', ylim=(0,3.3), yticks=(0,1,2))),
    ]
    for ax, (stat, ax_settings) in zip(axes, panel_specs):
        # each construct is drawn separately so it can carry its own marker
        for construct, group in plot_df.groupby('construct'):
            sns.pointplot(data=group, x='design', y=stat, hue='construct', palette=designs_palette,
                          ax=ax, marker=designs_markers[construct], **scatter_kwargs)
        ax.set(**ax_settings)

    # Dotted reference line on the mean axis at the mean `output_gmean` of the
    # 'marker' group (taken from the global `stats`, not from `plot_df`)
    marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
    axes[0].axhline(marker_baseline, color='black', ls=':')
    axes[0].minorticks_off()

    # perform statistical tests
    pairs = [('OL','CL')]
    designs = plot_df['design'].unique()
    stat_list = ['output_gmean', 'output_std', 'slope', 'output_variation']
    for stat in stat_list:
        # squeeze=False always returns a 2-D array of Axes, so a single design
        # no longer yields a bare Axes that cannot be zipped; the QC axes get
        # their own name instead of overwriting the `axes` argument
        f, test_axes = plt.subplots(1, len(designs), figsize=(5,2), gridspec_kw=dict(wspace=0.5),
                                    squeeze=False)
        test_axes = test_axes[0]

        for ax, design in zip(test_axes, designs):
            print('Design: '+str(design))
            test_df = plot_df[(plot_df['design']==design)]
            sns.stripplot(data=test_df, x='ts_label', y=stat, ax=ax, hue='construct', palette=designs_palette, legend=False)
            ax.set(ylabel='', xlabel='', title=str(design),)
            # slope and CV stay on a linear scale; the others are log-scaled
            if stat not in ('output_variation', 'slope'):
                ax.set(yscale='log')
            annotator = Annotator(ax, pairs, data=test_df, x='ts_label', y=stat)
            annotator.configure(**annotate_kwargs,).apply_and_annotate()
            print('\n')
        test_axes[0].set(ylabel=stat)

In [None]:
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.5, top=0.6, right=0.15)
axes = subfig.subplots(1, 4, gridspec_kw=dict(wspace=0.6))

# Rows for this panel: miR.FF4 controllers with ts_num == 1 on the EF1a promoter
panel_mask = (
    stats['miR'].eq('miR.FF4')
    & stats['group'].eq('controller')
    & stats['ts_num'].eq(1)
    & stats['promoter'].eq('EF1a')
)
plot_df = stats[panel_mask]
display(plot_df['construct'].unique())

# Draw the four summary-statistic axes and run the OL-vs-CL tests
designs_stats_plots(axes, plot_df)

fig.savefig(rd.outfile(output_path))

In [None]:
subfig = subfigures['E']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.4, top=0.6, right=0.15)
axes = subfig.subplots(1, 4, gridspec_kw=dict(wspace=0.6))

# Rows for this panel: miR.FF5 controllers with ts_num == 1 on the EF1a
# promoter, limited to the FF4x1 / FF5x1 / FF6x1 target sites
panel_mask = (
    stats['miR'].eq('miR.FF5')
    & stats['group'].eq('controller')
    & stats['ts_num'].eq(1)
    & stats['ts'].isin(['FF4x1','FF5x1','FF6x1'])
    & stats['promoter'].eq('EF1a')
)
plot_df = stats[panel_mask]
display(plot_df['construct'].unique())

# Draw the four summary-statistic axes and run the OL-vs-CL tests
designs_stats_plots(axes, plot_df)

fig.savefig(rd.outfile(output_path))

In [None]:
subfig = subfigures['F']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.4, top=0.6, right=0.15)
axes = subfig.subplots(1, 4, gridspec_kw=dict(wspace=0.6))

# Rows for this panel: miRE.FF5 controllers with ts_num == 1 on the EF1a promoter
panel_mask = (
    stats['miR'].eq('miRE.FF5')
    & stats['group'].eq('controller')
    & stats['ts_num'].eq(1)
    & stats['promoter'].eq('EF1a')
)
plot_df = stats[panel_mask]
display(plot_df['construct'].unique())

# Draw the four summary-statistic axes and run the OL-vs-CL tests
designs_stats_plots(axes, plot_df)

fig.savefig(rd.outfile(output_path))

In [None]:
# Define values to shift xticks in plotting
# to add more space between architecture groups
buffer = 0.6
gene_order = ['1T', '2T', '2V']
# one x position per condition; an extra `buffer` is inserted after the
# 5th, 7th and 10th position
xtick_locs = [0,1,2,3,4, 5+buffer,6+buffer, 7+buffer*2,8+buffer*2,9+buffer*2, 10+buffer*3,11+buffer*3,12+buffer*3,]

# Conditions are paired with positions in their order of appearance in
# `stats_subset`. zip() would silently drop the surplus on a length mismatch,
# so fail loudly instead of misplacing points.
conditions = stats_subset['condition'].unique()
assert len(conditions) == len(xtick_locs), (
    f'expected {len(xtick_locs)} conditions to match xtick_locs, found {len(conditions)}'
)
condition_loc = dict(zip(conditions, xtick_locs))

# .map (as used for metadata2 below) always yields numeric positions, with NaN
# for anything unmapped, instead of relying on .replace to downcast the column;
# .assign returns a new frame, avoiding chained-assignment warnings
stats_subset = stats_subset.assign(
    condition_loc=stats_subset['condition'].map(condition_loc).astype(float)
)

metadata2['condition_loc'] = metadata2['condition'].map(condition_loc)
# NOTE(review): `m` is not referenced by any other cell in this notebook -- confirm it is still needed
m = metadata2.dropna().copy()
m['condition_loc'] = m['condition_loc'].astype(str)

xlim_adjusted = (-0.5, len(conditions)-0.5+buffer*(len(gene_order)))

In [None]:
subfig = subfigures['H']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.45, top=0.3, right=0.15)
axes = subfig.subplots(1,4, gridspec_kw=dict(wspace=0.25))

plot_df = stats_subset
display(plot_df['condition'].unique())

# One entry per axis: (statistic column, axis settings applied after plotting)
panel_specs = [
    # stat gmean
    ('output_gmean', dict(title='Mean', xlim=xlim_adjusted, xlabel='', ylabel='',
                          yscale='log', xticks=xtick_locs, ylim=(3e1,4e4))),
    # stat std
    ('output_std', dict(title='Standard deviation', xlim=xlim_adjusted, xlabel='', ylabel='',
                        yscale='log', xticks=xtick_locs, ylim=(6e1,6e4))),
    # slope
    ('slope', dict(title='Slope', xlim=xlim_adjusted, xlabel='', ylabel='',
                   xticks=xtick_locs, yticks=[0.2,0.4,0.6,0.8,1.0,1.2], ylim=(0.1,1.7))),
    # CV
    ('output_variation', dict(title='CV', xlim=xlim_adjusted, xlabel='', ylabel='',
                              xticks=xtick_locs, ylim=(0,4))),
]
for ax, (stat, ax_settings) in zip(axes, panel_specs):
    # conditions are drawn one at a time (in frame order) so each carries its own
    # marker; native_scale places them at their numeric `condition_loc`
    for condition, group in plot_df.groupby('condition', sort=False):
        sns.pointplot(data=group, x='condition_loc', y=stat, hue='condition', palette=condition_palette,
                      ax=ax, marker=condition_markers[condition], native_scale=True, **scatter_kwargs)
    ax.set(**ax_settings)

# Dotted reference line on the mean axis at the mean `output_gmean` of the
# two-gene 'marker' group
marker_baseline = stats2.loc[(stats2['group']=='marker'), 'output_gmean'].mean()
axes[0].axhline(marker_baseline, color='black', ls=':')

# Shade alternating groups of conditions and label each group below the axis
yloc = -6
shaded_spans = [
    (4.5+buffer/2, 6.5+buffer*1.5),
    (9.5+buffer*2.5, 12.5+buffer*3.5),
]
group_labels = [
    (gene_order[0], 2),
    (gene_order[1], 5.5+buffer),
    ('2V\nEF1α', 8+buffer*2),
    ('2V\nU6', 11+buffer*3),
]
for ax in axes:
    shade_color = base.get_light_color(base.colors['gray'])
    for span_start, span_end in shaded_spans:
        ax.axvspan(span_start, span_end, color=shade_color, alpha=0.2,)
    # x is in data coordinates, y in points measured from the bottom of the axes
    for text, xloc in group_labels:
        ax.annotate(text, (xloc,yloc), xycoords=('data','axes points'), ha='center', va='top', ma='center',
                    fontsize=base.font_sizes['smaller_size'])
    # blank out the per-condition tick labels; the group labels replace them
    ax.set_xticklabels(['']*len(ax.get_xticklabels()))

fig.savefig(rd.outfile(output_path))

# perform statistical tests (drawn on separate throwaway figures, one per statistic)
pairs = [('base','CL'), ('OL','CL')]
promoters = ['EF1a','U6']

# Five comparison panels as (title, pairs to test, row mask):
# 1T designs 2 and 3 (each together with the design-0 rows), 2T (OL vs CL only),
# and 2V split by second-construct promoter
test_panels = [
    ('1T, design '+str(design), pairs,
     (plot_df['gene']=='1T') & (plot_df['design'].isin([0,design])))
    for design in (2, 3)
]
test_panels.append(('2T', pairs[1:], (plot_df['gene']=='2T')))
test_panels.extend(
    ('2V, '+promoter, pairs,
     (plot_df['gene']=='2V') & (plot_df['construct2_promoter']==promoter))
    for promoter in promoters
)

for stat in ['output_gmean', 'output_std', 'slope', 'output_variation']:
    f, axes = plt.subplots(1,5, figsize=(5,2), gridspec_kw=dict(wspace=0.5))
    print('----------- '+stat+' -----------')
    for ax, (title, ps, panel_mask) in zip(axes, test_panels):
        test_df = plot_df[panel_mask]
        print('Circuit type: '+title)

        sns.stripplot(data=test_df, x='ts_label', y=stat, ax=ax, hue='condition', palette=condition_palette, legend=False)
        ax.set(ylabel='', xlabel='', title=title)
        # slope and CV stay on a linear scale; the others are log-scaled
        if stat not in ('slope', 'output_variation'):
            ax.set(yscale='log')

        annotator = Annotator(ax, ps, data=test_df, x='ts_label', y=stat)
        annotator.configure(**annotate_kwargs,).apply_and_annotate()
        print('\n')
    axes[0].set(ylabel=stat)

In [None]:
# Save to OneDrive: write a second copy of the finished figure into the
# manuscript's figures/links folder under the rushd data directory
fig.savefig(rd.outfile(rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/'fig_designs-supp.pdf'))