In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

Tuning data (`data`)

In [None]:
# Data and construct-metadata directories, both located under rd.datadir
base_path = rd.datadir / 'instruments' / 'data' / 'attune'
metadata_path = rd.datadir / 'projects' / 'miR-iFFL' / 'plasmids'

# Load the 'tuning' dataset; the loader returns four objects, unpacked in order
loaded_tuning = base.load_data(base_path, metadata_path, 'tuning', 'tuning')
data, quantiles, stats, metadata = loaded_tuning

In [None]:
# Per-construct styling lookups, built from the metadata table as
# {column: {construct: value}} and then split into one dict per styling column
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
main_palette, main_markers = (metadata_dict[column] for column in ('color', 'markers'))

# Display labels for the target-site kinds shown on categorical x-axes
ts_label = dict(na='base', NT='OL', T='CL', none='–')

In [None]:
# Alternate palette/markers for the promoter panel.
# The second argument to base.get_metadata ('promoters') presumably selects a
# sheet of the workbook — confirm against base.py.
metadata_alt_dict = base.get_metadata(metadata_path/'construct-metadata.xlsx', 'promoters').set_index('construct').to_dict('dict')
promoter_palette = metadata_alt_dict['color']
promoter_markers = metadata_alt_dict['markers']

# Alternate palette/markers for the initial data ('designs' metadata).
# Fix: these must be read from metadata_alt2_dict. They previously pointed at
# metadata_alt_dict, which made them aliases of the promoter styling and left
# the 'designs' metadata unused.
metadata_alt2_dict = base.get_metadata(metadata_path/'construct-metadata.xlsx', 'designs').set_index('construct').to_dict('dict')
initial_palette = metadata_alt2_dict['color']
initial_markers = metadata_alt2_dict['markers']

miR/TS characterization data (`data2`) from Emma

In [None]:
# Load the 'miR_characterization' dataset from the same data/metadata folders as
# the tuning data. Only one dataset name is passed here; the four returned
# objects get a '2' suffix to keep them apart from the tuning variables.
data2, quantiles2, stats2, metadata2 = base.load_data(base_path, metadata_path, 'miR_characterization')

In [None]:
# Calculate fold changes for orthogonal or matched conditions
by = ['condition','miR_construct','ts_construct']


def _mean_fold_change(stats_df, metadata_df, experiments, keys):
    """Average `output_gmean_fc` per group over a set of experiments.

    Parameters
    ----------
    stats_df : DataFrame with an 'exp' column, the `keys` columns and
        'output_gmean_fc'.
    metadata_df : DataFrame containing the `keys` columns; its remaining
        columns are attached to the result.
    experiments : list of values of 'exp' to keep.
    keys : list of column names to group by and to join on.

    Returns
    -------
    DataFrame with one row per `keys` combination, holding the mean
    'output_gmean_fc' and the left-joined metadata columns.
    """
    selected = stats_df[stats_df['exp'].isin(experiments)]
    # .mean() is what .apply('mean') dispatched to; spelled out directly here
    mean_fc = selected.groupby(by=keys)[['output_gmean_fc']].mean().reset_index()
    return mean_fc.merge(metadata_df, how='left', on=keys)


# orthogonal exp: {miR} x {TS}x2
orthogonal_exp = ['ELP_exp09', 'ELP_exp12', 'ELP_exp13']
fcs_orthogonal = _mean_fold_change(stats2, metadata2, orthogonal_exp, by)

# matched exp: {miR} x TSx{n}
matched_exp = ['ELP_exp11','ELP_exp49', 'ELP_exp50',]
fcs_matched = _mean_fold_change(stats2, metadata2, matched_exp, by)

In [None]:
# Create color palettes for miR/ts characterization.
# Both palettes are {condition: color} dicts; metadata2['color'] is used as a
# scratch column and ends up holding the second (ts_num) coloring.
green_color = base.colors['green']

# Palette 1: gray by default, green where miR equals ts, black where ts is 'none'
pairs_match = metadata2['miR'] == metadata2['ts']
metadata2['color'] = base.colors['gray']
metadata2['matched'] = pairs_match
metadata2.loc[pairs_match, 'color'] = green_color
metadata2.loc[metadata2['ts'] == 'none', 'color'] = 'black'
matched_palette = metadata2.set_index('condition').to_dict('dict')['color']

# Palette 2: black by default, green for ts_num > 0, lightened once for
# ts_num == 2 and twice for ts_num == 4
metadata2['color'] = 'black'
metadata2.loc[metadata2['ts_num'] > 0, 'color'] = green_color
for site_count, lighten_steps in ((2, 1), (4, 2)):
    site_rows = metadata2['ts_num'] == site_count
    lightened = metadata2.loc[site_rows, 'color']
    for step in range(lighten_steps):
        lightened = lightened.apply(base.get_light_color)
    metadata2.loc[site_rows, 'color'] = lightened

metadata_comb_dict = metadata2.set_index('condition').to_dict('dict')
ts_num_palette = metadata_comb_dict['color']

### Set up figure

In [None]:
# Font sizes shared by every panel, taken from base.font_sizes
base_size, smaller_size = (base.font_sizes[key] for key in ('base_size', 'smaller_size'))

# Seaborn theme: tick style, 'paper' context, Arial sans-serif at the base size
font_rc = {'font.size': base_size, 'font.family': 'sans-serif', 'font.sans-serif': ['Arial']}
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=font_rc)

# Matplotlib overrides applied on top of the seaborn theme
rc_overrides = {
    # text sizes
    'axes.titlesize': base_size,
    'axes.labelsize': base_size,
    'xtick.labelsize': smaller_size,
    'ytick.labelsize': smaller_size,
    # font embedding type used when writing PDFs
    'pdf.fonttype': 42,
    # tick geometry
    'ytick.major.size': 3,
    'xtick.major.size': 3,
    'ytick.minor.size': 2,
    'ytick.major.pad': 2,
    'xtick.major.pad': 2,
    # lines, spines and label padding
    'lines.linewidth': 1,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.labelpad': 2,
}
plt.rcParams.update(rc_overrides)

In [None]:
# Create the overall figure, gridspec, and add subfigure labels
fig = plt.figure(figsize=(6.8504,8))
fig_gridspec = matplotlib.gridspec.GridSpec(
    nrows=4, ncols=12, figure=fig,
    wspace=0.4, hspace=0.4,
    height_ratios=[1.75,1.5,1.75,3], width_ratios=[1]*12,
)

# Panel label -> (row, column-slice) of the 4x12 grid. Subfigures are created
# in this listed order, which is not alphabetical.
panel_cells = {
    'A': np.s_[0, :2],
    'B': np.s_[0, 2:4],
    'C': np.s_[0, 5:9],
    'D': np.s_[1, :6],
    'G': np.s_[0, 9:],
    'H': np.s_[1, 6:],
    'E': np.s_[2, :6],
    'F': np.s_[2, 6:],
    'I': np.s_[3, :4],
    'J': np.s_[3, 4:8],
    'K': np.s_[3, 8:],
}
subfigures = {
    panel: fig.add_subfigure(fig_gridspec[cell])
    for panel, cell in panel_cells.items()
}

# Bold panel letter pinned to the top-left corner of each subfigure
for label, subfig in subfigures.items():
    panel_letter = matplotlib.text.Text(
        x=0, y=1, text=f'{label}',
        fontsize=base.font_sizes['subpanel_label'], fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure,
    )
    subfig.add_artist(panel_letter)

# Default styling for strip-plot points
scatter_kwargs = {'s': 4, 'jitter': 0.1, 'linewidth': 0.5, 'edgecolor': 'white'}

# Write the figure (panels still empty); each panel cell re-saves to the same path
output_path = rd.rootdir/'output'/'fig_tuning-supp'/'fig_tuning-supp.pdf'
fig.savefig(rd.outfile(output_path))

In [None]:
# Panel A: CV ('output_variation') by target-site kind
subfig = subfigures['A']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.4, bottom=0.4, top=0.35, right=0.15)
axes = subfig.subplots(1,1,)

# Rows to show: design-1 miR.FF5 controllers with a single FF4/FF5/FF6 site,
# plus the 'base' group, all restricted to the EF1a promoter
is_ff5_controller = (
    (stats['miR']=='miR.FF5')
    & (stats['group']=='controller')
    & (stats['design']==1)
    & (stats['ts'].isin(['FF4x1','FF5x1','FF6x1']))
)
is_base = stats['group']=='base'
is_ef1a = stats['promoter']=='EF1a'
plot_df = stats[(is_ff5_controller | is_base) & is_ef1a]
xlim = (-0.5, plot_df['ts_kind'].nunique(dropna=False)-0.5)

# NOTE: this rebinds the notebook-level scatter_kwargs (jitter 0.2 rather than
# the 0.1 set at figure creation); cells run after this one that unpack
# scatter_kwargs pick up this value.
scatter_kwargs = {'s': 4, 'jitter': 0.2, 'linewidth': 0.5, 'edgecolor': 'white'}

# stat CV — one stripplot call per construct so each can use its own marker
ax = axes
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(
        data=group, x='ts_kind', y='output_variation',
        hue='construct', palette=initial_palette, legend=False,
        ax=ax, marker=initial_markers[construct], **scatter_kwargs,
    )
ax.set(title='CV', xlim=xlim, xlabel='', ylabel='', ylim=(0.9,1.3))

# Swap the raw ts_kind codes on the x-axis for their display labels
relabeled = [ts_label[tick.get_text()] for tick in ax.get_xticklabels()]
ax.set_xticklabels(relabeled, rotation=45, ha='right')
sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel C: heatmap of mean fold change, microRNA (rows) x target site (columns)
subfig = subfigures['C']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.52, bottom=0.4, top=0.35, right=0.15)
ax = subfig.subplots(1,1)

fcs = fcs_orthogonal
keep_rows = (
    (fcs['ts']!='na')
    & (fcs['miR']!='na')
    & (fcs['miR_promoter']=='hPGK.d')
    & (fcs['ts_num'].isin([0,2]))
)
plot_df = fcs[keep_rows].pivot(index='miR', columns='ts', values='output_gmean_fc')

green_cmap = sns.light_palette(base.colors['green'], as_cmap=True)
sns.heatmap(plot_df, annot=True, fmt='.2f', cmap=green_cmap, ax=ax)
ax.set(xlabel='target sites (x2)', ylabel='microRNA', title='Relative target expr.')

# outline matched target sites: boxes on the diagonal for every row except the
# last (presumably the last row has no matching target site — confirm)
n_outlined = len(plot_df)-1
for i in range(n_outlined):
    outline = matplotlib.patches.Rectangle(
        xy=(i, i), width=1, height=1, fill=False, edgecolor='black', lw=1,
    )
    ax.add_patch(outline)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel G: heatmap of mean fold change, microRNA (rows) x number of target sites (columns)
subfig = subfigures['G']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.52, top=0.35, right=0.1)
ax = subfig.subplots(1,1)

fcs = fcs_matched
keep_rows = (fcs['miR_promoter']=='hPGK.d') & (fcs['miR']!='none')
plot_df = fcs[keep_rows].pivot(index='miR', columns='ts_num', values='output_gmean_fc')

# Same green colormap as the other heatmap, drawn without a colorbar
green_cmap = sns.light_palette(base.colors['green'], as_cmap=True)
sns.heatmap(plot_df, annot=True, fmt='.2f', cmap=green_cmap, ax=ax, cbar=False)
ax.set(xlabel='number of matched\ntarget sites', ylabel='microRNA', title='Relative target expr.')

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel D: output gmean per target site, one subplot per miR (shared log y-axis)
subfig = subfigures['D']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.52, bottom=0.52, top=0.35, right=0.1)
axes = subfig.subplots(1,5, sharey=True)

keep_rows = (
    (stats2['ts']!='na')
    & (stats2['miR']!='na')
    & (stats2['miR_promoter']=='hPGK.d')
    & (stats2['ts_num'].isin([0,2]))
    & (stats2['exp'].isin(orthogonal_exp))
)
plot_df = stats2[keep_rows]

# zip() pairs subplots with miR groups in groupby order and stops at the shorter of the two
for ax, (miR_name, miR_rows) in zip(axes, plot_df.groupby('miR')):
    sns.stripplot(
        data=miR_rows, x='ts', y='output_gmean',
        hue='condition', palette=matched_palette,
        ax=ax, legend=False, **scatter_kwargs,
    )
    ax.set(yscale='log', xlabel='', title=f'{miR_name}')
    sns.despine(ax=ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.minorticks_off()

# One x-label for the whole row, attached to the middle subplot
axes[2].set_xlabel('target site (x2)')

# 'miR:' prefix to the left of the first title: x is taken relative to the first
# y tick label, y relative to the title (set_title returns the title Text object)
first_ax = axes[0]
ylabel_bbox = first_ax.get_yticklabels()[0]
title_bbox = first_ax.set_title(first_ax.get_title())
first_ax.annotate(
    text='miR:', xy=(1, 0.5), xycoords=(ylabel_bbox, title_bbox),
    ha="right", va="center", fontsize=base_size,
)
first_ax.set_ylabel('target')

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel H: output gmean per number of target sites, one subplot per miR (shared log y-axis)
subfig = subfigures['H']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.55, bottom=0.52, top=0.35, right=0.1)
axes = subfig.subplots(1,4, sharey=True)

keep_rows = (
    (stats2['miR_promoter']=='hPGK.d')
    & (stats2['miR']!='none')
    & (stats2['exp'].isin(matched_exp))
)
plot_df = stats2[keep_rows]

# zip() pairs subplots with miR groups in groupby order and stops at the shorter of the two
for ax, (miR_name, miR_rows) in zip(axes, plot_df.groupby('miR')):
    sns.stripplot(
        data=miR_rows, x='ts_num', y='output_gmean',
        hue='condition', palette=ts_num_palette,
        ax=ax, legend=False, **scatter_kwargs,
    )
    ax.set(yscale='log', xlabel='', title=f'{miR_name}')
    sns.despine(ax=ax)
    ax.minorticks_off()

# One x-label for the whole row: anchored at the right edge (x=1) of the second subplot
axes[1].set_xlabel('number of matched target sites', x=1, ha='center')

# 'miR:' prefix to the left of the first title: x is taken relative to the first
# y tick label, y relative to the title (set_title returns the title Text object)
first_ax = axes[0]
ylabel_bbox = first_ax.get_yticklabels()[0]
title_bbox = first_ax.set_title(first_ax.get_title())
first_ax.annotate(
    text='miR:', xy=(1, 0.5), xycoords=(ylabel_bbox, title_bbox),
    ha="right", va="center", fontsize=base_size,
)
first_ax.set_ylabel('target')

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel E: mean / std / slope of the miR-only controls, one subplot per statistic
subfig = subfigures['E']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.52, bottom=0.5, top=0.7, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw={'width_ratios': (1,1,1), 'wspace': 0.35})

# EF1a miR controls: group 'miR' with miR_loc 'CDS', plus group 'base'
miR_order = ['none', 'miR.FF5', 'miR.FF4', 'miRE.FF5', 'miRE.FF4',]
is_cds_miR = (stats['group']=='miR') & (stats['miR_loc']=='CDS')
is_base = stats['group']=='base'
plot_df = stats[(is_cds_miR | is_base) & (stats['promoter']=='EF1a')].copy()
# Ordered categorical fixes the left-to-right order of the x-axis
miR_dtype = pd.api.types.CategoricalDtype(categories=miR_order, ordered=True)
plot_df['miR'] = plot_df['miR'].astype(miR_dtype)
xlim = (-0.5, plot_df['construct'].nunique(dropna=False)-0.5)
ylim = (2e1,8e4)

# (statistic column, axis settings) for the three subplots, left to right
panel_specs = [
    # stat gmean
    ('output_gmean', dict(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)),
    # stat std
    ('output_std', dict(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)),
    # slope
    ('slope', dict(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.25), ylabel='',
                   yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])),
]
for ax, (stat_column, axis_settings) in zip(axes, panel_specs):
    # one stripplot call per construct so each can use its own marker
    for construct, group in plot_df.groupby('construct'):
        sns.stripplot(data=group, x='miR', y=stat_column, hue='construct', palette=main_palette,
                      legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
    ax.set(**axis_settings)

# Dotted reference line on the mean subplot: average output_gmean of the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
axes[0].axhline(marker_baseline, color='black', ls=':')

for ax in axes:
    dashed_labels = [tick.get_text().replace('.','-') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(dashed_labels, rotation=45, ha='right')
    sns.despine(ax=ax)
    ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel F: mean / std / slope of the target-site-only controls, one subplot per statistic
subfig = subfigures['F']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.55, bottom=0.5, top=0.7, right=0.1)
axes = subfig.subplots(1,3, gridspec_kw=dict(width_ratios=(1,1,1), wspace=0.35))

# EF1a target-site controls: group 'ts3' with a single site, plus group 'base'
ts_order = ['none','FF3x1','FF4x1','FF5x1','FF6x1']
plot_df = stats[((((stats['group']=='ts3')) & (stats['ts_num']==1)) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# Ordered categorical fixes the left-to-right order of the x-axis
plot_df['ts'] = plot_df['ts'].astype(pd.api.types.CategoricalDtype(categories=ts_order, ordered=True))
xlim = (-0.5, len(plot_df['ts'].unique())-0.5)
# Defined here so this cell does not depend on panel E's cell having run first;
# the value matches the limits used for panel E.
ylim = (2e1,8e4)

# stat gmean
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='output_gmean', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Mean', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)
# Dotted reference line: average output_gmean of the 'marker' group
marker_baseline = stats.loc[(stats['group']=='marker'), 'output_gmean'].mean()
ax.axhline(marker_baseline, color='black', ls=':')

# stat std
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Std.', xlim=xlim, xlabel='', ylabel='', yscale='log', ylim=ylim)

# slope
ax = axes[2]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='ts', y='slope', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(title='Slope', xlim=xlim, xlabel='', ylim=(0,1.25), ylabel='',
       yticks=[0,0.25,0.5,0.75,1], yticklabels=['0.0','','0.5','','1.0'])

# Tick-label rotation is applied once here for all three subplots (the mean
# subplot used to be rotated a second time right after plotting, redundantly)
for ax in axes:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    sns.despine(ax=ax)
    ax.minorticks_off()

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel I: std (top) and CV (bottom) by miR for the design-1 controllers
subfig = subfigures['I']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=1.125, right=0.15)
axes = subfig.subplots(2,1,)

# Design-1 controllers with a single FF4/FF5/FF6 site, plus group 'base', EF1a only
miR_order = ['none','miR.FF4','miR.FF5','miRE.FF4','miRE.FF5']
plot_df = stats[(((stats['group']=='controller') & (stats['design']==1) & (stats['ts'].isin(['FF4x1','FF5x1','FF6x1']))) | (stats['group']=='base')) &
                (stats['promoter']=='EF1a')].copy()
# Ordered categorical fixes the left-to-right order of the x-axis
plot_df['miR'] = plot_df['miR'].astype(pd.api.types.CategoricalDtype(categories=miR_order, ordered=True))
xlim = (-0.5, len(plot_df['miR'].unique())-0.5)

# stat std (tick labels blanked; only the bottom subplot is labelled)
ax = axes[0]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='miR', y='output_std', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(xlim=xlim, xlabel='', ylabel='Std.', yscale='log', xticklabels=['']*len(ax.get_xticklabels()))

# stat CV
ax = axes[1]
for construct, group in plot_df.groupby('construct'):
    sns.stripplot(data=group, x='miR', y='output_variation', hue='construct', palette=main_palette,
                    legend=False, ax=ax, marker=main_markers[construct], **scatter_kwargs)
ax.set(xlim=xlim, xlabel='', ylabel='CV',)

for ax in axes: 
    ax.minorticks_off()
    sns.despine(ax=ax)

# Break the miR names over two lines on the bottom subplot. The labels are read
# from axes[1] explicitly rather than through the loop variable `ax`, which only
# happened to still point at the last subplot.
bottom_ax = axes[1]
bottom_ax.set_xticklabels([l.get_text().replace('.','-\n') for l in bottom_ax.get_xticklabels()])

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel J: std (top) and CV (bottom) per construct, grouped by promoter
subfig = subfigures['J']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=1.125, right=0.15)
axes = subfig.subplots(2,1,)

# Design-1 miRE.FF4 controllers with a single FF4/FF6 site, plus group 'base' (all promoters)
promoter_order = ['EF1a','CAG','EFS','hPGK']
plot_df = stats[(((stats['miR']=='miRE.FF4') & (stats['group']=='controller') & (stats['design']==1) & (stats['ts'].isin(['FF4x1','FF6x1']))) | (stats['group']=='base'))].copy()
plot_df['promoter'] = plot_df['promoter'].astype(pd.api.types.CategoricalDtype(categories=promoter_order, ordered=True))
# Row order determines the order of plot_df['construct'].unique() below, and
# therefore which x position each construct receives
plot_df = plot_df.sort_values(['promoter','group','ts_kind'])

# remove outlier hPGK point (.copy() so the column assignment below targets an
# independent frame instead of a filtered slice)
plot_df = plot_df[~((plot_df['promoter']=='hPGK') & (plot_df['exp']=='exp92') & (plot_df['biorep']==1))].copy()

# shift xticks to add more space between promoter groups:
# three positions per promoter, with `buffer` extra spacing between groups
buffer = 0.6
xlim = (-0.5, len(plot_df['construct'].unique())-0.5+buffer*(len(promoter_order)-1))
xtick_locs = np.concatenate([[x+i*(3+buffer) for x in range(3)] for i in range(len(promoter_order))])
construct_loc = {k:v for k,v in zip(plot_df['construct'].unique(), xtick_locs)}
# .map (as already used for `metadata` below) yields a numeric column directly;
# .replace on a string column relied on deprecated downcasting to get floats.
# astype(float) guarantees the numeric dtype that native_scale=True needs.
plot_df['construct_loc'] = plot_df['construct'].map(construct_loc).astype(float)
metadata['construct_loc'] = metadata['construct'].map(construct_loc)
# `m` is not used by the plotting code in this cell; kept because it is a
# notebook-level name. .copy() avoids assigning into a slice of `metadata`.
m = metadata.dropna().copy()
m['construct_loc'] = m['construct_loc'].astype(str)
scatter_kwargs2 = dict(s=4, jitter=0.1, linewidth=0.5, edgecolor='white', native_scale=True)

# stat std
ax = axes[0]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_std', hue='construct', palette=promoter_palette,
                    legend=False, ax=ax, marker=promoter_markers[construct], **scatter_kwargs2)
ax.set(xlim=xlim, xlabel='', ylabel='Std.', yscale='log', xticks=xtick_locs)

# stat CV
ax = axes[1]
for construct, group in plot_df.groupby('construct', sort=False):
    sns.stripplot(data=group, x='construct_loc', y='output_variation', hue='construct', palette=promoter_palette,
                    legend=False, ax=ax, marker=promoter_markers[construct], **scatter_kwargs2)
ax.set(xlim=xlim, xlabel='', ylabel='CV', xticks=xtick_locs)

# shaded regions every other promoter (2nd and 4th groups); the edges sit midway
# between neighbouring groups. Computed once, since they are the same for both subplots.
span1 = (xtick_locs[2]+(xtick_locs[3]-xtick_locs[2])/2, xtick_locs[5]+(xtick_locs[6]-xtick_locs[5])/2,)
span2 = (xtick_locs[8]+(xtick_locs[9]-xtick_locs[8])/2, xtick_locs[11]+(1+buffer)/2,)
shade_color = base.get_light_color(base.colors['gray'])
for ax in axes:
    ax.axvspan(*span1, color=shade_color, alpha=0.2,)
    ax.axvspan(*span2, color=shade_color, alpha=0.2,)

    ax.set(xticklabels=['']*len(ax.get_xticklabels()))
    ax.minorticks_off()
    sns.despine(ax=ax)

# add promoter labels below the bottom subplot, centred on the middle position
# of each group (x in data units, y in points relative to the axes)
yloc = -6
for j, prom in enumerate(promoter_order):
    axes[1].annotate(prom, (1+j*(3+buffer),yloc), xycoords=('data','axes points'), ha='center', va='top', fontsize=smaller_size)

fig.savefig(rd.outfile(output_path))

In [None]:
# Panel K: std (top) and CV (bottom) by number of target sites
subfig = subfigures['K']
rd.plot.adjust_subplot_margins_inches(subfig, left=0.5, bottom=0.45, top=1.125, right=0.15)
axes = subfig.subplots(2,1,)

# Design-1 miRE.FF4 controllers on the EF1a promoter
is_shown = (
    (stats['group']=='controller')
    & (stats['miR']=='miRE.FF4')
    & (stats['design']==1)
    & (stats['promoter']=='EF1a')
)
plot_df = stats[is_shown].copy()
xlim = (-0.5, plot_df['ts_num'].nunique(dropna=False)-0.5)

# Point styling for this panel only (jitter 0.05), spelled out rather than
# taken from scatter_kwargs
point_style = {'s': 4, 'jitter': 0.05, 'linewidth': 0.5, 'edgecolor': 'white'}

# stat std on the top subplot, stat CV on the bottom one; one stripplot call per
# construct so each can use its own marker
std_ax, cv_ax = axes
for ax, stat_column in zip(axes, ('output_std', 'output_variation')):
    for construct, group in plot_df.groupby('construct'):
        sns.stripplot(data=group, x='ts_num', y=stat_column, hue='construct', palette=main_palette,
                      legend=False, ax=ax, marker=main_markers[construct], **point_style)

# Top subplot: log scale, tick labels blanked (only the bottom subplot is labelled)
std_ax.set(xlim=xlim, xlabel='', ylabel='Std.', yscale='log', xticklabels=['']*len(std_ax.get_xticklabels()))
cv_ax.set(xlim=xlim, xlabel='# target sites', ylabel='CV', ylim=(0,5))

for ax in axes:
    ax.minorticks_off()
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path))

In [None]:
# Save to OneDrive: write a second copy of the finished figure under rd.datadir
onedrive_pdf = rd.datadir/'manuscripts'/'2024_miR-iFFL'/'figures'/'links'/'fig_tuning-supp.pdf'
fig.savefig(rd.outfile(onedrive_pdf))