In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors
import matplotlib.cm as cm
import matplotlib.image
import matplotlib.patches
import matplotlib.ticker
import matplotlib.gridspec as gridspec
import h5py
import pandas as pd
import numpy as np
import svgutils.transform as sg
import scipy.signal
import scipy.stats

from pathlib import Path
import re

import itertools
# Notebook-wide plot styling: seaborn's 'ticks' style and 'paper' context,
# overriding the font family to sans-serif with Helvetica Neue as the listed font.
sns.set_style('ticks')
sns.set_context('paper',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

import rushd as rd

In [None]:
def add_external_svgs(final_filename, base_svg_filename, subfigures_dict, external_svg_mapping):
    """Paste external SVG drawings into a base SVG and save the merged file.

    Parameters
    ----------
    final_filename
        Where the merged SVG is written.
    base_svg_filename
        The SVG to load as the base; the external SVGs are appended to it.
    subfigures_dict
        Maps a subfigure label to an object exposing ``.bbox`` and
        ``.figure.bbox`` (both queried with ``get_points()``).
    external_svg_mapping
        Iterable of ``(label, mapping)`` pairs. ``mapping['filename']`` is the
        external SVG to load; the optional ``mapping['extents']`` is
        ``(left, bottom, width, height)`` as fractions of the subfigure and
        defaults to ``(0, 0, 1, 1)``.

    Each external SVG is scaled uniformly by the smaller of the width and
    height ratios between its target region and its own size, and moved to the
    target region's top-left corner (the vertical coordinate is flipped against
    the base SVG height).
    """
    # Inspired by https://jetholt.com/micro/programmatically-merging-svg-files/
    # Units from https://www.w3.org/TR/css-values/#absolute-lengths
    pixels_per_unit = {
        'cm': 96.0 / 2.54,
        'mm': 96.0 / 2.54 / 10.0,
        'Q':  96.0 / 2.54 / 40.0,
        'in': 96.0,
        'pc': 96.0 / 6.0,
        'pt': 96.0 / 72.0,
        'px': 1.0
    }

    def leading_number(length_str):
        # First run of digits/dots in the string, as a float.
        return float(re.search(r'[0-9\.]+', length_str).group())

    def unit_to_pixels(unit_str):
        magnitude = leading_number(unit_str)
        for suffix, factor in pixels_per_unit.items():
            if unit_str.endswith(suffix):
                return magnitude * factor
        # No recognised unit suffix: treat the number as pixels.
        return magnitude

    def pixels_to_base_units(svg, xy):
        # Converts a pixel-space (x, y) pair into the units of the SVG's width
        # attribute; the width's unit ratio is applied to both coordinates.
        width_str = svg.get_size()[0]
        raw_value = leading_number(width_str)
        pixel_value = unit_to_pixels(width_str)
        return (xy[0] * raw_value / pixel_value, xy[1] * raw_value / pixel_value)

    def svg_size(svg):
        dims = svg.get_size()
        return (unit_to_pixels(dims[0]), unit_to_pixels(dims[1]))

    base_svg = sg.fromfile(base_svg_filename)
    base_size = svg_size(base_svg)

    for subfig, mapping in external_svg_mapping:
        external_svg = sg.fromfile(mapping['filename'])
        external_size = svg_size(external_svg)

        # (left, bottom, width, height) as fractions of the subfigure.
        extents = mapping.get('extents', (0, 0, 1, 1))

        # Bounding boxes of the subfigure and of its parent figure.
        subfig_bbox = subfigures_dict[subfig].bbox.get_points()
        fig_bbox = subfigures_dict[subfig].figure.bbox.get_points()
        sub_left, sub_bottom = subfig_bbox[0,0], subfig_bbox[0,1]
        sub_width = subfig_bbox[1,0] - subfig_bbox[0,0]
        sub_height = subfig_bbox[1,1] - subfig_bbox[0,1]
        fig_left, fig_bottom = fig_bbox[0,0], fig_bbox[0,1]
        fig_width = fig_bbox[1,0] - fig_bbox[0,0]
        fig_height = fig_bbox[1,1] - fig_bbox[0,1]

        # Target region in the figure's bbox coordinates.
        fig_space_extents = (
            sub_left + extents[0] * sub_width,
            sub_bottom + extents[1] * sub_height,
            sub_width * (extents[2]),
            sub_height * (extents[3])
        )
        # Same region expressed in pixels of the base SVG.
        pixel_extents = (
            (fig_space_extents[0] - fig_left) / fig_width * base_size[0],
            (fig_space_extents[1] - fig_bottom) / fig_height * base_size[1],
            (fig_space_extents[2]) / fig_width * base_size[0],
            (fig_space_extents[3]) / fig_height * base_size[1]
        )
        # Uniform scale: the tighter of the two axis ratios.
        scale_factor = min(pixel_extents[2] / external_size[0], pixel_extents[3] / external_size[1])
        print(f'Base pixel size:{base_size}\nsubfig {subfig}:\n\tsize:{external_size}\n\tfig_space_extents:{fig_space_extents}\n\tpixel_extents:{pixel_extents}\n\tscale factor:{scale_factor}')

        # Scale the external SVG, move it into place (y measured from the top
        # of the base SVG), then attach it to the base document.
        top_edge = base_size[1] - pixel_extents[1] - pixel_extents[3]
        offset = pixels_to_base_units(base_svg, (pixel_extents[0], top_edge))
        external_elem = external_svg.getroot()
        external_elem.moveto(*offset, scale_factor)
        base_svg.append(external_elem)

    base_svg.save(final_filename)

In [None]:
def draw_arrows(subfig, ax, x1, x2, y1, y2, dy, dir1, dir2, c1, c2):
    """Draw a two-gene schematic (two stems with arrowheads on a baseline).

    All coordinates are interpreted in ``ax``'s axes coordinate system
    (``ax.transAxes``); the artists themselves are added to ``subfig``.

    Parameters
    ----------
    subfig : object the artists are added to via ``add_artist``.
    ax : axes whose ``transAxes`` transform positions the artists.
    x1, x2 : horizontal positions of the two stems.
    y1, y2 : baseline height and stem-top height (stems start at ``y1 + 0.02``).
    dy : length of each arrow; note it is applied along the horizontal axis.
    dir1, dir2 : multipliers applied to ``dy`` for each arrow (e.g. 1 or -1).
    c1, c2 : colours of the first and second stem/arrow.
    """
    axes_coords = ax.transAxes
    genes = ((x1, dir1, c1), (x2, dir2, c2))

    # Vertical stems first ...
    for x_pos, _, colour in genes:
        stem = matplotlib.lines.Line2D(
            [x_pos, x_pos], [y1+0.02, y2],
            linewidth=2.5, color=colour, solid_capstyle='projecting', transform=axes_coords)
        subfig.add_artist(stem)

    # ... then the horizontal arrows starting at the top of each stem ...
    for x_pos, direction, colour in genes:
        arrow = matplotlib.patches.FancyArrow(
            x_pos, y2, dy * direction, 0,
            width=0.025, color=colour, transform=axes_coords)
        subfig.add_artist(arrow)

    # ... and finally the dark baseline spanning 20%-80% of the axes width.
    baseline = matplotlib.lines.Line2D(
        [0.2, 0.8], [y1, y1],
        linewidth=2, color='#262626', transform=axes_coords)
    subfig.add_artist(baseline)


In [None]:
def summarize_escape_times(df, max_time=25000, n_points=500):
    """Resample the cumulative fraction of escapes onto a regular time grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``escape_time`` column.
    max_time : float, optional
        Last point of the time grid (the grid starts at 0). Defaults to 25000.
    n_points : int, optional
        Number of grid points. Defaults to 500.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``escape_fraction`` and
        ``stable_fraction`` (``1 - escape_fraction``), one row per grid point.

    Raises
    ------
    ValueError
        If ``df`` contains no escape times.

    Notes
    -----
    The cumulative count is linearly interpolated between consecutive escape
    times. ``np.interp`` holds the end values outside the sampled range, so
    grid times before the earliest escape report ``1 / n`` rather than 0, and
    times after the last escape report 1.
    """
    escape_times = np.sort(np.asarray(df['escape_time']))
    n_escapes = len(escape_times)
    if n_escapes == 0:
        # np.interp would also raise ValueError here; fail with a clearer message.
        raise ValueError('summarize_escape_times requires at least one escape time')

    cumulative_counts = np.arange(1, n_escapes + 1)
    interp_time = np.linspace(0, max_time, n_points)
    escape_fraction = np.interp(interp_time, escape_times, cumulative_counts) / n_escapes
    return pd.DataFrame({
        'time': interp_time,
        'escape_fraction': escape_fraction,
        'stable_fraction': 1.0 - escape_fraction
    })

In [None]:
# Shared colours used by every figure in this notebook.
main_palette = {
    'tandem': '#A58245',
    'tandem_alt': '#F1BE64',
    'convergent': '#18B7CE',
    'divergent': '#AC4140',
    'gray': '#666666',
    'light_gray': '#888888',
    'axis_gray': '#262626'
}
# Font sizes (passed as `fontsize`) for the different annotation roles.
font_sizes = {
    'colorbar_title': 8,
    'subaxis_title': 9,
    'line_annotation': 8,
    'data_annotation': 8,
}
# Same colours with a two-digit hex alpha ('28') appended; used as axes background tints.
light_palette = {k:v + '28' for k, v in main_palette.items()}
# Viridis restricted to its first 80% (drops the yellow end), resampled to 256 colours.
# `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was deprecated in
# matplotlib 3.7 and removed in 3.9; it accepts the same (name, lut) arguments.
no_yellow_viridis = matplotlib.colors.ListedColormap(plt.get_cmap('viridis', 256)(np.linspace(0,0.8,256)))
# Tick formatter: values with |x| >= 1000 are shown in thousands with a 'k' suffix.
k_formatter = matplotlib.ticker.FuncFormatter(lambda x, _: f'{x}' if abs(x) < 1000 else f'{x/1000:.0f}k')
# Tick formatter: appends 'x' to the tick value (fold-change axes).
fold_formatter = matplotlib.ticker.FuncFormatter(lambda x, _: f'{x}x')

## Base model behavior figure

### Visual description
A 160mm x 130mm image (6.30 x 5.12 in).

- **Panel A**: A cartoon of polymerases coming on and off a piece of DNA, showing the binding energy dE, accumulated supercoiling, and the key variables.
- **Panel B**: A diagram of the key tested variables in Figure 1, including circular/linear, different orientations, and delta spacing.
- **Panel C**: Three diagrams that show (normalized) reporter output (y-axis) vs induction (x-axis), with different plots for the different supercoiling initiation values and different curves for each orientation (constant spacing). All of these plots use linear boundary conditions (BCs).
- **Panel D**: Two subplots which compare linear vs plasmid BCs vs different induction conditions, using one of the sc initiation panels.
- **Panel E**: Three subplots. These are scatterplots (density plots?) of the three conditions (at full induction) that show the noise behavior of the system. A schematic is overlaid as a reminder of intrinsic vs. extrinsic noise.
- **Panel F**: Three subplots, showing reporter output as a function of induction for three different spacing levels.

### Caption

In [None]:
# Load the base-model simulation summaries into `base_model_df`.
# Fast path: read the eight Parquet chunks written by a previous run.
# Slow path: rebuild the table from the HDF5 summaries and write the chunks.
base_model_sim_dir = rd.datadir/'projects'/'tangles'/'simulations'
BASE_MODEL_CACHE_CHUNKS = 8
if (base_model_sim_dir/'fig_base_model_summary.gzip.0').exists():
    # Read every chunk, then concatenate once. (`DataFrame.append` was removed
    # in pandas 2.0 and re-copied the growing frame on each iteration.)
    base_model_df = pd.concat(
        [
            pd.read_parquet(rd.infile(base_model_sim_dir/f'fig_base_model_summary.gzip.{i}', 'fig_base_model'))
            for i in range(BASE_MODEL_CACHE_CHUNKS)
        ],
        ignore_index=True
    )
    print(f'Loaded {len(base_model_df)} total simulation runs from Parquet cached file')
else:
    # Run attributes copied verbatim into same-named columns.
    base_model_attr_terms = [
        'rates.sc_dependent', 'coeff.sigma_squared',
        'coeff.mRNA_drag_exponent', 'coeff.mRNA_drag_coeff',
        'rnap.max_velocity', 'rnap.stall_torque', 'rnap.stall_width']
    base_model_group_frames = []
    with h5py.File(
        rd.infile(base_model_sim_dir/'modeling_model_bc_topo_selection_summaries.h5', 'fig_base_model_cache'), 'r') as summaries:
        for group in summaries.keys():
            run_frames = []
            for run in summaries[group].values():
                # Row 0 of `final_mRNA` is stored as the inducible gene, row 1 as the constant gene.
                run_df = pd.DataFrame(data={
                    'inducible_mRNA': run['final_mRNA'][0,:],
                    'constant_mRNA': run['final_mRNA'][1,:]})
                run_df['induction'] = run.attrs['gene.base_rate'][0]
                run_df['condition'] = run.attrs['comment'].decode('utf-8')
                run_df['is_plasmid'] = run.attrs['bcs.is_circular']
                for term in base_model_attr_terms:
                    run_df[term] = run.attrs[term]
                # Spacing = gap between the 2nd and 3rd sorted gene endpoints, divided by 0.34
                # (presumably nm per base pair - TODO confirm) and truncated to an int.
                gene_endpoints = sorted(np.concatenate((
                    run.attrs['gene.start'],
                    run.attrs['gene.end'])))
                run_df['spacing'] = int((gene_endpoints[2] - gene_endpoints[1]) / 0.34)
                run_frames.append(run_df)
            base_model_group_frames.append(pd.concat(run_frames, ignore_index=True))
            print(f'Loaded {len(base_model_group_frames[-1])} simulations...')
    base_model_df = pd.concat(base_model_group_frames, ignore_index=True)
    print(f'Loaded {len(base_model_df)} total simulation runs!')
    base_model_df['condition'] = pd.Categorical(base_model_df['condition'])
    # Cache the table as several gzip-compressed Parquet chunks for the fast path above.
    for i, split in enumerate(np.array_split(base_model_df, BASE_MODEL_CACHE_CHUNKS)):
        split.to_parquet(rd.outfile(base_model_sim_dir/f'fig_base_model_summary.gzip.{i}', 'fig_base_model_cache'), compression='gzip')

In [None]:
# Randomly downsample (replace later)
# NOTE: this rebinds `base_model_df`, so the cell is not idempotent: running it
# again without re-running the loading cell samples the already-reduced table.
BASE_MODEL_SAMPLE_FRACTION = 0.3
BASE_MODEL_SAMPLE_SEED = 2022
base_model_df = base_model_df.sample(frac=BASE_MODEL_SAMPLE_FRACTION, random_state=BASE_MODEL_SAMPLE_SEED)

In [None]:
# Create the overall figure, gridspec, and add subfigure labels.
fig_bm = plt.figure(figsize=(6.30,6.82))
# 4 rows x 2 columns; the top row (panel A) is slightly shorter than the others.
fig_bm_gridspec = gridspec.GridSpec(4, 2, figure=fig_bm,
    wspace=0.1, hspace=0.15, height_ratios=[0.8, 1.0, 1.0, 1.0])
# Panel A spans the full top row; B-D fill the left column, E-G the right column.
fig_bm_subfigures = {
    'A': fig_bm.add_subfigure(fig_bm_gridspec[0,:]),
    'B': fig_bm.add_subfigure(fig_bm_gridspec[1,0]),
    'C': fig_bm.add_subfigure(fig_bm_gridspec[2,0]),
    'D': fig_bm.add_subfigure(fig_bm_gridspec[3,0]),
    'E': fig_bm.add_subfigure(fig_bm_gridspec[1,1]),
    'F': fig_bm.add_subfigure(fig_bm_gridspec[2,1]),
    'G': fig_bm.add_subfigure(fig_bm_gridspec[3,1])
}
# Bold panel letter in the top-left corner of every subfigure.
for label, subfig in fig_bm_subfigures.items():
    subfig.add_artist(matplotlib.text.Text(x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold', verticalalignment='top',transform=subfig.transSubfigure))
fig_bm_palette = {
    'fig.bm.tandem_reporter_upstream': main_palette['tandem'],
    'fig.bm.tandem_reporter_downstream': main_palette['tandem_alt'],
    'fig.bm.convergent': main_palette['convergent'],
    'fig.bm.divergent': main_palette['divergent'],
    # The plotting cells index this palette with the 'fig1.*' condition names
    # they filter on, so those names need entries too (a missing key raises
    # KeyError). NOTE(review): 'fig1.tandem_downstream' is assumed to correspond
    # to the 'tandem_reporter_downstream' colour - confirm.
    'fig1.tandem_upstream': main_palette['tandem'],
    'fig1.tandem_downstream': main_palette['tandem_alt'],
    'fig1.convergent': main_palette['convergent'],
    'fig1.divergent': main_palette['divergent'],
    'independent': main_palette['light_gray'],
    'dependent': main_palette['gray'],
    'gray': main_palette['gray'],
    'axis_gray': main_palette['axis_gray']
    }

In [None]:
# Inspect the supercoiling-dependent, sigma_squared ~ 0.0316 rows of the
# 'fig1.tandem_downstream' condition.
base_model_df[
    (base_model_df['rates.sc_dependent'] == 1.0)
    & (base_model_df['condition'] == 'fig1.tandem_downstream')
    & ((base_model_df['coeff.sigma_squared'] - 0.0316).abs() < 0.001)
]

In [None]:
# List the distinct `coeff.sigma_squared` values present in the table, in ascending order.
sorted(base_model_df['coeff.sigma_squared'].unique())

In [None]:
# Plot Fig1b
# One axis per gene arrangement, each with three curves: supercoiling-independent
# initiation, supercoiling-dependent with sigma_squared == 0, and
# supercoiling-dependent with sigma_squared ~ 0.0316.
syntax_conditions = ['fig1.tandem_upstream', 'fig1.tandem_downstream', 'fig1.convergent', 'fig1.divergent']
df_bm_syntax = base_model_df[
    (base_model_df['is_plasmid'] == 0)
    & base_model_df['condition'].isin(syntax_conditions)
    & (
        (base_model_df['rates.sc_dependent'] == 0.0)
        | (
            (base_model_df['rates.sc_dependent'] == 1.0)
            & (
                (base_model_df['coeff.sigma_squared'] == 0.0)
                | (np.abs(base_model_df['coeff.sigma_squared'] - 0.0316) < 0.001)
            )
        )
    )
].copy()
# Label each row with the initiation model it belongs to.
syntax_is_dependent = df_bm_syntax['rates.sc_dependent'] == 1.0
df_bm_syntax['sc_condition'] = 'independent'
df_bm_syntax.loc[
    syntax_is_dependent & (df_bm_syntax['coeff.sigma_squared'] == 0.0),
    'sc_condition'] = 'dependent'
df_bm_syntax.loc[
    syntax_is_dependent & (np.abs(df_bm_syntax['coeff.sigma_squared'] - 0.0316) < 0.001),
    'sc_condition'] = 'dependent_squared'
df_bm_syntax['condition'] = df_bm_syntax['condition'].cat.remove_unused_categories()
# Fixed normalization constants (NOTE(review): 200.0 and 1/120 are taken as given - confirm).
df_bm_syntax['norm_constant_mRNA'] = df_bm_syntax['constant_mRNA'] / 200.0
df_bm_syntax['norm_induction'] = df_bm_syntax['induction'] / (1.0 / 120.0)
# Tweak subplot layout
fig_bm_subfigures['B'].subplots_adjust(left=0.175, bottom=0.225, right=0.9, top=0.8)
fig_bm_syntax_axes = fig_bm_subfigures['B'].subplots(ncols=4, sharey=True)
dashes = {'independent': (2.8,1.2), 'dependent': '', 'dependent_squared': ''}
for ax, condition in zip(fig_bm_syntax_axes, syntax_conditions):
    # The 'dependent_squared' curve takes the arrangement's own colour. Use a
    # per-axis copy so the shared palette is not modified by this cell.
    axis_palette = {**fig_bm_palette, 'dependent_squared': fig_bm_palette[condition]}
    # Plot the normalized reporter output: the text annotations below are
    # positioned in data coordinates on a ~0-1.4 scale, which only matches the
    # normalized column, not the raw counts.
    sns.lineplot(data=df_bm_syntax[df_bm_syntax.condition == condition],
                x='norm_induction', y='norm_constant_mRNA', style='sc_condition', hue='sc_condition', dashes=dashes, palette=axis_palette, linewidth=1.5,
                legend=False, ax=ax, ci=None)
    sns.despine(ax=ax)
    ax.set_xlim([0.0, 2.5])
    ax.set_xticks([0.0, 1.0, 2.0])
    ax.set_xticklabels(['0x', '1x', '2x'])
# Set labels: a single shared x label under the second axis.
fig_bm_syntax_axes[0].set_ylabel('Reporter output')
for ax in fig_bm_syntax_axes:
    ax.set_xlabel('')
fig_bm_syntax_axes[1].set_xlabel('Fold induction')
# Per-axis curve labels; each tuple is (x, y) for f(1), f(σ) and f(σ,σ²) in data coordinates.
for ax, cond, locs in zip(fig_bm_syntax_axes,
        syntax_conditions,
        [(1.8,1.1,0.2,0.2,1.15,0.45),
         (1.8,1.1,0.2,0.2,1.15,0.45),
         (1.8,1.1,0.3,0.2,1.4,0.45),
         (1.8,0.85,1.8,0.7,0.15,1.15)]):
    ax.text(locs[0], locs[1], 'f(1)', color=fig_bm_palette['independent'], fontsize=font_sizes['line_annotation'])
    ax.text(locs[2], locs[3], 'f(σ)', color=fig_bm_palette['dependent'], fontsize=font_sizes['line_annotation'])
    ax.text(locs[4], locs[5], 'f(σ,σ²)', color=fig_bm_palette[cond], fontsize=font_sizes['line_annotation'])
# Add annotations: a title above each axis ...
for ax, title in zip(fig_bm_syntax_axes, ['Tandem', 'Tandem', 'Convergent', 'Divergent']):
    fig_bm_subfigures['B'].text(0.5, 1.03, title, horizontalalignment='center', transform=ax.transAxes, fontsize=font_sizes['subaxis_title'])
# ... and a two-gene schematic; the coloured arrow marks the reporter gene.
draw_arrows(fig_bm_subfigures['B'], fig_bm_syntax_axes[0],
    0.35, 0.6, 1.15, 1.25, 0.07, 1, 1,
    fig_bm_palette['fig1.tandem_upstream'], fig_bm_palette['gray'])
draw_arrows(fig_bm_subfigures['B'], fig_bm_syntax_axes[1],
    0.35, 0.6, 1.15, 1.25, 0.07, 1, 1,
    fig_bm_palette['gray'], fig_bm_palette['fig1.tandem_upstream'])
draw_arrows(fig_bm_subfigures['B'], fig_bm_syntax_axes[2],
    0.28, 0.72, 1.15, 1.25, 0.07, 1, -1,
    fig_bm_palette['fig1.convergent'], fig_bm_palette['gray'])
draw_arrows(fig_bm_subfigures['B'], fig_bm_syntax_axes[3],
    0.4, 0.6, 1.15, 1.25, 0.07, -1, 1,
    fig_bm_palette['fig1.divergent'], fig_bm_palette['gray'])

fig_bm.savefig('test.svg')

In [None]:
# Plot Fig 1c
# Reporter output vs. induction for the supercoiling-dependent model
# (sigma_squared ~ 0.0316), split by the `is_plasmid` flag: 0 on the left axis
# ('Linear'), 1 on the right axis ('Circular').
bcs_conditions = ['fig1.tandem_downstream', 'fig1.tandem_upstream', 'fig1.convergent', 'fig1.divergent']
df_bm_bcs = base_model_df[
    (np.abs(base_model_df['coeff.sigma_squared'] - 0.0316) < 0.001)
    & (base_model_df['rates.sc_dependent'] == 1.0)
    & base_model_df['condition'].isin(bcs_conditions)
].copy()
df_bm_bcs['condition'] = df_bm_bcs['condition'].cat.remove_unused_categories()
df_bm_bcs['norm_induction'] = df_bm_bcs['induction'] / (1.0 / 120.0)
# The reporter column is overwritten in place (on this copy) with its normalized value.
df_bm_bcs['constant_mRNA'] = df_bm_bcs['constant_mRNA'] / 200.0
fig_bm_subfigures['C'].subplots_adjust(left=0.2, bottom=0.225, right=0.95, top=0.90)
fig_bm_bcs_axes = fig_bm_subfigures['C'].subplots(ncols=2, sharey=True)
for ax, plasmid_flag in zip(fig_bm_bcs_axes, (0.0, 1.0)):
    sns.lineplot(data=df_bm_bcs[df_bm_bcs['is_plasmid']==plasmid_flag],
                 x='norm_induction', y='constant_mRNA', hue='condition',
                 palette=fig_bm_palette, legend=False, linewidth=2,
                 ax=ax)
    sns.despine(ax=ax)
for ax in fig_bm_bcs_axes:
    ax.set_xlabel('')
    ax.set_xlim([0.0, 2.5])
    ax.set_xticks([0.0, 1.0, 2.0])
    ax.set_xticklabels(['0x', '1x', '2x'])
    ax.set_ylim([0.0, 1.2])
fig_bm_bcs_axes[0].set_ylabel('Reporter output')
# Shared x label placed in subfigure coordinates, plus a bold title on each axis.
fig_bm_subfigures['C'].text(0.5, 0.01, 'Fold induction')
for ax, title in zip(fig_bm_bcs_axes, ('Linear', 'Circular')):
    fig_bm_subfigures['C'].text(0.5, 1.0, title, horizontalalignment='center', transform=ax.transAxes, fontsize=9, fontweight='bold')
fig_bm.savefig('test.svg')
del df_bm_bcs

In [None]:
# Plot Fig1d
# Joint density of inducible vs. reporter output at induction ~ 8e-3 for three
# arrangements (linear template, supercoiling-dependent, sigma_squared ~ 0.0316).
dist_conditions = ['fig1.tandem_upstream', 'fig1.convergent', 'fig1.divergent']
df_bm_dist = base_model_df[
    (base_model_df['is_plasmid'] == 0)
    & base_model_df['condition'].isin(dist_conditions)
    & (np.abs(base_model_df['induction'] - 8.0e-3) < 1e-4)
    & (base_model_df['rates.sc_dependent'] == 1.0)
    & (np.abs(base_model_df['coeff.sigma_squared'] - 0.0316) < 0.001)
].copy()
# Both outputs are divided by the same constant (258.0).
for norm_column, raw_column in (('norm_reporter', 'constant_mRNA'), ('norm_inducible', 'inducible_mRNA')):
    df_bm_dist[norm_column] = df_bm_dist[raw_column] / 258.0
fig_bm_subfigures['D'].subplots_adjust(left=0.175, bottom=0.25, right=0.7, top=0.9)
fig_bm_dist_ax = fig_bm_subfigures['D'].subplots(ncols=1, sharey=True)
# One KDE per arrangement, all drawn on the same axis.
for dist_condition, palette_key in zip(dist_conditions, ('tandem', 'convergent', 'divergent')):
    sns.kdeplot(data=df_bm_dist[df_bm_dist['condition'] == dist_condition],
                x='norm_inducible', y='norm_reporter', color=main_palette[palette_key],
                ax=fig_bm_dist_ax)
    sns.despine(ax=fig_bm_dist_ax)
unit_ticks = np.linspace(0.0,1.0,5)
fig_bm_dist_ax.set_xlim([0.0, 1.0])
fig_bm_dist_ax.set_ylim([0.0, 1.0])
fig_bm_dist_ax.set_xticks(unit_ticks)
fig_bm_dist_ax.set_yticks(np.linspace(0.0,1.0,5))

fig_bm_dist_ax.set_xlabel('Inducible gene output')
fig_bm_dist_ax.set_ylabel('Reporter output')

# Two arrows fanning out from near (0.4, 0.8), labelled 'int.' and 'ext.', with
# a 'noise' caption at (0.4, 1.0) in data coordinates.
noise_arrow_style = {'facecolor': 'black', 'edgecolor': '#000000ff', 'width': 0.8, 'headwidth': 5.0, 'headlength': 5.0, 'shrink': 0.0, 'capstyle': 'round'}
for arrow_tail, arrow_head in (((0.405, 0.8), (0.2, 1.0)), ((0.395, 0.8), (0.6, 1.0))):
    fig_bm_dist_ax.annotate(text='', xytext=arrow_tail, xy=arrow_head, arrowprops=dict(noise_arrow_style))
for label_x, label_text in ((0.2, 'int.'), (0.6, 'ext.')):
    fig_bm_dist_ax.text(label_x, 1.04, label_text, horizontalalignment='center', transform=fig_bm_dist_ax.transData, fontsize=9, fontstyle='italic')
fig_bm_subfigures['D'].text(0.4, 1.00, 'noise', horizontalalignment='center', transform=fig_bm_dist_ax.transData, fontsize=9, fontstyle='italic')
fig_bm.savefig('test.svg')
del df_bm_dist


In [None]:
# Plot Fig1f
# Change in reporter output, relative to the spacing == 10000 median at the same
# induction, as a function of induction; one curve per inter-gene spacing.
spacing_conditions = ['fig1f.spacing.tandem_upstream', 'fig1f.spacing.convergent', 'fig1f.spacing.divergent']
df_bm_spacing = base_model_df[
    (base_model_df['is_plasmid'] == 0)
    & base_model_df['condition'].isin(spacing_conditions)
    & (base_model_df['rates.sc_dependent'] == 1.0)
    & (base_model_df['coeff.sigma_squared'] == 0.02)
].copy()
df_bm_spacing['norm_induction'] = df_bm_spacing['induction'] / (1.0 / 120.0)
df_bm_spacing['norm_reporter']  = df_bm_spacing['constant_mRNA'] / 300.0
df_bm_spacing['condition'] = df_bm_spacing['condition'].cat.remove_unused_categories()
# Reference level: median normalized reporter at spacing == 10000, indexed by
# (condition, norm_induction). Select the column before aggregating so only the
# needed median is computed.
max_spacing_median = df_bm_spacing.groupby(['condition', 'norm_induction', 'spacing'])['norm_reporter'].median().xs(10000, level='spacing')
# For every (condition, induction, spacing) group, subtract the matching reference;
# `col.name` is the group key tuple, so [0] is the condition and [1] the induction.
df_bm_spacing['delta_reporter'] = df_bm_spacing.groupby(['condition', 'norm_induction', 'spacing'])['norm_reporter'].transform(lambda col: col - max_spacing_median.xs(col.name[0], level='condition').xs(col.name[1]))
fig_bm_subfigures['F'].subplots_adjust(left=0.175, bottom=0.25, right=0.80, top=0.85)
fig_bm_spacing_induct_axes = fig_bm_subfigures['F'].subplots(ncols=3, sharey=True)
# Colour scale limits shared by the curves and the colorbar.
spacing_color_limits = {'vmin': 500, 'vmax': 12000}
for condition, ax in zip(spacing_conditions, fig_bm_spacing_induct_axes):
    # Dotted zero-change reference line.
    ax.plot([0,2],[0,0], color=fig_bm_palette['gray'], linestyle=':')
    sns.lineplot(data=df_bm_spacing[df_bm_spacing['condition']==condition], x='norm_induction', y='delta_reporter', hue='spacing',
        palette='viridis', hue_norm=matplotlib.colors.Normalize(**spacing_color_limits),
        legend=False, ax=ax, ci=None)
    # Third dot/underscore-separated token of the condition name selects the
    # tint, e.g. 'fig1f.spacing.tandem_upstream' -> 'tandem'.
    ax.set_facecolor(light_palette[condition.replace('_','.').split('.')[2]])
    sns.despine(ax=ax)
    ax.set_xticks([0,1,2])
    ax.xaxis.set_major_formatter(fold_formatter)
    # Align the outer tick labels inwards.
    for tick, align in zip(ax.xaxis.get_major_ticks(), ['left', 'center', 'right']):
        tick.label1.set_horizontalalignment(align)

# Add colorbar
fig_bm_spacing_induct_cbar = fig_bm_subfigures['F'].add_axes([0.84, 0.3, 0.035, 0.5])
fig_bm_subfigures['F'].colorbar(cm.ScalarMappable(norm=matplotlib.colors.Normalize(**spacing_color_limits), cmap='viridis'), cax=fig_bm_spacing_induct_cbar)
fig_bm_spacing_induct_cbar.set_yticks([500,5000, 10000])
fig_bm_spacing_induct_cbar.yaxis.set_major_formatter(k_formatter)
fig_bm_spacing_induct_cbar.set_title('Inter-gene\nspacing', fontsize=font_sizes['colorbar_title'], loc='left')
for header, ax in zip(['Tandem', 'Convergent', 'Divergent'], fig_bm_spacing_induct_axes[:3]):
    ax.set_xlim([0.0, 2.0])
    ax.set_ylim([-0.4,0.4])
    fig_bm_subfigures['F'].text(0.5, 1.1, header, horizontalalignment='center', transform=ax.transAxes, fontsize=8, fontweight='bold')
fig_bm_spacing_induct_axes[0].set_ylabel('ΔReporter output')
fig_bm_spacing_induct_axes[0].set_xlabel('')
fig_bm_spacing_induct_axes[1].set_xlabel('Fold induction')
fig_bm_spacing_induct_axes[2].set_xlabel('')
fig_bm.savefig('test.svg')

In [None]:
# Plot Fig1g
# Same data as Fig1f (`df_bm_spacing`), viewed the other way round: change in
# reporter output vs. inter-gene spacing, one curve per fold induction.
# This cell uses the notebook's current names (`fig_bm`, `fig_bm_subfigures`,
# `fig_bm_palette`, `df_bm_spacing`); the previous `fig1*` / `df_1fg` names are
# not defined anywhere and raised NameError on a fresh kernel.
fig_bm_subfigures['G'].subplots_adjust(left=0.175, bottom=0.25, right=0.80, top=0.85)
fig_1g_axes = fig_bm_subfigures['G'].subplots(ncols=3, sharey=True)
for condition, ax in zip(['fig1f.spacing.tandem_upstream', 'fig1f.spacing.convergent', 'fig1f.spacing.divergent'], fig_1g_axes):
    # Dotted zero-change reference line. `axhline` spans the whole axis; a line
    # drawn between x=0 and x=2 would fall outside the 500-10000 log-scaled range.
    ax.axhline(y=0, color=fig_bm_palette['gray'], linestyle=':')
    sns.lineplot(data=df_bm_spacing[df_bm_spacing['condition']==condition], x='spacing', y='delta_reporter', hue='norm_induction',
        palette='flare_r', hue_norm=matplotlib.colors.Normalize(vmin=0, vmax=2),
        legend=False, ax=ax, ci=None)
    # Third dot/underscore-separated token of the condition name selects the
    # tint, e.g. 'fig1f.spacing.tandem_upstream' -> 'tandem'.
    ax.set_facecolor(light_palette[condition.replace('_','.').split('.')[2]])
    sns.despine(ax=ax)

# Add colorbar
fig_1g_cbar_ax = fig_bm_subfigures['G'].add_axes([0.84, 0.3, 0.035, 0.5])
fig_bm_subfigures['G'].colorbar(cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=0, vmax=2), cmap='flare_r'), cax=fig_1g_cbar_ax)
fig_1g_cbar_ax.set_yticks([0,1,2])
fig_1g_cbar_ax.yaxis.set_major_formatter(fold_formatter)
fig_1g_cbar_ax.set_title('Fold\ninduction', fontsize=font_sizes['colorbar_title'], loc='left')
for header, ax in zip(['Tandem', 'Convergent', 'Divergent'], fig_1g_axes[:3]):
    ax.set_xscale('log')
    ax.set_xlim([500,10000])
    fig_bm_subfigures['G'].text(0.5, 1.1, header, horizontalalignment='center', transform=ax.transAxes, fontsize=8, fontweight='bold')
fig_1g_axes[0].set_ylabel('ΔReporter output')
fig_1g_axes[0].set_xlabel('')
fig_1g_axes[1].set_xlabel('Inter-gene spacing')
fig_1g_axes[2].set_xlabel('')
fig_bm.savefig('test.svg')

In [None]:
# Add external images
# Save the matplotlib figure as SVG, then overlay the hand-drawn panels
# (fig_1a.svg, fig_1b.svg) onto subfigure 'A' and write the merged fig_1.svg.
# Uses `fig_bm` / `fig_bm_subfigures`, the names this notebook defines; the
# previous `fig1` / `fig1_subfigures` are not defined and raised NameError.
fig_bm_svg_dir = rd.rootdir/'writeups'/'figures'/'modeling_paper'
fig_bm.savefig(rd.outfile(fig_bm_svg_dir/'fig_1_mpl.svg','fig1'))
add_external_svgs(
    rd.outfile(fig_bm_svg_dir/'fig_1.svg', 'fig1_assemble'),
    rd.infile(fig_bm_svg_dir/'fig_1_mpl.svg', 'fig1_assemble'), fig_bm_subfigures, [
    # 'extents' is (left, bottom, width, height) as fractions of subfigure 'A'.
    ('A', {
        'filename': rd.infile(fig_bm_svg_dir/'fig_1a.svg', 'fig1_assemble'),
        'extents': (0.05, 0.05, 0.5, 0.9)
    }),
    ('A', {
        'filename': rd.infile(fig_bm_svg_dir/'fig_1b.svg', 'fig1_assemble'),
        'extents': (0.55, 0.05, 0.5, 0.9)
    })
])

In [None]:
# Free memory before building Figure 2.
# `summary_df` is not defined anywhere in the visible notebook, so a bare `del`
# raises NameError on a fresh kernel; tolerate its absence instead.
# NOTE(review): the table loaded above is named `base_model_df` - if that is the
# frame this cell was meant to release, delete it here instead.
try:
    del summary_df
except NameError:
    pass

## Figure 2

In [None]:
# Load the Figure 2 time courses into `fig2_df` (one row per sub-run and time point).
# Fast path: read the Parquet cache. Slow path: rebuild from HDF5 and write the cache.
fig2_sim_dir = rd.datadir/'projects'/'tangles'/'simulations'
if (fig2_sim_dir/'fig2_reprocessed.gzip').exists():
    fig2_df = pd.read_parquet(rd.infile(fig2_sim_dir/'fig2_reprocessed.gzip', 'fig2'))
    print(f'Loaded {len(fig2_df)} total datapoints from Parquet cached file')
else:
    with h5py.File(
        rd.infile(fig2_sim_dir/'modeling_fig2_combined.h5', 'fig2_cache'), 'r') as fig2_raw:
        fig2_group_frames = []
        next_idx = 0  # unique id per sub-run, stored in the 'idx' column
        for group in fig2_raw.keys():
            trace_frames = []
            for run in fig2_raw[group].values():
                # Read each dataset and attribute once per run rather than once
                # per sub-run; they do not change inside the inner loop.
                mRNA = run['mRNA'][...]  # indexed below as [time, gene, sub-run]
                time_points = run['time'][...]
                induction = run.attrs['step_induction']
                condition_name = run.attrs['comment'].decode('utf-8')
                # Spacing = gap between the 2nd and 3rd sorted gene endpoints, divided by 0.34
                # (presumably nm per base pair - TODO confirm) and truncated to an int.
                gene_endpoints = sorted(np.concatenate((
                    run.attrs['gene.start'],
                    run.attrs['gene.end'])))
                spacing = int((gene_endpoints[2] - gene_endpoints[1]) / 0.34)
                extra_terms = {term: run.attrs[term] for term in ['step_time', 'coeff.sigma_squared']}
                for subrun in range(mRNA.shape[2]):
                    # Gene index 0 is stored as the inducible gene, index 1 as the constant gene.
                    trace_df = pd.DataFrame(data={
                        'inducible_mRNA': mRNA[:,0,subrun],
                        'constant_mRNA':  mRNA[:,1,subrun],
                        'time': time_points})
                    trace_df['induction'] = induction
                    trace_df['condition'] = condition_name
                    trace_df['idx'] = next_idx
                    next_idx += 1
                    trace_df['spacing'] = spacing
                    for term, value in extra_terms.items():
                        trace_df[term] = value
                    trace_frames.append(trace_df)
            fig2_group_frames.append(pd.concat(trace_frames, ignore_index=True))
            print(f'Loaded {len(fig2_group_frames[-1])} datapoints...')
        fig2_df = pd.concat(fig2_group_frames, ignore_index=True)
        print(f'Loaded {len(fig2_df)} total datapoints!')
        fig2_df.to_parquet(rd.outfile(fig2_sim_dir/'fig2_reprocessed.gzip', 'fig2_cache'), compression='gzip')

In [None]:
# Create the overall figure, gridspec, and add subfigure labels.
fig2 = plt.figure(figsize=(6.30,5.12))
fig2_gridspec = gridspec.GridSpec(
    3, 3, figure=fig2,
    wspace=0.1, hspace=0.1, height_ratios=[1.0, 1.0, 1.0])
# Panel layout: A and D span a full row each; B takes the first column of the
# middle row and C the remaining two columns.
fig2_panel_cells = [
    ('A', fig2_gridspec[0,:]),
    ('B', fig2_gridspec[1,0]),
    ('C', fig2_gridspec[1,1:]),
    ('D', fig2_gridspec[2,:]),
]
fig2_subfigures = {
    panel: fig2.add_subfigure(cell) for panel, cell in fig2_panel_cells
}
# Bold panel letter in the top-left corner of every subfigure.
for label, subfig in fig2_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure)
    subfig.add_artist(panel_label)
# Condition colours first, then the neutral grays.
fig2_palette = {
    'fig2.tandem_down': main_palette['tandem'],
    'fig2.tandem_up': '#f1be64',
    'fig2.convergent': main_palette['convergent'],
    'fig2.divergent': main_palette['divergent'],
}
fig2_palette['gray'] = main_palette['gray']
fig2_palette['background_gray'] = '#DDDDDD'

In [None]:
# Prep Fig2a
# For every condition in `fig2_df`, pick the `idx` of the first run with
# induction ~ 1.0 and sigma_squared == 0.02; Fig2a plots that single run.
fig2a_candidates = fig2_df[
    (np.abs(fig2_df['induction'] - 1.0) < 0.01)
    & (fig2_df['coeff.sigma_squared'] == 0.02)
]
run_ids = {}
for fig2a_condition in fig2_df['condition'].unique():
    matching_idx = fig2a_candidates.loc[fig2a_candidates['condition'] == fig2a_condition, 'idx']
    run_ids[fig2a_condition] = matching_idx.iloc[0]

In [None]:
# Plot Fig2a
# One example time course per condition: reporter (condition colour) and
# inducible gene (gray), with the period after t = 10000 shaded.
fig2_subfigures['A'].subplots_adjust(left=0.08, bottom=0.225, right=0.9, top=0.8)
fig_2a_axes = fig2_subfigures['A'].subplots(ncols=4, sharey=True)
for ax, (cond, run_idx) in zip(fig_2a_axes, run_ids.items()):
    trace = fig2_df[fig2_df['idx'] == run_idx].copy()
    trace['normed_constant'] = trace['constant_mRNA'] / 380
    trace['normed_inducible']= trace['inducible_mRNA'] / 380
    ax.axvspan(10000, max(trace['time']), color=fig2_palette['background_gray'])
    for y_column, line_color in (('normed_constant', fig2_palette[cond]), ('normed_inducible', fig2_palette['gray'])):
        sns.lineplot(data=trace, x='time', y=y_column, color=line_color, linewidth=1, legend=False, ax=ax)
    sns.despine(ax=ax)
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xlim([0,40000])
    ax.set_ylim([-0.02,1.2])
fig_2a_axes[0].set_ylabel('Gene output')
fig2_subfigures['A'].text(0.5, 0.03, "Time (s)", horizontalalignment='center')
# Schematic layout per condition-name suffix: (suffix, x1, x2, dir1, dir2, reporter_is_second).
# The reporter arrow is drawn in the condition colour, the other gene in gray.
fig2a_arrow_layouts = [
    ('tandem_down', 0.35, 0.6,  1,  1, False),
    ('tandem_up',   0.35, 0.6,  1,  1, True),
    ('convergent',  0.28, 0.72, 1, -1, False),
    ('divergent',   0.4,  0.6, -1,  1, False),
]
for ax, cond in zip(fig_2a_axes, run_ids.keys()):
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_horizontalalignment('right')
    for suffix, arrow_x1, arrow_x2, arrow_dir1, arrow_dir2, reporter_is_second in fig2a_arrow_layouts:
        if not cond.endswith(suffix):
            continue
        arrow_colors = (fig2_palette[cond], fig2_palette['gray'])
        if reporter_is_second:
            arrow_colors = arrow_colors[::-1]
        draw_arrows(fig2_subfigures['A'], ax,
            arrow_x1, arrow_x2, 1.1, 1.25, 0.07, arrow_dir1, arrow_dir2,
            *arrow_colors)
        # Only the first matching suffix applies.
        break

fig2.savefig('test.svg')

In [None]:
# Plot Fig2b
# Standard deviation of the normalized reporter across runs, per condition and
# time point (induction ~ 1.0, sigma_squared == 0.02).
fig2b_selection = (
    (fig2_df['coeff.sigma_squared'] == 0.02)
    & (np.abs(fig2_df['induction'] - 1.0) < 0.01)
)
fig2b_df = fig2_df[fig2b_selection].copy()
fig2b_df['normed_constant'] = fig2b_df['constant_mRNA'] / 380
fig2b_by_time = fig2b_df.groupby(['condition', 'time'])
fig2b_std = fig2b_by_time.std().reset_index()
fig2b_mean = fig2b_by_time.mean().reset_index()
# Reference noise level: mean standard deviation over 1000 < t < 10000, pooled
# over all conditions. The row mask comes from `fig2b_mean`, which shares its
# (condition, time) rows with `fig2b_std`.
pre_induction_window = (fig2b_mean.time < 10000) & (fig2b_mean.time > 1000)
pre_induction_noise = fig2b_std[pre_induction_window]['normed_constant'].mean()
fig2_subfigures['B'].subplots_adjust(left=0.25, bottom=0.24, right=0.9, top=0.9)
fig_2b_ax = fig2_subfigures['B'].subplots(ncols=1)
# Shade everything after t = 10000.
fig_2b_ax.axvspan(10000, max(fig2b_df['time']), color=fig2_palette['background_gray'])
sns.lineplot(
    data=fig2b_std, x='time', y='normed_constant', hue='condition',
    palette=fig2_palette, linewidth=1, estimator=None, ax=fig_2b_ax, legend=None)
fig_2b_ax.axhline(y=pre_induction_noise, color=fig2_palette['gray'], linestyle=':')
sns.despine(ax=fig_2b_ax)
fig_2b_ax.set_xlabel('Time (s)')
fig_2b_ax.set_ylabel('Standard deviation')
fig_2b_ax.set_xlim([0, 40000])
fig_2b_ax.set_ylim([0, 0.4])
fig2.savefig('test.svg')

In [None]:
# Plot Fig2d
# Calculate average convolution
def df_convolve_2d(df):
    """Normalized cross-correlation of the inducible vs. constant mRNA traces.

    Both columns are mean-centered and cross-correlated over every lag. The
    result is divided by the geometric mean of the two zero-lag
    autocorrelations; if that scale is not above 1e-10 the correlation is left
    unscaled (divided by 1.0).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'constant_mRNA' and 'inducible_mRNA' columns.

    Returns
    -------
    pandas.DataFrame
        One row per lag with columns 'lags' (integer sample offsets) and
        'cross_corr'.
    """
    centered_constant = df['constant_mRNA'] - df['constant_mRNA'].mean()
    centered_inducible = df['inducible_mRNA'] - df['inducible_mRNA'].mean()

    # With equal-length inputs, mode='valid' yields only the zero-lag term.
    constant_power = scipy.signal.correlate(
        centered_constant, centered_constant, mode='valid')
    inducible_power = scipy.signal.correlate(
        centered_inducible, centered_inducible, mode='valid')
    scale = np.sqrt(constant_power * inducible_power)
    if not scale > 1e-10:
        # Flat (or otherwise degenerate) traces: skip the normalization.
        scale = 1.0

    lag_values = scipy.signal.correlation_lags(
        len(centered_constant), len(centered_inducible))
    correlation = scipy.signal.correlate(centered_inducible, centered_constant) / scale
    return pd.DataFrame({'lags': lag_values, 'cross_corr': correlation})
# Select the post-t=15000 samples at three induction levels (each matched within
# 0.01) for the sigma_squared == 0.02 runs.
fig2d_induction_match = (
    (np.abs(fig2_df.induction - 0.464) < 0.01) |
    (np.abs(fig2_df.induction - 1.0) < 0.01) |
    (np.abs(fig2_df.induction - 2.61) < 0.01)
)
fig2d_selected = fig2_df[
    (fig2_df.time > 15000)
    & fig2d_induction_match
    & (fig2_df['coeff.sigma_squared'] == 0.02)]
# One cross-correlation curve per (condition, sigma_squared, induction, idx) group ...
fig2d_per_run = fig2d_selected.groupby(
    ['condition', 'coeff.sigma_squared', 'induction', 'idx']
).apply(df_convolve_2d).reset_index()
# ... then averaged at every lag within each (condition, induction) pair.
fig2_convolutions = fig2d_per_run.groupby(
    ['condition', 'induction', 'lags']).mean().reset_index()
# Convert lags (in samples) to time using the spacing between the first two
# distinct time values in the dataset.
time_delta = np.diff(fig2_df['time'].unique())[0]
fig2_convolutions['time'] = fig2_convolutions['lags'] * time_delta

fig2_subfigures['D'].subplots_adjust(left=0.1, bottom=0.225, right=0.82, top=0.8)
fig_2d_axes = fig2_subfigures['D'].subplots(ncols=4, sharey=True)
# (condition suffix, positional draw_arrows arguments, condition color goes first?)
fig2d_arrow_layouts = [
    ('tandem_down', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), True),
    ('tandem_up', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), False),
    ('convergent', (0.28, 0.72, 1.1, 1.25, 0.07, 1, -1), True),
    ('divergent', (0.4, 0.6, 1.1, 1.25, 0.07, -1, 1), True),
]
fig2d_conditions = ['fig2.tandem_down', 'fig2.tandem_up', 'fig2.convergent', 'fig2.divergent']
for ax, condition in zip(fig_2d_axes, fig2d_conditions):
    sns.lineplot(
        data=fig2_convolutions[fig2_convolutions.condition == condition],
        x='time', y='cross_corr', hue='induction',
        hue_norm=matplotlib.colors.LogNorm(vmin=0.4, vmax=2.7),
        palette=no_yellow_viridis, linewidth=1.5, legend=None, ax=ax)
    sns.despine(ax=ax)
    # Zero-correlation and zero-lag guide lines.
    ax.axhline(y=0, color='k')
    ax.axvline(x=0, color='#888888', linestyle=':')
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xlim([-25000,25000])
    ax.set_ylim([-1.0,0.5])
    ax.xaxis.set_major_formatter(k_formatter)
    # Align the first three tick labels left / center / right, in that order.
    for tick, align in zip(ax.xaxis.get_major_ticks(), ['left', 'center', 'right']):
        tick.label1.set_horizontalalignment(align)
    # Gene-orientation arrows above the panel; only the first matching suffix is drawn.
    for suffix, placement, condition_color_first in fig2d_arrow_layouts:
        if not condition.endswith(suffix):
            continue
        arrow_colors = [fig2_palette[condition], fig2_palette['gray']]
        if not condition_color_first:
            arrow_colors.reverse()
        draw_arrows(fig2_subfigures['D'], ax, *placement, *arrow_colors)
        break
fig_2d_axes[0].set_ylabel('Cross-correlation')
fig2_subfigures['D'].text(0.5, 0.03, "Time offset (s)", horizontalalignment='center')

# Colorbar for the induction hue, on the same log scale as the line colors.
fig_2d_cbar_ax = fig2_subfigures['D'].add_axes([0.84, 0.225, 0.02, 0.7])
fig2d_induction_mappable = cm.ScalarMappable(
    norm=matplotlib.colors.LogNorm(vmin=0.4, vmax=2.7), cmap=no_yellow_viridis)
fig2d_cbar = fig2_subfigures['D'].colorbar(fig2d_induction_mappable, cax=fig_2d_cbar_ax)
fig_2d_cbar_ax.yaxis.set_major_formatter(fold_formatter)
fig_2d_cbar_ax.set_title('Fold\ninduction', fontsize=font_sizes['colorbar_title'], loc='left')
fig2d_cbar.set_ticks([0.4,1.0,2.6], labels=['0.4x', '1.0x', '2.6x'])
# Unlabelled minor ticks.
fig2d_cbar.set_ticks([0.5, 0.6, 0.7, 0.8, 0.9, 2.0], labels=[], minor=True)
fig2.savefig('test.svg')

In [None]:
# Save the assembled Figure 2 as an SVG under writeups/figures/modeling_paper;
# the destination path is passed through rd.outfile with the tag 'fig2'.
fig2.savefig(rd.outfile(rd.rootdir/'writeups'/'figures'/'modeling_paper'/'fig_2_mpl.svg', 'fig2'))

In [None]:
# Remove the fig2_df name from the notebook namespace (presumably to release the
# Figure 2 data before the Figure 3/4 datasets are loaded -- confirm).
del fig2_df

## Figure 3 + 4 (Toggle)

In [None]:

# Load the Figure 3 example traces: read the Parquet cache when it exists,
# otherwise extract them from the combined HDF5 file and write the cache.
fig3_examples_cache = rd.datadir/'projects'/'tangles'/'simulations'/'fig3_examples.gzip'
if fig3_examples_cache.exists():
    fig3_examples = pd.read_parquet(rd.infile(fig3_examples_cache, 'fig3'))
    print(f'Loaded {len(fig3_examples)} total example datapoints from Parquet cached file')
else:
    fig3_raw_path = rd.infile(
        rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig3_combined.h5',
        'fig3_examples_cache')
    with h5py.File(fig3_raw_path, 'r') as fig3_raw:
        # Keep at most this many runs per parameter combination.
        n_examples = 5
        example_counts = {}
        fig3_examples = []
        # Running identifier stored in the 'idx' column of each kept run.
        i = 1
        for group in fig3_raw.keys():
            for run in fig3_raw[group].values():
                run_shape = run['mRNA'].shape
                attrs = run.attrs
                condition = attrs['comment'].decode('utf-8')
                # Only runs whose comment starts with 'fig3' belong to this figure.
                if not condition.startswith('fig3'):
                    continue
                # Per-run parameters, in the column order of the output frame.
                params = {
                    'condition': condition,
                    'sc_dependent': attrs['rates.sc_dependent'],
                    'is_plasmid': attrs['bcs.is_circular'],
                    'mRNA_deg_rate': attrs['rates.mRNA_degradation'],
                    'mRNA_deg_factor': attrs['mRNA_deg_factor'],
                    'hill_n': attrs['hill_coeff'],
                    'k_val': attrs['K_val'],
                    'k_factor': attrs['K_factor'],
                }
                # Fall back to a fixed 500-point grid when no time axis was stored.
                time = run['time'] if 'time' in run.keys() else np.linspace(0.0, 50000.0, 500)
                c_tup = tuple(params[key] for key in (
                    'condition', 'hill_n', 'k_val', 'k_factor',
                    'sc_dependent', 'is_plasmid', 'mRNA_deg_rate'))
                already_kept = example_counts.get(c_tup, 0)
                if already_kept >= n_examples:
                    continue
                # Store both mRNA traces of the replicate at index 0 of the last axis.
                fig3_examples.append(pd.DataFrame(data={
                    **params,
                    'time': time,
                    'mRNA_1': run['mRNA'][:,0,0],
                    'mRNA_2': run['mRNA'][:,1,0],
                    'idx': i,
                }))
                i += 1
                example_counts[c_tup] = already_kept + 1
        fig3_examples = pd.concat(fig3_examples)
        print(f'Loaded {len(fig3_examples)} example datapoints!')
        fig3_examples.to_parquet(rd.outfile(fig3_examples_cache, 'fig3_examples_cache'), compression='gzip')

In [None]:
# Load (or build and cache) the basin snapshots: the mRNA_1 / mRNA_2 level of
# every replicate, linearly interpolated at four fixed times.
if (rd.datadir/'projects'/'tangles'/'simulations'/'fig3_basin_examples.gzip').exists():
    fig3_basin_examples = pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'fig3_basin_examples.gzip', 'fig3'))
    print(f'Loaded {len(fig3_basin_examples)} total example datapoints from Parquet cached file')
else:
    with h5py.File(
        rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig3_combined.h5', 'fig3_basin_examples_cache'), 'r') as fig3_raw:
        # A run is skipped once its parameter combination has accumulated at
        # least this many replicates (the last accepted run may overshoot).
        n_examples = 1000
        example_counts = {}
        fig3_basin_examples = []
        # Times at which each trace is sampled; they also name the output columns.
        snapshot_times = [10200, 12500, 17500, 25000]
        for group in fig3_raw.keys():
            group_dfs = []
            for run in fig3_raw[group].values():
                run_shape = run['mRNA'].shape
                condition = run.attrs['comment'].decode('utf-8')
                # Only runs whose comment starts with 'fig3' belong to this figure.
                if not condition.startswith('fig3'):
                    continue
                sc_dependent = run.attrs['rates.sc_dependent']
                is_plasmid = run.attrs['bcs.is_circular']
                hill_n     = run.attrs['hill_coeff']
                mRNA_deg_factor = run.attrs['mRNA_deg_factor']
                k_val      = run.attrs['K_val']
                k_factor   = run.attrs['K_factor']
                mRNA_deg_rate = run.attrs['rates.mRNA_degradation']
                # Fall back to a fixed 500-point grid when no time axis was stored.
                if 'time' in run.keys():
                    time = run['time']
                else:
                    time = np.linspace(0.0, 50000.0, 500)
                c_tup = (condition, hill_n, k_val, k_factor, sc_dependent, is_plasmid, mRNA_deg_rate)
                if example_counts.get(c_tup, 0) >= n_examples:
                    continue
                # Read the full array only for runs that are actually kept.
                mRNA = run['mRNA'][:,:,:]
                # Interpolate along the time axis for all replicates at once.
                mRNA_1 = scipy.interpolate.interp1d(time, mRNA[:,0,:], axis=0)
                mRNA_2 = scipy.interpolate.interp1d(time, mRNA[:,1,:], axis=0)
                mRNA_1_points = mRNA_1(snapshot_times)
                mRNA_2_points = mRNA_2(snapshot_times)
                # One pair of columns per snapshot time, one row per replicate.
                snapshot_columns = {}
                for row, t in enumerate(snapshot_times):
                    snapshot_columns[f'mRNA_1_at_{t}'] = mRNA_1_points[row,:]
                    snapshot_columns[f'mRNA_2_at_{t}'] = mRNA_2_points[row,:]
                group_dfs.append(pd.DataFrame(data={
                    'condition': condition,
                    'sc_dependent': sc_dependent,
                    'is_plasmid': is_plasmid,
                    'mRNA_deg_rate': mRNA_deg_rate,
                    'mRNA_deg_factor': mRNA_deg_factor,
                    'hill_n': hill_n,
                    'k_val': k_val,
                    'k_factor': k_factor,
                    **snapshot_columns,
                }))
                # Count replicates (size of the last mRNA axis), not runs.
                example_counts[c_tup] = example_counts.get(c_tup, 0) + run_shape[2]
            # pd.concat raises on an empty list, so a group that contributed no
            # runs (all filtered out or over quota) is reported and skipped.
            if group_dfs:
                fig3_basin_examples.append(pd.concat(group_dfs, ignore_index=True))
                print(f'Loaded {len(fig3_basin_examples[-1])} examples from group')
            else:
                print('Loaded 0 examples from group')
        fig3_basin_examples = pd.concat(fig3_basin_examples, ignore_index=True)
        print(f'Loaded {len(fig3_basin_examples)} example datapoints!')
        fig3_basin_examples.to_parquet(rd.outfile(rd.datadir/'projects'/'tangles'/'simulations'/'fig3_basin_examples.gzip', 'fig3_basin_examples_cache'), compression='gzip')

In [None]:
# Load (or build and cache) the per-replicate escape-time summary for Figure 3.
if (rd.datadir/'projects'/'tangles'/'simulations'/'fig3_summaries.gzip').exists():
    fig3_df = pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'fig3_summaries.gzip', 'fig3'))
    print(f'Loaded {len(fig3_df)} simulation runs from Parquet cached file')
else:
    with h5py.File(
        rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig3_combined.h5', 'fig3_cache'), 'r') as fig3_raw:
        fig3_df = []
        # For every replicate, record the first time at which mRNA_2 exceeds
        # mRNA_1 (np.inf when that never happens).
        for group in fig3_raw.keys():
            group_df = []
            for run in fig3_raw[group].values():
                run_shape = run['mRNA'].shape
                condition = run.attrs['comment'].decode('utf-8')
                # Only runs whose comment starts with 'fig3' belong to this figure.
                if not condition.startswith('fig3'):
                    continue
                hill_n    = run.attrs['hill_coeff']
                k_val     = run.attrs['K_val']
                k_factor  = run.attrs['K_factor']
                mRNA_deg_factor = run.attrs['mRNA_deg_factor']
                sc_dependent = run.attrs['rates.sc_dependent']
                is_plasmid = run.attrs['bcs.is_circular']
                mRNA_deg_rate = run.attrs['rates.mRNA_degradation']
                # Materialize the time axis as an array so it can be indexed with
                # an index array; fall back to a fixed grid when none was stored.
                if 'time' in run.keys():
                    time = np.asarray(run['time'])
                else:
                    time = np.linspace(0.0, 50000.0, 500)
                mRNA = run['mRNA'][:,:,:]
                # escaped[t, s] is True where replicate s has mRNA_2 > mRNA_1 at sample t.
                escaped = mRNA[:,1,:] > mRNA[:,0,:]
                has_escaped = escaped.any(axis=0)
                # argmax over booleans gives the index of the first True per replicate
                # (0 when there is none; those entries are masked out below).
                first_escape = escaped.argmax(axis=0)
                escape_times = np.full(run_shape[2], np.inf)
                escape_times[has_escaped] = time[first_escape[has_escaped]]
                # Scalar parameters broadcast against the per-replicate escape times.
                group_df.append(pd.DataFrame(data={
                    'condition': condition,
                    'sc_dependent': sc_dependent,
                    'is_plasmid': is_plasmid,
                    'mRNA_deg_rate': mRNA_deg_rate,
                    'hill_n': hill_n,
                    'k_val': k_val,
                    'k_factor': k_factor,
                    'mRNA_deg_factor': mRNA_deg_factor,
                    'escape_time': escape_times}))
            # pd.concat raises on an empty list, so skip groups without 'fig3' runs.
            if group_df:
                fig3_df.append(pd.concat(group_df, ignore_index=True))
                print(f'Loaded {len(fig3_df[-1])} simulations...')
        fig3_df = pd.concat(fig3_df, ignore_index=True)
        print(f'Loaded {len(fig3_df)} total simulations!')
        fig3_df.to_parquet(rd.outfile(rd.datadir/'projects'/'tangles'/'simulations'/'fig3_summaries.gzip', 'fig3_cache'), compression='gzip')

In [None]:
# Load the topo (fig5) example traces: read the Parquet cache when it exists,
# otherwise extract them from the combined HDF5 file and write the cache.
topo_examples_cache = rd.datadir/'projects'/'tangles'/'simulations'/'fig5_examples.gzip'
if topo_examples_cache.exists():
    topo_examples = pd.read_parquet(topo_examples_cache)
    print(f'Loaded {len(topo_examples)} total example datapoints from Parquet cached file')
else:
    with h5py.File(
        rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig5_combined.h5', 'r') as fig5_raw:
        # Keep at most this many runs per parameter combination.
        n_examples = 5
        example_counts = {}
        topo_examples = []
        # Running identifier stored in the 'idx' column of each kept run.
        i = 1
        for group in fig5_raw.keys():
            for run in fig5_raw[group].values():
                run_shape = run['mRNA'].shape
                attrs = run.attrs
                # Per-run parameters, in the column order of the output frame.
                params = {
                    'condition': attrs['comment'].decode('utf-8'),
                    'sc_dependent': attrs['rates.sc_dependent'],
                    'is_plasmid': attrs['bcs.is_circular'],
                    'mRNA_deg_rate': attrs['rates.mRNA_degradation'],
                    'mRNA_deg_factor': attrs['mRNA_deg_factor'],
                    'topo_factor': attrs['topo_factor'],
                    'hill_n': attrs['hill_coeff'],
                    'k_val': attrs['K_val'],
                    'k_factor': attrs['K_factor'],
                }
                # Fall back to a fixed 500-point grid when no time axis was stored.
                time = run['time'] if 'time' in run.keys() else np.linspace(0.0, 50000.0, 500)
                c_tup = tuple(params[key] for key in (
                    'condition', 'hill_n', 'k_val', 'k_factor',
                    'topo_factor', 'mRNA_deg_factor'))
                already_kept = example_counts.get(c_tup, 0)
                if already_kept >= n_examples:
                    continue
                # Store both mRNA traces of the replicate at index 0 of the last axis.
                topo_examples.append(pd.DataFrame(data={
                    **params,
                    'time': time,
                    'mRNA_1': run['mRNA'][:,0,0],
                    'mRNA_2': run['mRNA'][:,1,0],
                    'idx': i,
                }))
                i += 1
                example_counts[c_tup] = already_kept + 1
        topo_examples = pd.concat(topo_examples)
        print(f'Loaded {len(topo_examples)} example datapoints!')
        topo_examples.to_parquet(topo_examples_cache, compression='gzip')
# Load (or build and cache) the per-replicate escape-time summary for the topo
# (fig5) simulations.
if (rd.datadir/'projects'/'tangles'/'simulations'/'fig5_summaries.gzip').exists():
    topo_df = pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'fig5_summaries.gzip', 'fig3'))
    # Report the frame that was just loaded (topo_df, not fig3_df).
    print(f'Loaded {len(topo_df)} simulation runs from Parquet cached file')
else:
    with h5py.File(
        rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig5_combined.h5', 'fig3_topo_cache'), 'r') as fig5_raw:
        topo_df = []
        # For every replicate, record the first time at which mRNA_2 exceeds
        # mRNA_1 (np.inf when that never happens).
        for group in fig5_raw.keys():
            group_df = []
            for run in fig5_raw[group].values():
                run_shape = run['mRNA'].shape
                condition = run.attrs['comment'].decode('utf-8')
                hill_n    = run.attrs['hill_coeff']
                k_val     = run.attrs['K_val']
                k_factor  = run.attrs['K_factor']
                mRNA_deg_factor = run.attrs['mRNA_deg_factor']
                topo_factor = run.attrs['topo_factor']
                sc_dependent = run.attrs['rates.sc_dependent']
                is_plasmid = run.attrs['bcs.is_circular']
                mRNA_deg_rate = run.attrs['rates.mRNA_degradation']
                # Materialize the time axis as an array so it can be indexed with
                # an index array; fall back to a fixed grid when none was stored.
                if 'time' in run.keys():
                    time = np.asarray(run['time'])
                else:
                    time = np.linspace(0.0, 50000.0, 500)
                mRNA = run['mRNA'][:,:,:]
                # escaped[t, s] is True where replicate s has mRNA_2 > mRNA_1 at sample t.
                escaped = mRNA[:,1,:] > mRNA[:,0,:]
                has_escaped = escaped.any(axis=0)
                # argmax over booleans gives the index of the first True per replicate
                # (0 when there is none; those entries are masked out below).
                first_escape = escaped.argmax(axis=0)
                escape_times = np.full(run_shape[2], np.inf)
                escape_times[has_escaped] = time[first_escape[has_escaped]]
                # Scalar parameters broadcast against the per-replicate escape times.
                group_df.append(pd.DataFrame(data={
                    'condition': condition,
                    'sc_dependent': sc_dependent,
                    'is_plasmid': is_plasmid,
                    'mRNA_deg_rate': mRNA_deg_rate,
                    'topo_factor': topo_factor,
                    'hill_n': hill_n,
                    'k_val': k_val,
                    'k_factor': k_factor,
                    'mRNA_deg_factor': mRNA_deg_factor,
                    'escape_time': escape_times}))
            # pd.concat raises on an empty list, so skip groups that hold no runs.
            if group_df:
                topo_df.append(pd.concat(group_df, ignore_index=True))
                print(f'Loaded {len(topo_df[-1])} simulations...')
        topo_df = pd.concat(topo_df, ignore_index=True)
        print(f'Loaded {len(topo_df)} total simulations!')
        topo_df.to_parquet(rd.outfile(rd.datadir/'projects'/'tangles'/'simulations'/'fig5_summaries.gzip', 'fig3_topo_cache'), compression='gzip')

In [None]:
# Apply summarize_escape_times to every parameter combination of the Figure 3
# sweep, and of the topo sweep (which is additionally grouped by topo_factor).
escape_group_keys = ['condition', 'mRNA_deg_factor', 'k_factor', 'hill_n']
fig3_escapes = (
    fig3_df
    .groupby(escape_group_keys)
    .apply(summarize_escape_times)
    .reset_index()
)
topo_escapes = (
    topo_df
    .groupby(escape_group_keys + ['topo_factor'])
    .apply(summarize_escape_times)
    .reset_index()
)

In [None]:
# Summarize half-lives
def summarize_half_lives(df, offset=0.0):
    """Return the earliest time at which more than half of the runs have escaped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'time' and 'escape_fraction' columns.
    offset : float, optional
        Value subtracted from the returned time.

    Returns
    -------
    The smallest 'time' whose 'escape_fraction' exceeds 0.5, minus `offset`.
    If no row exceeds 0.5, the largest 'time' in `df` minus `offset`.
    """
    crossing_times = sorted(df.loc[df.escape_fraction > 0.5, 'time'])
    if crossing_times:
        return crossing_times[0] - offset
    # Never crossed 0.5: fall back to the end of the observed time range.
    return max(df['time']) - offset

In [None]:
# Half-life per parameter combination, with 10000.0 subtracted from each time.
# After reset_index() the values sit in a column named 0.
fig3_halflives = (
    fig3_escapes
    .groupby(['condition', 'mRNA_deg_factor', 'k_factor', 'hill_n'])
    .apply(lambda group: summarize_half_lives(group, 10000.0))
    .reset_index()
)

In [None]:
def summarize_critical_n(df):
    """Return the smallest and largest 'hill_n' in `df` as a list [min, max]."""
    hill_values = df['hill_n']
    lowest = min(hill_values)
    highest = max(hill_values)
    return [lowest, highest]
# Rows at k_factor ~ 3.16, sampled within 10 of t = 17186.0, where more than 90%
# of the runs have escaped.
fig3_crit_n_rows = fig3_escapes[
    (np.abs(fig3_escapes.k_factor - 3.16) < 0.01) &
    (fig3_escapes.escape_fraction > 0.9) &
    (np.abs(fig3_escapes.time - 17186.0) < 10)
].copy()
# Range of Hill coefficients meeting that criterion, per (condition, mRNA_deg_factor).
fig3_crit_n = (
    fig3_crit_n_rows
    .groupby(['condition', 'mRNA_deg_factor'])
    .apply(summarize_critical_n)
    .reset_index()
)
# The apply result is a column (named 0) of [min, max] lists; split it in two.
fig3_crit_n[['min_n', 'max_n']] = pd.DataFrame(
    fig3_crit_n[0].tolist(), index=fig3_crit_n.index)
# This table is not plotted anywhere at the moment :(

In [None]:
# Identify candidate example runs: rows at t == 25000.0 with mRNA_deg_factor ~ 10,
# k_factor ~ 1.0 and hill_n == 2.0. Left as the final expression so the cell
# displays the matching rows.
fig3_example_candidates = (
    (fig3_examples.time == 25000.0)
    & (np.abs(fig3_examples.mRNA_deg_factor - 10) < 0.001)
    & (np.abs(fig3_examples.k_factor - 1.0) < 0.01)
    & (fig3_examples.hill_n == 2.0)
)
fig3_examples[fig3_example_candidates]

In [None]:
# Figure 3 canvas: a 3x2 grid in which panels A and B span a full row each and
# panels C and D share the bottom row.
fig3 = plt.figure(figsize=(6.2, 4.8))
fig3_gridspec = gridspec.GridSpec(
    nrows=3, ncols=2, figure=fig3,
    wspace=0.1, hspace=0.1, width_ratios=[3.5, 1])
fig3_subfigures = {
    label: fig3.add_subfigure(fig3_gridspec[cell])
    for label, cell in (
        ('A', (0, slice(None))),
        ('B', (1, slice(None))),
        ('C', (2, 0)),
        ('D', (2, 1)),
    )
}
# Bold panel letter anchored to the top-left corner of each subfigure.
fig3_panel_label_style = dict(fontsize=13, fontweight='bold', verticalalignment='top')
for label, subfig in fig3_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.',
        transform=subfig.transSubfigure, **fig3_panel_label_style)
    subfig.add_artist(panel_label)
# The same four orientation colors are registered under both the 'fig3.' and the
# 'fig5.' condition prefixes, followed by the two grays.
fig3_palette = {
    f'{prefix}.{orientation}': color
    for prefix in ('fig3', 'fig5')
    for orientation, color in (
        ('tandem', main_palette['tandem']),
        ('tandem_up', '#f1be64'),
        ('convergent', main_palette['convergent']),
        ('divergent', main_palette['divergent']),
    )
}
fig3_palette['gray'] = main_palette['gray']
fig3_palette['background_gray'] = '#DDDDDD'

In [None]:
# Plot Fig 3b: one hand-picked example run (by idx) per panel, showing both
# mRNA traces over time.
fig3_subfigures['B'].subplots_adjust(left=0.1, bottom=0.25, right=0.98, top=0.8)
fig_3b_axes = fig3_subfigures['B'].subplots(ncols=4, sharey=True)
# (condition suffix, positional draw_arrows arguments, condition color goes first?)
fig3b_arrow_layouts = [
    ('tandem', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), True),
    ('tandem_up', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), False),
    ('convergent', (0.28, 0.72, 1.1, 1.25, 0.07, 1, -1), True),
    ('divergent', (0.4, 0.6, 1.1, 1.25, 0.07, -1, 1), True),
]
for ax, idx in zip(fig_3b_axes, [25841, 17660, 31015, 31016]):
    fig_3b_df = fig3_examples[fig3_examples.idx == idx]
    cond = fig_3b_df['condition'].iloc[0]
    # mRNA_1 in the condition color, mRNA_2 in gray.
    for trace, trace_color in (('mRNA_1', fig3_palette[cond]), ('mRNA_2', fig3_palette['gray'])):
        sns.lineplot(data=fig_3b_df, x='time', y=trace, linewidth=0.8, color=trace_color, ax=ax)
    # Shade the 10000-25000 window.
    ax.axvspan(10000, 25000, color=fig3_palette['background_gray'])
    ax.set_xlim([0,25000])
    ax.set_xticks([0,10000,20000])
    sns.despine(ax=ax)
    ax.set_xlabel('')
    # Gene-orientation arrows above the panel; only the first matching suffix is drawn.
    for suffix, placement, condition_color_first in fig3b_arrow_layouts:
        if not cond.endswith(suffix):
            continue
        arrow_colors = [fig3_palette[cond], fig3_palette['gray']]
        if not condition_color_first:
            arrow_colors.reverse()
        draw_arrows(fig3_subfigures['B'], ax, *placement, *arrow_colors)
        break
fig_3b_axes[0].set_ylabel('mRNA count')
# Single x-axis label shared by all four panels.
fig3_subfigures['B'].text(0.5, 0.05, "Time (s)", horizontalalignment='center')
fig3.savefig('test.svg')

In [None]:
# Plot Fig 3c: joint density of (mRNA_1, mRNA_2) at four snapshot times for the
# convergent condition with mRNA_deg_factor ~ 10, k_factor ~ 1.0 and hill_n == 2.0.
fig3c_is_selected = (
    (fig3_basin_examples.condition == 'fig3.convergent')
    & (np.abs(fig3_basin_examples.mRNA_deg_factor - 10) < 0.001)
    & (np.abs(fig3_basin_examples.k_factor - 1.0) < 0.01)
    & (fig3_basin_examples.hill_n == 2.0)
)
fig3c_df = fig3_basin_examples[fig3c_is_selected].copy()
fig3_subfigures['C'].subplots_adjust(left=0.1, bottom=0.27, right=0.9, top=0.80)
fig_3c_axes = fig3_subfigures['C'].subplots(ncols=4, sharey=True)
for t, ax in zip([10200,12500,17500,25000], fig_3c_axes):
    x_column = f'mRNA_1_at_{t}'
    y_column = f'mRNA_2_at_{t}'
    # Fraction of replicates with mRNA_1 > mRNA_2 at this snapshot.
    mean_state = np.mean(fig3c_df[x_column] > fig3c_df[y_column])
    sns.kdeplot(data=fig3c_df, x=x_column, y=y_column,
        color=main_palette['divergent'], cut=0, clip=[0,40], fill=True, ax=ax)
    # Diagonal where both mRNA counts are equal.
    ax.axline((0,0), slope=1, linestyle=':', color=main_palette['gray'])
    sns.despine(ax=ax)
    ax.set_xlim([-2,40])
    ax.set_ylim([-2,40])
    ax.set_xlabel('')
    # Percentages on either side of the diagonal: upper-left is the complement,
    # lower-right is the mRNA_1 > mRNA_2 fraction.
    for text_x, text_y, fraction in ((0.25, 0.65, 1.0 - mean_state), (0.65, 0.25, mean_state)):
        ax.text(text_x, text_y, f'{fraction * 100:.1f}%',
            transform=ax.transAxes, fontsize=font_sizes['data_annotation'])
fig_3c_axes[0].set_ylabel('mRNA 2 count')
fig3_subfigures['C'].text(0.5, 0.05, "mRNA 1 count", horizontalalignment='center')

# Thin axis across the top of the panel that labels each column with its snapshot time.
fig3c_time_ax = fig3_subfigures['C'].add_axes((0.1, 0.908, 0.8, 0.1))
fig3c_time_ax.yaxis.set_visible(False)
fig3c_time_ax.set_xlim([0,8])
fig3c_time_ax.set_xticks([1, 3, 5, 7])
fig3c_time_ax.set_xticklabels(['10.2k', '12.5k', '17.5k', '25k'])
fig3_subfigures['C'].text(0.5, 0.93, "Time (s)", horizontalalignment='center')
# Arrowhead marker at the right end of the time axis.
fig3c_time_ax.plot(1, 0, ">k", transform=fig3c_time_ax.transAxes, clip_on=False)
sns.despine(ax=fig3c_time_ax, left=True)

fig3.savefig('test.svg')

In [None]:
# Plot Fig 3d: stable fraction over time for every condition at
# mRNA_deg_factor ~ 10, k_factor ~ 1.0 and hill_n == 2.0.
fig3d_is_selected = (
    (np.abs(fig3_escapes.mRNA_deg_factor - 10) < 0.001)
    & (np.abs(fig3_escapes.k_factor - 1.0) < 0.01)
    & (fig3_escapes.hill_n == 2.0)
)
fig_3d_df = fig3_escapes[fig3d_is_selected]
fig3_subfigures['D'].subplots_adjust(left=0.28, bottom=0.3, right=0.95, top=0.9)
fig_3d_ax = fig3_subfigures['D'].subplots(ncols=1)
sns.lineplot(
    data=fig_3d_df, x='time', y='stable_fraction', hue='condition',
    palette=fig3_palette, linewidth=1.5, legend=None, ax=fig_3d_ax)
sns.despine(ax=fig_3d_ax)
# Right-align the x tick labels.
for tick in fig_3d_ax.xaxis.get_major_ticks():
    tick.label1.set_horizontalalignment('right')
# Shade the 10000-25000 window.
fig_3d_ax.axvspan(10000, 25000, color=fig3_palette['background_gray'])
fig_3d_ax.set_xlabel('Time (s)')
fig_3d_ax.set_ylabel('Stable fraction')
fig_3d_ax.set_xlim([9000,25000])
# Panel annotation with the Hill coefficient used for this selection.
fig3_subfigures['D'].text(0.5, 0.92, "n = 2.0", horizontalalignment='center')
fig3.savefig('test.svg')

In [None]:
# Save the assembled Figure 3 as an SVG under writeups/figures/modeling_paper;
# the destination path is passed through rd.outfile with the tag 'fig3'.
fig3.savefig(rd.outfile(rd.rootdir / 'writeups' / 'figures' / 'modeling_paper' / 'fig_3_mpl.svg', 'fig3'))

In [None]:
# Figure 4 canvas: a 4x2 grid in which panels A, B and E span a full row each
# and panels C and D share the (taller) third row.
fig4 = plt.figure(figsize=(6.2, 5.5))
fig4_gridspec = gridspec.GridSpec(
    nrows=4, ncols=2, figure=fig4,
    wspace=0.1, hspace=0.1, height_ratios=[1, 1, 1.5, 1])
fig4_subfigures = {
    label: fig4.add_subfigure(fig4_gridspec[cell])
    for label, cell in (
        ('A', (0, slice(None))),
        ('B', (1, slice(None))),
        ('C', (2, 0)),
        ('D', (2, 1)),
        ('E', (3, slice(None))),
    )
}
# Bold panel letter anchored to the top-left corner of each subfigure.
fig4_panel_label_style = dict(fontsize=13, fontweight='bold', verticalalignment='top')
for label, subfig in fig4_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.',
        transform=subfig.transSubfigure, **fig4_panel_label_style)
    subfig.add_artist(panel_label)

In [None]:
# Plot Fig 4b: stable fraction over time, one panel per condition and one line
# per Hill coefficient, at mRNA_deg_factor ~ 10 and k_factor ~ 1.0.
fig4b_is_selected = (
    (np.abs(fig3_escapes.mRNA_deg_factor - 10) < 0.001)
    & (np.abs(fig3_escapes.k_factor - 1.0) < 0.01)
)
fig4b_df = fig3_escapes[fig4b_is_selected].copy()
fig4_subfigures['B'].subplots_adjust(left=0.1, bottom=0.27, right=0.9, top=0.9)
fig_4b_axes = fig4_subfigures['B'].subplots(ncols=4, sharey=True)
# (condition suffix, positional draw_arrows arguments, condition color goes first?)
fig4b_arrow_layouts = [
    ('tandem', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), True),
    ('tandem_up', (0.35, 0.6, 1.1, 1.25, 0.07, 1, 1), False),
    ('convergent', (0.28, 0.72, 1.1, 1.25, 0.07, 1, -1), True),
    ('divergent', (0.4, 0.6, 1.1, 1.25, 0.07, -1, 1), True),
]
for c, ax in zip(['fig3.tandem', 'fig3.tandem_up', 'fig3.convergent', 'fig3.divergent'], fig_4b_axes):
    sns.lineplot(
        data=fig4b_df[fig4b_df.condition == c],
        x='time', y='stable_fraction', hue='hill_n',
        palette=no_yellow_viridis, estimator=None, legend=None, ax=ax)
    ax.set_xlim([9000,25000])
    ax.set_ylim([0,1])
    ax.set_xticks([10000,20000])
    ax.set_xlabel('')
    sns.despine(ax=ax)
    # Gene-orientation arrows above the panel; only the first matching suffix is drawn.
    for suffix, placement, condition_color_first in fig4b_arrow_layouts:
        if not c.endswith(suffix):
            continue
        arrow_colors = [fig3_palette[c], fig3_palette['gray']]
        if not condition_color_first:
            arrow_colors.reverse()
        draw_arrows(fig4_subfigures['B'], ax, *placement, *arrow_colors)
        break
fig_4b_axes[0].set_ylabel('Stable fraction')
# Single x-axis label shared by all four panels.
fig4_subfigures['B'].text(0.5, 0.05, "Time (s)", horizontalalignment='center')
# Colorbar for the Hill-coefficient hue, linear from 1.0 to 5.0.
fig_4b_cbar_ax = fig4_subfigures['B'].add_axes([0.915, 0.27, 0.02, 0.62])
fig4b_hill_mappable = cm.ScalarMappable(
    norm=matplotlib.colors.Normalize(vmin=1.0, vmax=5.0), cmap=no_yellow_viridis)
fig4_subfigures['B'].colorbar(fig4b_hill_mappable, cax=fig_4b_cbar_ax)
fig_4b_cbar_ax.set_yticks([1, 2, 3, 4, 5])
fig_4b_cbar_ax.set_title('Hill coefficient', fontsize=8)
fig4.savefig('test.svg')

In [None]:
# Fig 4c: half life as a function of the Hill coefficient, one line per condition.
# Only rows with mRNA_deg_factor ~ 10 and k_factor ~ 1 are kept (tolerances absorb float round-off).
fig_4c_df = fig3_halflives[
    (np.abs(fig3_halflives.mRNA_deg_factor - 10) < 0.001) &
    (np.abs(fig3_halflives.k_factor - 1.0) < 0.01)
].copy()
fig4_subfigures['C'].subplots_adjust(left=0.25, bottom=0.3, right=0.95, top=0.9)
fig_4c_ax = fig4_subfigures['C'].subplots(ncols=1)
# The plotted value lives in a column whose label is the integer 0.
sns.lineplot(data=fig_4c_df, y=0, x='hill_n', hue='condition', palette=fig3_palette, legend=None, ax=fig_4c_ax)
sns.despine(ax=fig_4c_ax)
# Fixed: the label previously read 'Hill coefficent (n)'.
fig_4c_ax.set_xlabel('Hill coefficient (n)')
fig_4c_ax.set_ylabel('Half life (s)')
fig_4c_ax.yaxis.set_major_formatter(k_formatter)
fig_4c_ax.set_xlim([1.0,5.0])
fig_4c_ax.set_ylim([0,15500])
fig_4c_ax.set_yticks(list(range(0,20000,5000)))
fig_4c_ax.set_xticks(list(range(1,6)))
# Scratch render for quick visual inspection.
fig4.savefig('test.svg')

In [None]:
# Fig 4d: half life as a function of mRNA_deg_factor (log x axis), one line per condition.
# Rows are restricted to k_factor ~ 1 and hill_n exactly 2.0.
fig_4d_row_mask = (
    (np.abs(fig3_halflives.k_factor - 1.0) < 0.01) &
    (fig3_halflives.hill_n == 2.0)
)
fig_4d_df = fig3_halflives[fig_4d_row_mask].copy()
fig4_subfigures['D'].subplots_adjust(left=0.25, bottom=0.3, right=0.9, top=0.9)
fig_4d_ax = fig4_subfigures['D'].subplots(ncols=1)
# The plotted value lives in a column whose label is the integer 0.
lp_3e = sns.lineplot(
    data=fig_4d_df,
    x='mRNA_deg_factor',
    y=0,
    hue='condition',
    palette=fig3_palette,
    legend=None,
    ax=fig_4d_ax,
)
lp_3e.set(xscale='log')
sns.despine(ax=fig_4d_ax)

# x axis: switch to log scale first, then fix limits, ticks and their "Nx" labels.
fig_4d_ax.set_xlim([1,100])
fig_4d_ax.set_xticks([1,10,100])
fig_4d_ax.set_xticklabels(['1x', '10x', '100x'])
fig_4d_ax.set_xlabel('mRNA degradation rate')

# y axis: same range, ticks and formatter as used for Fig 4c.
fig_4d_ax.set_ylim([0, 15500])
fig_4d_ax.yaxis.set_major_formatter(k_formatter)
fig_4d_ax.set_yticks(list(range(0,20000,5000)))
fig_4d_ax.set_ylabel('Half life (s)')
# Scratch render for quick visual inspection.
fig4.savefig('test.svg')

In [None]:
# Fig 4e: stable fraction over time, one panel per condition, one line per topo_factor.
# Rows are restricted to mRNA_deg_factor ~ 10, k_factor ~ 1 and hill_n exactly 2.0.
fig4e_df = topo_escapes[
    (np.abs(topo_escapes.mRNA_deg_factor - 10) < 0.001) &
    (np.abs(topo_escapes.k_factor - 1.0) < 0.01) &
    (topo_escapes.hill_n == 2.0)
].copy()
# One colormap shared by the lines and the colorbar. Without an explicit palette seaborn
# colors a numeric hue with its default cubehelix ramp, which would not match the
# 'flare_r' colorbar drawn below.
fig_4e_cmap = 'flare_r'
fig4_subfigures['E'].subplots_adjust(left=0.1, bottom=0.27, right=0.9, top=0.8)
fig_4e_axes = fig4_subfigures['E'].subplots(ncols=4, sharey=True)
for c, ax in zip(['fig5.tandem', 'fig5.tandem_up', 'fig5.convergent', 'fig5.divergent'], fig_4e_axes):
    sns.lineplot(data=fig4e_df[fig4e_df.condition == c], x='time', y='stable_fraction', hue='topo_factor',
        palette=fig_4e_cmap,
        estimator=None, hue_norm=matplotlib.colors.LogNorm(vmin=1.0, vmax=300), legend=None, ax=ax)
    sns.despine(ax=ax)
    ax.set_xlabel('')
    ax.set_xlim([10000,25000])
    # Arrow glyphs above each panel. 'tandem_up' cannot match the first branch because
    # endswith('tandem') is false for a name ending in 'tandem_up'.
    # NOTE(review): the numeric arguments are passed straight to draw_arrows; their
    # meaning is defined there.
    if c.endswith('tandem'):
        draw_arrows(fig4_subfigures['E'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig3_palette[c], fig3_palette['gray'])
    elif c.endswith('tandem_up'):
        draw_arrows(fig4_subfigures['E'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig3_palette['gray'], fig3_palette[c])
    elif c.endswith('convergent'):
        draw_arrows(fig4_subfigures['E'], ax,
            0.28, 0.72, 1.1, 1.25, 0.07, 1, -1,
            fig3_palette[c], fig3_palette['gray'])
    elif c.endswith('divergent'):
        draw_arrows(fig4_subfigures['E'], ax,
            0.4, 0.6, 1.1, 1.25, 0.07, -1, 1,
            fig3_palette[c], fig3_palette['gray'])
# Shared axis labels: y label on the leftmost panel only, one x label for the whole subfigure.
fig_4e_axes[0].set_ylabel('Stable fraction')
fig4_subfigures['E'].text(0.5, 0.05, "Time (s)", horizontalalignment='center')
# Hand-placed colorbar using the same log normalization and colormap as the lines.
fig_4e_cbar_ax = fig4_subfigures['E'].add_axes([0.915, 0.27, 0.02, 0.55])
fig4_subfigures['E'].colorbar(cm.ScalarMappable(norm=matplotlib.colors.LogNorm(vmin=1.0, vmax=300), cmap=fig_4e_cmap), cax=fig_4e_cbar_ax)
fig_4e_cbar_ax.set_yticks([1,10,100,300])
fig_4e_cbar_ax.set_yticklabels([f'{x}x' for x in [1,10,100,300]])
fig_4e_cbar_ax.set_title('Topo rate', fontsize=8)
# Scratch render for quick visual inspection.
fig4.savefig('test.svg')

In [None]:
# Write the finished Figure 4 to the writeups folder; the path goes through rd.outfile.
# NOTE(review): the rd.outfile tag is 'fig3' although this is fig4 -- confirm this offset is intended.
fig4.savefig(rd.outfile(rd.rootdir / 'writeups' / 'figures' / 'modeling_paper' / 'fig_4_mpl.svg', 'fig3'))

## Figure 5 (Zinani)

In [None]:
# Load example time traces for Figure 5: read the Parquet cache when it exists,
# otherwise rebuild it from the combined HDF5 file and write the cache.
# (The on-disk files and rd tags are named 'fig4'; the notebook variables are fig5.)
if (rd.datadir/'projects'/'tangles'/'simulations'/'fig4_examples.gzip').exists():
    fig5_examples = pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'fig4_examples.gzip', 'fig4'))
    print(f'Loaded {len(fig5_examples)} total example datapoints from Parquet cached file')
else:
    with h5py.File(
        rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig4_combined.h5', 'fig4_examples_cache'), 'r') as fig5_raw:
        # Keep at most this many runs per (condition, temperature) pair.
        n_examples = 5
        example_counts = {}
        # Per-run frames are collected here. The original code appended to the
        # undefined name `fig4_examples`, which fails on a fresh kernel.
        example_frames = []
        # Running identifier stored in the 'idx' column; starts at 1.
        i = 1
        for group in fig5_raw.keys():
            for run in fig5_raw[group].values():
                condition = run.attrs['comment'].decode('utf-8')
                temperature = run.attrs['temperature']
                c_tup = (condition, temperature)
                if example_counts.get(c_tup, 0) >= n_examples:
                    continue
                # Only index 0 along the last axis of each dataset is kept.
                # NOTE(review): the column names encode what each component index of
                # 'discrete_components' is assumed to mean -- verify against the simulator.
                example_frames.append(pd.DataFrame(data={
                    'condition': condition,
                    'temperature': temperature,
                    'time': run['time'],
                    'her1_mRNA': run['mRNA'][:,0,0],
                    'her7_mRNA': run['mRNA'][:,1,0],
                    'her1_protein': run['discrete_components'][:,0,0],
                    'her7_protein': run['discrete_components'][:,1,0],
                    'hes6_protein': run['discrete_components'][:,2,0],
                    'her1_promoter_empty': run['discrete_components'][:,3,0],
                    'her7_promoter_empty': run['discrete_components'][:,4,0],
                    'her1_promoter_with_11': run['discrete_components'][:,5,0],
                    'her1_promoter_with_76': run['discrete_components'][:,6,0],
                    'her7_promoter_with_11': run['discrete_components'][:,7,0],
                    'her7_promoter_with_76': run['discrete_components'][:,8,0],
                    'idx': i
                }))
                i += 1
                example_counts[c_tup] = example_counts.get(c_tup, 0) + 1
        fig5_examples = pd.concat(example_frames)
        print(f'Loaded {len(fig5_examples)} example datapoints!')
        fig5_examples.to_parquet(rd.outfile(rd.datadir/'projects'/'tangles'/'simulations'/'fig4_examples.gzip', 'fig4_examples_cache'), compression='gzip')
# Per-simulation her1/her7 correlation summaries for Figure 5: read the Parquet cache
# when it exists, otherwise compute them from the combined HDF5 file and write the cache.
fig5_summary_cache = rd.datadir/'projects'/'tangles'/'simulations'/'fig4_summaries.gzip'
if fig5_summary_cache.exists():
    fig5_df = pd.read_parquet(rd.infile(fig5_summary_cache, 'fig4'))
    print(f'Loaded {len(fig5_df)} simulation runs from Parquet cached file')
else:
    fig5_combined_h5 = rd.datadir/'projects'/'tangles'/'simulations'/'modeling_fig4_combined.h5'
    with h5py.File(rd.infile(fig5_combined_h5, 'fig4_cache'), 'r') as fig5_raw:
        fig5_group_frames = []
        i = 0
        # For every sub-run, store the Spearman correlation of the two mRNA traces,
        # their mean-centered cross-correlation, and the zero-lag autocorrelation
        # product used later to normalize it.
        for group in fig5_raw.keys():
            group_df = []
            for run in fig5_raw[group].values():
                mRNA = run['mRNA'][:,:,:]
                run_shape = mRNA.shape
                time = run['time'][:]
                # Samples at or before t = 10000 are discarded as burn-in.
                after_burn_in = time > 10000
                run_condition = run.attrs['comment'].decode('utf-8')
                run_temperature = run.attrs['temperature']
                for subrun in range(run_shape[2]):
                    her1 = mRNA[:,0,subrun]
                    her7 = mRNA[:,1,subrun]
                    her1_trimmed = her1[after_burn_in]
                    her7_trimmed = her7[after_burn_in]
                    mc_her1 = her1_trimmed - np.mean(her1_trimmed)
                    mc_her7 = her7_trimmed - np.mean(her7_trimmed)
                    sc, _ = scipy.stats.spearmanr(her1_trimmed, her7_trimmed)
                    cross_corr = scipy.signal.correlate(mc_her1, mc_her7)
                    # mode='valid' on equal-length inputs yields a single (zero-lag) value.
                    her1_autocorr = scipy.signal.correlate(mc_her1, mc_her1, mode='valid')[0]
                    her7_autocorr = scipy.signal.correlate(mc_her7, mc_her7, mode='valid')[0]
                    autocorr_correction = np.sqrt(her1_autocorr * her7_autocorr)
                    # Wrapping the array in a 1-tuple keeps it as one cell of a single-row frame.
                    summary_row = pd.DataFrame(data={
                        'spearman_corr': sc,
                        'cross_corr': (cross_corr,),
                        'autocorr_correction': autocorr_correction,
                        'condition': run_condition,
                        'temperature': run_temperature,
                        'idx': i})
                    group_df.append(summary_row)
                    i += 1
            group_frame = pd.concat(group_df, ignore_index=True)
            fig5_group_frames.append(group_frame)
            print(f'Loaded {len(group_frame)} simulations...')
        fig5_df = pd.concat(fig5_group_frames, ignore_index=True)
        print(f'Loaded {len(fig5_df)} total simulations!')
        fig5_df.to_parquet(rd.outfile(fig5_summary_cache, 'fig4_cache'), compression='gzip')

In [None]:
# Figure 5 scaffold: the figure, a 4x2 gridspec, one subfigure per panel, panel letters,
# and the color palette shared by the Figure 5 cells.
fig5 = plt.figure(figsize=(6.2,7.12))
fig5_gridspec = gridspec.GridSpec(4, 2, figure=fig5,
    wspace=0.1, hspace=0.1)
# Gridspec index for each panel; slice(None) spans both columns.
fig5_panel_cells = {
    'A': (0, 0),
    'B': (0, 1),
    'C': (1, slice(None)),
    'D': (2, slice(None)),
    'E': (3, 0),
    'F': (3, 1),
}
fig5_subfigures = {
    panel: fig5.add_subfigure(fig5_gridspec[cell])
    for panel, cell in fig5_panel_cells.items()
}
# Bold panel letter pinned to the top-left corner of each subfigure.
for label, subfig in fig5_subfigures.items():
    panel_letter = matplotlib.text.Text(x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold', verticalalignment='top',transform=subfig.transSubfigure)
    subfig.add_artist(panel_letter)
# Keys are either a gene name, a condition string as stored in the data, or a utility color.
fig5_palette = dict([
    ('her1', '#5254a3'),
    ('her7', '#9c9ede'),
    ('fig4.uncoupled', '#b5cf6b'),
    ('fig4.fully-coupled', '#637939'),
    ('fig4.tangles-coupled', main_palette['divergent']),
    ('gray', main_palette['gray']),
    ('background_gray', '#DDDDDD'),
])

In [None]:
# Fig 5c: one example her1/her7 mRNA trace per coupling condition at temperature 301.15.
fig5_subfigures['C'].subplots_adjust(left=0.12, bottom=0.25, right=0.95, top=0.8)
fig_5c_axes = fig5_subfigures['C'].subplots(ncols=3, sharey=True)
# Condition string -> panel title, in left-to-right panel order.
fig_5c_titles = {
    'fig4.uncoupled': 'Uncoupled',
    'fig4.fully-coupled': 'Transcript coupled',
    'fig4.tangles-coupled': 'Biophysical coupled',
}
for ax, condition in zip(fig_5c_axes, fig_5c_titles):
    df = fig5_examples[(fig5_examples.condition == condition) & (fig5_examples.temperature == 301.15)]
    # Use the second distinct example index found for this condition.
    idx = df.idx.unique()[1]
    if condition.endswith('tangles-coupled'):
        tangles_idx = idx
    example_trace = df[df.idx == idx]
    for gene in ('her1', 'her7'):
        sns.lineplot(data=example_trace, x='time', y=f'{gene}_mRNA', ax=ax, color=fig5_palette[gene])
    sns.despine(ax=ax)
    ax.set_ylim([0, 80])
    ax.set_xlim([0, 30000])
    ax.set_xticks([0, 15000, 30000])
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Appending '55' to the hex color adds an alpha channel for a tinted background.
    ax.set_facecolor(fig5_palette[condition] + '55')
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_horizontalalignment('right')
fig_5c_axes[0].set_ylabel('mRNA counts')
fig_5c_axes[1].set_xlabel('Time (s)')
for ax, panel_title in zip(fig_5c_axes, fig_5c_titles.values()):
    ax.text(x=0.5, y=1.1, s=panel_title, horizontalalignment='center', transform=ax.transAxes)

# Inset on the last panel. `df` still holds the last condition's rows from the loop
# above, which is the tangles-coupled condition that `tangles_idx` refers to.
fig_5c_inset = fig_5c_axes[2].inset_axes([0.22, 0.78, 0.28, 0.28])
fig_5c_inset_trace = df[df.idx == tangles_idx]
for gene in ('her1', 'her7'):
    sns.lineplot(data=fig_5c_inset_trace, x='time', y=f'{gene}_mRNA',
        ax=fig_5c_inset, color=fig5_palette[gene])
sns.despine(ax=fig_5c_inset)
fig_5c_inset.set_facecolor(fig5_palette['background_gray'])
# The inset carries no labels or ticks.
fig_5c_inset.set_xlabel('')
fig_5c_inset.set_ylabel('')
fig_5c_inset.set_xticks([])
fig_5c_inset.set_yticks([])
# Scratch render for quick visual inspection.
fig5.savefig('test.svg')

In [None]:
# Fig 5d: one example her1/her7 protein trace per coupling condition at temperature 301.15.
fig5_subfigures['D'].subplots_adjust(left=0.12, bottom=0.25, right=0.95, top=0.8)
fig_5d_axes = fig5_subfigures['D'].subplots(ncols=3, sharey=True)
# Condition string -> panel title, in left-to-right panel order.
fig_5d_titles = {
    'fig4.uncoupled': 'Uncoupled',
    'fig4.fully-coupled': 'Transcript coupled',
    'fig4.tangles-coupled': 'Biophysical coupled',
}
for ax, condition in zip(fig_5d_axes, fig_5d_titles):
    df = fig5_examples[(fig5_examples.condition == condition) & (fig5_examples.temperature == 301.15)]
    # Use the second distinct example index found for this condition.
    idx = df.idx.unique()[1]
    if condition.endswith('tangles-coupled'):
        tangles_idx = idx
    example_trace = df[df.idx == idx]
    for gene in ('her1', 'her7'):
        sns.lineplot(data=example_trace, x='time', y=f'{gene}_protein', ax=ax, color=fig5_palette[gene])
    sns.despine(ax=ax)
    # The y range is left to matplotlib's autoscaling here.
    ax.set_xlim([0, 30000])
    ax.set_xticks([0, 15000, 30000])
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Appending '55' to the hex color adds an alpha channel for a tinted background.
    ax.set_facecolor(fig5_palette[condition] + '55')
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_horizontalalignment('right')
fig_5d_axes[0].set_ylabel('Protein counts')
fig_5d_axes[1].set_xlabel('Time (s)')
for ax, panel_title in zip(fig_5d_axes, fig_5d_titles.values()):
    ax.text(x=0.5, y=1.1, s=panel_title, horizontalalignment='center', transform=ax.transAxes)
# Scratch render for quick visual inspection.
fig5.savefig('test.svg')

In [None]:
# Fig 5e: distribution of the per-simulation Spearman correlation for each coupling
# condition at temperature 301.15.
fig5_subfigures['E'].subplots_adjust(left=0.2, bottom=0.23, right=0.95, top=0.9)
fig_5e_ax = fig5_subfigures['E'].subplots(ncols=1)
# The tick labels below are hard-coded, so the violin order is pinned explicitly.
# Otherwise the order would follow the data and the labels could land on the
# wrong condition.
fig_5e_condition_order = ['fig4.uncoupled', 'fig4.fully-coupled', 'fig4.tangles-coupled']
sns.violinplot(data=fig5_df[fig5_df.temperature == 301.15],
    x='condition', y='spearman_corr', inner='quartile',
    order=fig_5e_condition_order,
    palette=fig5_palette, saturation=0.7,
    bw=0.1, ax=fig_5e_ax)
fig_5e_ax.set_xlabel('')
fig_5e_ax.set_ylabel('Correlation\ncoefficient')
# Same order as fig_5e_condition_order.
fig_5e_ax.set_xticklabels(['Uncoupled','Transcript\ncoupled','Biophysical\ncoupled'])
sns.despine(ax=fig_5e_ax)
# Scratch render for quick visual inspection.
fig5.savefig('test.svg')

In [None]:
# Prepare Fig 5f: mean normalized cross-correlation per (condition, temperature) group,
# reshaped to long form with one row per time delay.
# Each sub-run's cross-correlation is divided by its autocorr_correction before averaging.
fig_5f_crosscorr = fig5_df.groupby(['condition', 'temperature']).apply(lambda df:np.vstack(df.cross_corr / df.autocorr_correction).mean(axis=0)).reset_index()
fig_5f_times = fig5_examples.time.unique()
# Number of samples after the t > 10000 burn-in cut-off used when the correlations were computed.
signal_length = sum(fig_5f_times > 10000)
# Lag indices converted to seconds using the first spacing between distinct time points.
# NOTE(review): assumes the time grid is uniform and shared by all runs -- confirm.
fig_5f_time_delay = scipy.signal.correlation_lags(signal_length, signal_length) * np.diff(fig_5f_times)[0]
# One copy of the lag axis per group. The repeat count follows the number of groups
# instead of the previously hard-coded 6, so a different number of
# condition/temperature combinations no longer breaks the insert.
fig_5f_crosscorr.insert(2, 'time_delay', len(fig_5f_crosscorr) * [fig_5f_time_delay])
# groupby.apply leaves the result in a column labelled 0.
fig_5f_crosscorr = fig_5f_crosscorr.rename(columns={0:'cross_correlation'})
fig_5f_crosscorr = fig_5f_crosscorr.explode(['time_delay', 'cross_correlation'])

In [None]:
# Fig 5f: mean cross-correlation against time delay, one line per condition, at temperature 301.15.
fig5_subfigures['F'].subplots_adjust(left=0.2, bottom=0.23, right=0.95, top=0.9)
fig_5f_ax = fig5_subfigures['F'].subplots(ncols=1)
fig_5f_rows = fig_5f_crosscorr[fig_5f_crosscorr.temperature == 301.15]
sns.lineplot(
    data=fig_5f_rows,
    x='time_delay',
    y='cross_correlation',
    hue='condition',
    palette=fig5_palette,
    ax=fig_5f_ax,
    estimator=None,
    legend=None,
)
sns.despine(ax=fig_5f_ax)
# Reference lines: zero correlation (solid) and zero delay (dotted).
fig_5f_ax.axhline(y=0, color='k')
fig_5f_ax.axvline(x=0, color='#888888', linestyle=':')
fig_5f_ax.set_ylim([-.3, 0.75])
fig_5f_ax.set_yticks(np.arange(-0.25, 1.0, 0.25))
fig_5f_ax.set_xlabel('Time delay (s)')
fig_5f_ax.set_ylabel('Cross-correlation')
# Scratch render for quick visual inspection.
fig5.savefig('test.svg')

In [None]:
# Write the finished Figure 5 to the writeups folder; the path goes through rd.outfile.
# NOTE(review): the rd.outfile tag is 'fig4' although this is fig5 -- confirm this offset is intended.
fig5.savefig(rd.outfile(rd.rootdir/'writeups'/'figures'/'modeling_paper'/'fig_5_mpl.svg', 'fig4'))

## Supplemental Figure 1
This figure can hold the full `sigma_squared` coefficient table (as a large part A). It can also include the noise decomposition in the plasmid case and the spacing-derived coupling in the plasmid case.

We should also include the non-normalized version of figure 1c


### Visual description
???

In [None]:
# Build a long-form table of the sigma sweep: one frame per s2_coeff entry, stacked.
with h5py.File(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'sigma_sweep.h5', 'fig_supp_alpha'), 'r') as sigma_h5:
    sweep_group = sigma_h5['sweep']
    alpha_frames = []
    for a_idx in range(len(sweep_group['s2_coeff'])):
        # 'alpha' is a single coefficient repeated on every row; 'energy' is the
        # matching row of 'values', paired element-wise with 'sigma'.
        alpha_frames.append(pd.DataFrame({
            'sigma': sweep_group['sigma'],
            'alpha': sweep_group['s2_coeff'][a_idx],
            'energy': sweep_group['values'][a_idx,:]
        }))
    alpha_sweep_df = pd.concat(alpha_frames)

In [None]:
# Supplemental alpha-sweep figure: four stacked rows (A-D) with one column per
# distinct 'coeff.sigma_squared' value found in base_model_df.
fig_supp_alpha = plt.figure(figsize=(6.50,6))
fig_supp_alpha_gridspec = gridspec.GridSpec(4, 1, figure=fig_supp_alpha,
    wspace=0.1, hspace=0.15)
fig_supp_alpha_subfigures = {
    panel: fig_supp_alpha.add_subfigure(fig_supp_alpha_gridspec[row,:])
    for row, panel in enumerate('ABCD')
}
for label, subfig in fig_supp_alpha_subfigures.items():
    # Bold panel letter pinned to the subfigure's top-left corner.
    panel_letter = matplotlib.text.Text(x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold', verticalalignment='top',transform=subfig.transSubfigure)
    subfig.add_artist(panel_letter)
    subfig.subplots_adjust(left=0.1, bottom=0.28, right=0.95, top=0.8)
unique_alphas = sorted(base_model_df['coeff.sigma_squared'].unique())
alpha_values = unique_alphas
# Row A: energy curves, row B: is_plasmid == 0 rows, row C: is_plasmid == 1 rows, row D: KDE.
(
    fig_supp_energy_axes,
    fig_supp_linear_axes,
    fig_supp_circular_axes,
    fig_supp_dist_axes,
) = (
    fig_supp_alpha_subfigures[panel].subplots(ncols=len(alpha_values), sharex=True, sharey=True)
    for panel in 'ABCD'
)
# The four conditions shown in every column.
fig_supp_conditions = [
    'fig.bm.tandem_reporter_downstream',
    'fig.bm.tandem_reporter_upstream',
    'fig.bm.convergent',
    'fig.bm.divergent',
]
# Fill one column of all four rows per alpha value.
for alpha, e_ax, linear_ax, circular_ax, dist_ax in zip(
    alpha_values,
    fig_supp_energy_axes,
    fig_supp_linear_axes,
    fig_supp_circular_axes,
    fig_supp_dist_axes
):
    is_sc_dependent = base_model_df['rates.sc_dependent'] == 1.0
    is_shown_condition = base_model_df['condition'].isin(fig_supp_conditions)
    is_this_alpha = np.abs(base_model_df['coeff.sigma_squared'] - alpha) < 0.0001
    df_supp_alpha = base_model_df[is_sc_dependent & is_shown_condition & is_this_alpha].copy()

    # Row A: zero reference lines plus the swept curve whose alpha is nearest to this one.
    e_ax.axhline(0, color=main_palette['gray'])
    e_ax.axvline(0, color=main_palette['gray'])
    alpha_distance = np.abs(alpha_sweep_df.alpha - alpha)
    closest_alpha_df = alpha_sweep_df[alpha_distance == min(alpha_distance)]
    e_ax.plot(closest_alpha_df['sigma'], closest_alpha_df['energy'], color='k')
    e_ax.text(x=0.5, y=1.1, s=f'α={alpha:0.1e}', horizontalalignment='center', transform=e_ax.transAxes)

    df_supp_alpha['condition'] = df_supp_alpha['condition'].cat.remove_unused_categories()
    # Induction expressed relative to 1/120.
    df_supp_alpha['norm_induction'] = df_supp_alpha['induction'] / (1.0 / 120.0)

    # Rows B and C: the same plot for the two is_plasmid values.
    for plasmid_flag, induction_ax in ((0.0, linear_ax), (1.0, circular_ax)):
        sns.lineplot(data=df_supp_alpha[df_supp_alpha.is_plasmid == plasmid_flag],
                    x='norm_induction', y='constant_mRNA', hue='condition', palette=fig_bm_palette, linewidth=1.5,
                    legend=False, ax=induction_ax, ci=None)

    # Row D: joint density of the two outputs for is_plasmid == 0 rows with induction ~ 8.0e-3.
    kde_rows = (df_supp_alpha.is_plasmid == 0.0) & (np.abs(df_supp_alpha.induction - 8.0e-3) < 1e-4)
    sns.kdeplot(data=df_supp_alpha[kde_rows],
                x='inducible_mRNA', y='constant_mRNA', hue='condition', palette=fig_bm_palette, ax=dist_ax, legend=None)

    for column_ax in (linear_ax, circular_ax, dist_ax):
        column_ax.set_xlabel('')
    for column_ax in (e_ax, linear_ax, circular_ax, dist_ax):
        sns.despine(ax=column_ax)

# Axis labels: x label only on the column at index 3, y label only on the first column.
fig_supp_rows = (
    (fig_supp_energy_axes, 'σ', 'Energy (pN nm)'),
    (fig_supp_linear_axes, 'Inducible output', 'Reporter output'),
    (fig_supp_circular_axes, 'Inducible output', 'Reporter output'),
    (fig_supp_dist_axes, 'Inducible output', 'Reporter output'),
)
for row_axes, row_xlabel, _ in fig_supp_rows:
    row_axes[3].set_xlabel(row_xlabel)
for row_axes, _, row_ylabel in fig_supp_rows:
    row_axes[0].set_ylabel(row_ylabel)
# Row titles for rows B and C, centered above the column at index 3.
fig_supp_linear_axes[3].text(x=0.5, y=1.1, s='Linear BCs', horizontalalignment='center', transform=fig_supp_linear_axes[3].transAxes)
fig_supp_circular_axes[3].text(x=0.5, y=1.1, s='Circular BCs', horizontalalignment='center', transform=fig_supp_circular_axes[3].transAxes)
# Scratch render first, then the tracked output file.
fig_supp_alpha.savefig('test.svg')
fig_supp_alpha.savefig(rd.outfile(rd.rootdir/'writeups'/'figures'/'modeling_paper'/'sfig_alpha_sweep.svg', 'fig_supp_alpha'))