In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors
import matplotlib.cm as cm
import matplotlib.image
import matplotlib.patches
import matplotlib.ticker
import matplotlib.gridspec as gridspec
import h5py
import pandas as pd
import numpy as np
import svgutils.transform as sg
import scipy.signal
import scipy.stats

from pathlib import Path
import re

import itertools
sns.set_style('ticks')
sns.set_context('paper',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

datadir = Path(Path('datadir.txt').read_text())

In [None]:
def add_external_svgs(final_filename, base_svg_filename, subfigures_dict, external_svg_mapping):
    """Append external SVG drawings to a base SVG, fitted into subfigure regions.

    Parameters
    ----------
    final_filename
        Path the merged SVG is saved to.
    base_svg_filename
        Path of the SVG that the external drawings are appended to.
    subfigures_dict
        Mapping from panel label to an object exposing ``bbox.get_points()``
        and ``figure.bbox.get_points()``; these locate the panel in the figure.
    external_svg_mapping
        Mapping from panel label to a dict with a ``'filename'`` entry and an
        optional ``'extents'`` entry ``(left, bottom, width, height)`` expressed
        as fractions of the panel. Missing extents default to ``(0, 0, 1, 1)``.

    Each external drawing is scaled uniformly by the smaller of the width and
    height ratios between the target region and the drawing, then moved to the
    left/top of that region. One diagnostic summary is printed per drawing.
    """
    # Approach inspired by https://jetholt.com/micro/programmatically-merging-svg-files/
    # Conversion factors from https://www.w3.org/TR/css-values/#absolute-lengths
    pixels_per_unit = {
        'cm': 96.0 / 2.54,
        'mm': 96.0 / 2.54 / 10.0,
        'Q':  96.0 / 2.54 / 40.0,
        'in': 96.0,
        'pc': 96.0 / 6.0,
        'pt': 96.0 / 72.0,
        'px': 1.0
    }

    def first_number(length_str):
        # First run of digits/dots in a length string such as '453.6pt'.
        return float(re.search(r'[0-9\.]+', length_str).group())

    def unit_to_pixels(unit_str):
        magnitude = first_number(unit_str)
        for suffix, factor in pixels_per_unit.items():
            if unit_str.endswith(suffix):
                return magnitude * factor
        # No recognised suffix: the number is taken to be pixels already.
        return magnitude

    def pixels_to_base_units(svg, xy):
        # Rescale a pixel-space (x, y) pair into the unit used by the SVG's width attribute.
        width_str = svg.get_size()[0]
        raw_width = first_number(width_str)
        pixel_width = unit_to_pixels(width_str)
        return (xy[0] * raw_width / pixel_width, xy[1] * raw_width / pixel_width)

    def svg_size(svg):
        width_str, height_str = svg.get_size()
        return (unit_to_pixels(width_str), unit_to_pixels(height_str))

    base_svg = sg.fromfile(base_svg_filename)
    # Pixel size of the base SVG; every target region is expressed relative to it.
    base_size = svg_size(base_svg)
    for subfig, mapping in external_svg_mapping.items():
        external_svg = sg.fromfile(mapping['filename'])
        external_size = svg_size(external_svg)

        # left, bottom, width, height
        extents = mapping.get('extents', (0, 0, 1, 1))

        # Corner coordinates of the panel and of the whole figure.
        panel = subfigures_dict[subfig]
        (sub_x0, sub_y0), (sub_x1, sub_y1) = panel.bbox.get_points()
        (fig_x0, fig_y0), (fig_x1, fig_y1) = panel.figure.bbox.get_points()
        sub_width = sub_x1 - sub_x0
        sub_height = sub_y1 - sub_y0
        fig_width = fig_x1 - fig_x0
        fig_height = fig_y1 - fig_y0

        # Requested region in the figure's own coordinate space.
        fig_space_extents = (
            sub_x0 + extents[0] * sub_width,   # left
            sub_y0 + extents[1] * sub_height,  # bottom
            sub_width * (extents[2]),          # width
            sub_height * (extents[3])          # height
        )
        # The same region expressed in base-SVG pixels.
        pixel_extents = (
            (fig_space_extents[0] - fig_x0) / fig_width * base_size[0],
            (fig_space_extents[1] - fig_y0) / fig_height * base_size[1],
            (fig_space_extents[2]) / fig_width * base_size[0],
            (fig_space_extents[3]) / fig_height * base_size[1]
        )
        # Uniform scale: the tighter of the two dimensions wins.
        scale_factor = min(pixel_extents[2] / external_size[0], pixel_extents[3] / external_size[1])
        print(f'Base pixel size:{base_size}\nsubfig {subfig}:\n\tsize:{external_size}\n\tfig_space_extents:{fig_space_extents}\n\tpixel_extents:{pixel_extents}\n\tscale factor:{scale_factor}')
        external_elem = external_svg.getroot()
        # The vertical offset is measured from the top of the base SVG, so the
        # bottom-based extent is flipped before converting to base units.
        top_px = base_size[1] - pixel_extents[1] - pixel_extents[3]
        offset = pixels_to_base_units(base_svg, (pixel_extents[0], top_px))
        external_elem.moveto(*offset, scale_factor)
        base_svg.append(external_elem)
    base_svg.save(final_filename)

In [None]:
def draw_arrows(subfig, ax, x1, x2, y1, y2, dy, dir1, dir2, c1, c2):
    """Draw two bent arrows rising from a black baseline, in ``ax`` axes coordinates.

    subfig     -- object whose ``add_artist`` receives every artist created here
    ax         -- axes whose ``transAxes`` transform positions the artists
    x1, x2     -- horizontal positions of the first and second stem
    y1         -- height of the baseline; stems start at ``y1 + 0.02``
    y2         -- height of the stem tops and of the horizontal arrows
    dy         -- length of each horizontal arrow
    dir1, dir2 -- multipliers applied to ``dy`` (the sign sets the direction)
    c1, c2     -- colours of the first and second stem/arrow pair
    """
    stems = ((x1, c1), (x2, c2))
    heads = ((x1, dir1, c1), (x2, dir2, c2))

    # Vertical stems first, then the arrow heads, then the baseline, so that
    # artists are added in the same order as before.
    for x, colour in stems:
        stem = matplotlib.lines.Line2D(
            [x, x], [y1+0.02, y2],
            linewidth=2.5, color=colour, solid_capstyle='projecting',
            transform=ax.transAxes)
        subfig.add_artist(stem)
    for x, direction, colour in heads:
        head = matplotlib.patches.FancyArrow(
            x, y2, dy * direction, 0,
            width=0.025, color=colour, transform=ax.transAxes)
        subfig.add_artist(head)

    baseline = matplotlib.lines.Line2D(
        [0.2, 0.8], [y1, y1],
        linewidth=2, color='k', transform=ax.transAxes)
    subfig.add_artist(baseline)


In [None]:
# Base colours shared by every figure: one per gene orientation plus a neutral gray.
main_palette = {
    'tandem': '#A58245',
    'convergent': '#18B7CE',
    'divergent': '#AC4140',
    'gray': '#666666'
}
# Same colours with the two hex digits '28' appended as an alpha channel.
light_palette = {k:v + '28' for k, v in main_palette.items()}
# Viridis restricted to the first 80% of its range (drops the yellow end).
# plt.get_cmap replaces cm.get_cmap, which is deprecated and removed in newer Matplotlib.
no_yellow_viridis = matplotlib.colors.ListedColormap(plt.get_cmap('viridis', 256)(np.linspace(0,0.8,256)))

## Figure 1

### Visual description
A 160mm x 130mm image (6.30 x 5.12 in).

- **Panel A**: A cartoon of polymerases coming on and off a piece of DNA, showing the binding energy dE, accumulated supercoiling, and the key variables.
- **Panel B**: A diagram of the key tested variables in Figure 1, including circular/linear, different orientations, and delta spacing.
- **Panel C**: Three diagrams that show (normalized) reporter output (y-axis) vs. induction (x-axis), with a separate plot for each supercoiling initiation setting and a separate curve for each orientation (constant spacing). All of these plots use linear boundary conditions (BCs).
- **Panel D**: Two subplots which compare linear vs plasmid BCs vs different induction conditions, using one of the sc initiation panels.
- **Panel E**: Three subplots. These are scatterplots (density plots?) of the three conditions (at full induction) that show the noise behavior of the system. A schematic is overlaid as a reminder of intrinsic vs. extrinsic noise.
- **Panel F**: Three subplots, showing reporter output as a function of induction for three different spacing levels.

### Caption

In [None]:
# Load the Figure 1 simulation summary table. The Parquet cache is used when it
# exists; otherwise the table is rebuilt from the HDF5 summaries and cached.
simulation_dir = datadir/'projects'/'tangles'/'simulations'
summary_cache_path = simulation_dir/'fig1_fig2_summary.gzip'
if not summary_cache_path.exists():
    # Run attributes copied verbatim into columns of the same name.
    copied_attrs = [
        'rates.sc_dependent', 'coeff.sigma_squared',
        'coeff.mRNA_drag_exponent', 'coeff.mRNA_drag_coeff',
        'rnap.max_velocity', 'rnap.stall_torque', 'rnap.stall_width']
    group_frames = []
    with h5py.File(simulation_dir/'modeling_fig1_fig2_combined_summaries.h5', 'r') as summaries:
        for group in summaries.keys():
            group_df = []
            for run in summaries[group].values():
                # One row per entry along the second axis of final_mRNA.
                run_df = pd.DataFrame(data={
                    'inducible_mRNA': run['final_mRNA'][0,:],
                    'constant_mRNA': run['final_mRNA'][1,:]})
                run_df['induction'] = run.attrs['gene.base_rate'][0]
                run_df['condition'] = run.attrs['comment'].decode('utf-8')
                run_df['is_plasmid'] = run.attrs['bcs.is_circular']
                for term in copied_attrs:
                    run_df[term] = run.attrs[term]
                # Spacing is the gap between the two middle gene endpoints,
                # divided by 0.34 (presumably nm per base pair -- confirm).
                gene_endpoints = sorted(np.concatenate((
                    run.attrs['gene.start'],
                    run.attrs['gene.end'])))
                run_df['spacing'] = int((gene_endpoints[2] - gene_endpoints[1]) / 0.34)
                group_df.append(run_df)
            group_frames.append(pd.concat(group_df, ignore_index=True))
            print(f'Loaded {len(group_frames[-1])} simulations...')
    summary_df = pd.concat(group_frames, ignore_index=True)
    print(f'Loaded {len(summary_df)} total simulation runs!')
    summary_df['condition'] = pd.Categorical(summary_df['condition'])
    summary_df.to_parquet(summary_cache_path, compression='gzip')
else:
    summary_df = pd.read_parquet(summary_cache_path)
    print(f'Loaded {len(summary_df)} total simulation runs from Parquet cached file')

In [None]:
# Randomly downsample (replace later)
#summary_df = summary_df.sample(frac=0.1, random_state=2022)

In [None]:
# Build the Figure 1 canvas: a 3x2 grid of subfigures labelled A-F in row-major order.
fig1 = plt.figure(figsize=(6.30,5.12))
fig1_gridspec = gridspec.GridSpec(
    3, 2, figure=fig1,
    wspace=0.1, hspace=0.1, height_ratios=[0.8, 1.0, 1.0])
fig1_subfigures = {}
for label, (row, col) in zip('ABCDEF', itertools.product((0,1,2), (0,1))):
    fig1_subfigures[label] = fig1.add_subfigure(fig1_gridspec[row, col])
# Bold panel letter pinned to the top-left corner of each subfigure.
for label, subfig in fig1_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure)
    subfig.add_artist(panel_label)
# Panel A gets an empty, frameless axes so that its artists are drawn.
fig1_subfigures['A'].add_axes((0.1,0.1,0.9,0.9), frameon=False, xticks=[], yticks=[])
# Colour of each Figure 1 condition, keyed by the condition string stored in the data.
fig1_palette = {
    'fig1.tandem_upstream': main_palette['tandem'],
    'fig1.convergent': main_palette['convergent'],
    'fig1.divergent': main_palette['divergent'],
    }

In [None]:
# Panel C: normalized reporter output vs. fold induction for non-plasmid runs,
# with one axis per initiation setting and one curve per orientation.
fig1c_conditions = ['fig1.tandem_upstream', 'fig1.convergent', 'fig1.divergent']
df_1c = summary_df[
    (summary_df['is_plasmid'] == 0)
    & summary_df['condition'].isin(fig1c_conditions)
].copy()
df_1c['condition'] = df_1c['condition'].cat.remove_unused_categories()

fig1c_group_keys = ['condition', 'rates.sc_dependent', 'coeff.sigma_squared', 'induction']
# Normalization baseline: mean reporter output of the tandem construct at the
# lowest induction level, for each (sc_dependent, sigma_squared) pair.
# The lowest level is selected with .min() rather than "first value seen", so
# the baseline does not depend on the row order of the loaded table. The mean is
# taken on the reporter column only instead of on every column.
fig1c_baseline = (
    df_1c.groupby(fig1c_group_keys)['constant_mRNA'].mean()
    .xs(df_1c['induction'].min(), level='induction')
    .xs('fig1.tandem_upstream', level='condition'))
df_1c_normed = (
    df_1c.set_index(fig1c_group_keys)['constant_mRNA'] / fig1c_baseline
).reset_index()
df_1c_normed['norm_induction'] = df_1c_normed['induction'] / (1.0 / 120.0)

# Tweak subplot layout
fig1_subfigures['C'].subplots_adjust(left=0.175, bottom=0.225, right=0.9, top=0.8)
fig_1c_axes = fig1_subfigures['C'].subplots(ncols=3, sharey=True)

# Row selections for the three axes, left to right: supercoiling-independent,
# supercoiling-dependent with sigma_squared == 0.001, and supercoiling-dependent
# with sigma_squared within 0.001 of 0.0316.
fig1c_sc_dependent = df_1c_normed['rates.sc_dependent']
fig1c_sigma_squared = df_1c_normed['coeff.sigma_squared']
fig1c_panel_masks = [
    fig1c_sc_dependent == 0.0,
    (fig1c_sc_dependent == 1.0) & (fig1c_sigma_squared == 0.001),
    (fig1c_sc_dependent == 1.0) & (np.abs(fig1c_sigma_squared - 0.0316) < 0.001),
]
for ax, panel_mask in zip(fig_1c_axes, fig1c_panel_masks):
    sns.lineplot(data=df_1c_normed[panel_mask],
                 x='norm_induction', y='constant_mRNA', hue='condition', palette=fig1_palette, linewidth=2,
                 legend=False, ax=ax)
    sns.despine(ax=ax)
for ax in fig_1c_axes:
    ax.set_xlim([0.0, 2.5])
    ax.set_xticks([0.0, 1.0, 2.0])
    ax.set_xticklabels(['0x', '1x', '2x'])
    ax.set_ylim([0.0, 1.4])
# Set labels
fig_1c_axes[0].set_ylabel('Reporter output')
fig_1c_axes[0].set_xlabel('')
fig_1c_axes[1].set_xlabel('Fold induction')
fig_1c_axes[2].set_xlabel('')
# Add annotations
fig1_subfigures['C'].text(0.5, 1.15, 'sc. independent', horizontalalignment='center', transform=fig_1c_axes[0].transAxes, fontsize=9, fontweight='bold')
fig1_subfigures['C'].text(1.05, 1.15, 'sc. dependent', horizontalalignment='center', transform=fig_1c_axes[1].transAxes, fontsize=9, fontweight='bold')
fig1_subfigures['C'].add_artist(matplotlib.lines.Line2D([0.505, 0.845], [0.87, 0.87], transform=fig1_subfigures['C'].transSubfigure, color='k'))
fig1_subfigures['C'].text(0.5, 1.0, 'f(1)', horizontalalignment='center', transform=fig_1c_axes[0].transAxes, fontsize=9)
fig1_subfigures['C'].text(0.5, 1.0, 'f(σ)', horizontalalignment='center', transform=fig_1c_axes[1].transAxes, fontsize=9)
fig1_subfigures['C'].text(0.5, 1.0, 'f(σ,σ²)', horizontalalignment='center', transform=fig_1c_axes[2].transAxes, fontsize=9)
del df_1c

In [None]:
# Panel D: reporter output vs. fold induction, is_plasmid == 0 (left axis) against
# is_plasmid == 1 (right axis), for supercoiling-dependent runs with sigma_squared
# within 0.001 of 0.0316.
fig1d_initiation = (
    (np.abs(summary_df['coeff.sigma_squared'] - 0.0316) < 0.001) &
    (summary_df['rates.sc_dependent'] == 1.0)
)
fig1d_orientation = (
    (summary_df['condition'] == 'fig1.tandem_upstream') |
    (summary_df['condition'] == 'fig1.convergent') |
    (summary_df['condition'] == 'fig1.divergent')
)
df_1d = summary_df[fig1d_initiation & fig1d_orientation].copy()
df_1d['condition'] = df_1d['condition'].cat.remove_unused_categories()
df_1d['norm_induction'] = df_1d['induction'] / (1.0 / 120.0)
df_1d['constant_mRNA'] = df_1d['constant_mRNA'] / 200.0

fig1_subfigures['D'].subplots_adjust(left=0.2, bottom=0.225, right=0.95, top=0.90)
fig_1d_axes = fig1_subfigures['D'].subplots(ncols=2, sharey=True)
for ax, plasmid_flag in zip(fig_1d_axes, (0.0, 1.0)):
    sns.lineplot(
        data=df_1d[df_1d['is_plasmid']==plasmid_flag],
        x='norm_induction', y='constant_mRNA', hue='condition',
        palette=fig1_palette, legend=False, linewidth=2,
        ax=ax)
    sns.despine(ax=ax)
for ax in fig_1d_axes:
    ax.set_xlabel('')
    ax.set_xlim([0.0, 2.5])
    ax.set_xticks([0.0, 1.0, 2.0])
    ax.set_xticklabels(['0x', '1x', '2x'])
    ax.set_ylim([0.0, 1.2])
fig_1d_axes[0].set_ylabel('Reporter output')
# One x label for both axes, placed in subfigure coordinates.
fig1_subfigures['D'].text(0.5, 0.01, 'Fold induction')
for ax, header in zip(fig_1d_axes, ('Linear', 'Circular')):
    fig1_subfigures['D'].text(0.5, 1.0, header, horizontalalignment='center', transform=ax.transAxes, fontsize=9, fontweight='bold')

del df_1d

In [None]:
# Panel E: joint density of inducible vs. reporter output for each orientation,
# restricted to non-plasmid, supercoiling-dependent runs with sigma_squared near
# 0.0316 and induction within 1e-4 of 8.0e-3.
fig1e_orientation = (
    (summary_df['condition'] == 'fig1.tandem_upstream') |
    (summary_df['condition'] == 'fig1.convergent') |
    (summary_df['condition'] == 'fig1.divergent')
)
fig1e_induction = np.abs(summary_df['induction'] - 8.0e-3) < 1e-4
fig1e_initiation = (
    (summary_df['rates.sc_dependent'] == 1.0) &
    (np.abs(summary_df['coeff.sigma_squared'] - 0.0316) < 0.001)
)
df_1e = summary_df[
    (summary_df['is_plasmid'] == 0) & fig1e_orientation & fig1e_induction & fig1e_initiation
].copy()
df_1e['norm_reporter']  = df_1e['constant_mRNA'] / 258.0
df_1e['norm_inducible'] = df_1e['inducible_mRNA'] / 258.0

fig1_subfigures['E'].subplots_adjust(left=0.175, bottom=0.25, right=0.9, top=0.65)
fig_1e_axes = fig1_subfigures['E'].subplots(ncols=3, sharey=True)
fig1e_panels = [
    ('fig1.tandem_upstream', 'tandem'),
    ('fig1.convergent', 'convergent'),
    ('fig1.divergent', 'divergent'),
]
for ax, (condition_name, palette_key) in zip(fig_1e_axes, fig1e_panels):
    sns.kdeplot(
        data=df_1e[df_1e['condition'] == condition_name],
        x='norm_inducible', y='norm_reporter', color=main_palette[palette_key],
        ax=ax)
    sns.despine(ax=ax)
for ax in fig_1e_axes:
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.0])
    ax.set_xticks([0.0, 1.0])
    ax.set_yticks([0.0, 1.0])

fig_1e_axes[0].set_ylabel('Reporter output')
fig_1e_axes[0].set_xlabel('')
fig_1e_axes[1].set_xlabel('Inducible gene output')
fig_1e_axes[1].set_ylabel('')
fig_1e_axes[2].set_xlabel('')

# Noise schematic on the first axis: two arrows fanning out from a common point,
# labelled 'int.' and 'ext.', under a 'noise' caption.
fig1e_arrow_style = {'facecolor': 'black', 'edgecolor': '#000000ff', 'width': 0.8, 'headwidth': 5.0, 'headlength': 5.0, 'shrink': 0.0, 'capstyle': 'round'}
fig1e_arrows = [
    ((0.405, 0.595), (0.2, 0.8)),
    ((0.395, 0.595), (0.6, 0.8)),
]
for arrow_tail, arrow_tip in fig1e_arrows:
    # Each arrow gets its own copy of the style dict.
    fig_1e_axes[0].annotate(text='', xytext=arrow_tail, xy=arrow_tip, arrowprops=dict(fig1e_arrow_style))
for label_x, label_text in ((0.2, 'int.'), (0.6, 'ext.')):
    fig_1e_axes[0].text(label_x, 0.84, label_text, horizontalalignment='center', transform=fig_1e_axes[0].transData, fontsize=9, fontstyle='italic')
fig1_subfigures['E'].text(0.4, 1.00, 'noise', horizontalalignment='center', transform=fig_1e_axes[0].transData, fontsize=9, fontstyle='italic')
for header, ax in zip(['Tandem', 'Convergent', 'Divergent'], fig_1e_axes):
    fig1_subfigures['E'].text(0.5, 1.2, header, horizontalalignment='center', transform=ax.transAxes, fontsize=9, fontweight='bold')

del df_1e


In [None]:
# Panel F: change in reporter output vs. fold induction, one axis per orientation
# and one curve per spacing, for non-plasmid, supercoiling-dependent runs with
# sigma_squared == 0.02.
fig1f_conditions = ['fig1f.spacing.tandem_upstream', 'fig1f.spacing.convergent', 'fig1f.spacing.divergent']
df_1f = summary_df[
    (
        summary_df['is_plasmid'] == 0
    ) & (
        summary_df['condition'].isin(fig1f_conditions)
    ) & (
        (summary_df['rates.sc_dependent'] == 1.0) &
        (summary_df['coeff.sigma_squared'] == 0.02)
    )
].copy()
df_1f['norm_induction'] = df_1f['induction'] / (1.0 / 120.0)
df_1f['norm_reporter']  = df_1f['constant_mRNA'] / 300.0
df_1f['condition'] = df_1f['condition'].cat.remove_unused_categories()

# NOTE(review): `max_spacing_median` was used below but never defined anywhere in
# the notebook, so this cell raised NameError on a fresh kernel. The definition
# here is reconstructed from the name and from how it is indexed (by condition,
# then by norm_induction): the median normalized reporter output at the largest
# spacing. Confirm this matches the original intent.
max_spacing_median = (
    df_1f[df_1f['spacing'] == df_1f['spacing'].max()]
    .groupby(['condition', 'norm_induction'])['norm_reporter']
    .median())
# Subtract the matching baseline from each (condition, induction, spacing) group.
df_1f['delta_reporter'] = df_1f.groupby(['condition', 'norm_induction', 'spacing'])['norm_reporter'].transform(lambda col: col - max_spacing_median.xs(col.name[0], level='condition').xs(col.name[1]))

fig1_subfigures['F'].subplots_adjust(left=0.175, bottom=0.25, right=0.75, top=0.65)
# The fourth axis hosts the colorbar and must not share y with the data axes.
# Axes are created unshared and the three data axes are linked explicitly;
# Grouper.remove(), used previously, is no longer available in recent Matplotlib.
fig_1f_axes = fig1_subfigures['F'].subplots(ncols=4, sharey=False, gridspec_kw={'width_ratios':[1,1,1,0.3]})
for ax in fig_1f_axes[1:3]:
    ax.sharey(fig_1f_axes[0])
    ax.yaxis.set_tick_params(which='both', labelleft=False, labelright=False)

# One normalization shared by the curves and the colorbar.
fig1f_spacing_norm = matplotlib.colors.Normalize(vmin=500, vmax=12000)
for condition, ax in zip(fig1f_conditions, fig_1f_axes):
    sns.lineplot(data=df_1f[df_1f['condition']==condition], x='norm_induction', y='delta_reporter', hue='spacing',
        palette='viridis', hue_norm=fig1f_spacing_norm,
        legend=False, ax=ax)
    # 'fig1f.spacing.tandem_upstream' -> 'tandem', the key used by light_palette.
    ax.set_facecolor(light_palette[condition.replace('_','.').split('.')[2]])
    sns.despine(ax=ax)
# Add colorbar
fig1_subfigures['F'].colorbar(cm.ScalarMappable(norm=fig1f_spacing_norm, cmap='viridis'), cax=fig_1f_axes[3])
for header, ax in zip(['Tandem', 'Convergent', 'Divergent'], fig_1f_axes[:3]):
    ax.set_xlim([0.0, 2.0])
    ax.set_ylim([-0.4,0.4])
    fig1_subfigures['F'].text(0.5, 1.2, header, horizontalalignment='center', transform=ax.transAxes, fontsize=8, fontweight='bold')
fig_1f_axes[0].set_ylabel('ΔReporter output')
fig_1f_axes[0].set_xlabel('')
fig_1f_axes[1].set_xlabel('Fold induction')
fig_1f_axes[2].set_xlabel('')

del df_1f

In [None]:
# Save the Matplotlib-drawn figure, then merge in the external SVG drawings for
# panels A and B, each inset by 5% on every side of its subfigure.
fig1_external_panels = {
    'A': {
        'filename': '../writeups/figures/modeling_paper/fig_1a.svg',
        'extents': (0.05, 0.05, 0.9, 0.9)
    },
    'B': {
        'filename': '../writeups/figures/modeling_paper/fig_1b.svg',
        'extents': (0.05, 0.05, 0.9, 0.9)
    },
}
fig1.savefig('../writeups/figures/modeling_paper/fig_1_mpl.svg')
add_external_svgs(
    '../writeups/figures/modeling_paper/fig_1.svg',
    '../writeups/figures/modeling_paper/fig_1_mpl.svg',
    fig1_subfigures,
    fig1_external_panels)

In [None]:
# Release the simulation summary table; the Figure 2 cells below load their own data into fig2_df.
del summary_df

## Figure 2

In [None]:
# Load the Figure 2 time courses. The Parquet cache is used when it exists;
# otherwise the table is rebuilt from the raw HDF5 file and cached.
fig2_cache_path = datadir/'projects'/'tangles'/'simulations'/'fig2_reprocessed.gzip'
if fig2_cache_path.exists():
    fig2_df = pd.read_parquet(fig2_cache_path)
    print(f'Loaded {len(fig2_df)} total datapoints from Parquet cached file')
else:
    with h5py.File(
        datadir/'projects'/'tangles'/'simulations'/'modeling_fig2_combined.h5', 'r') as fig2_raw:
        fig2_frames = []
        # Running counter that gives every subrun a unique 'idx'.
        i = 0
        for group in fig2_raw.keys():
            group_df = []
            for run in fig2_raw[group].values():
                # Read the datasets and attributes once per run. Previously the
                # mRNA slices and the time vector were re-read from the HDF5
                # file for every subrun.
                inducible = run['mRNA'][:,0,:]
                constant  = run['mRNA'][:,1,:]
                run_time = run['time'][()]
                run_induction = run.attrs['step_induction']
                run_condition = run.attrs['comment'].decode('utf-8')
                # Spacing is the gap between the two middle gene endpoints,
                # divided by 0.34 (presumably nm per base pair -- confirm).
                gene_endpoints = sorted(np.concatenate((
                    run.attrs['gene.start'],
                    run.attrs['gene.end'])))
                run_spacing = int((gene_endpoints[2] - gene_endpoints[1]) / 0.34)
                run_terms = {term: run.attrs[term] for term in ['step_time', 'coeff.sigma_squared']}
                # The third axis of 'mRNA' indexes subruns; each becomes its own block of rows.
                for subrun in range(run['mRNA'].shape[2]):
                    subrun_df = pd.DataFrame(data={
                        'inducible_mRNA': inducible[:,subrun],
                        'constant_mRNA':  constant[:,subrun],
                        'time': run_time})
                    subrun_df['induction'] = run_induction
                    subrun_df['condition'] = run_condition
                    subrun_df['idx'] = i
                    i += 1
                    subrun_df['spacing'] = run_spacing
                    for term, value in run_terms.items():
                        subrun_df[term] = value
                    group_df.append(subrun_df)
            fig2_frames.append(pd.concat(group_df, ignore_index=True))
            print(f'Loaded {len(fig2_frames[-1])} datapoints...')
        fig2_df = pd.concat(fig2_frames, ignore_index=True)
        print(f'Loaded {len(fig2_df)} total datapoints!')
        fig2_df.to_parquet(fig2_cache_path, compression='gzip')

In [None]:
# Show the distinct induction values present in the Figure 2 data.
fig2_df['induction'].unique()

In [None]:
# Build the Figure 2 canvas on a 3x3 grid: A spans the top row, B takes the first
# cell of the middle row, C the rest of the middle row, and D the bottom row.
fig2 = plt.figure(figsize=(6.30,5.12))
fig2_gridspec = gridspec.GridSpec(
    3, 3, figure=fig2,
    wspace=0.1, hspace=0.1, height_ratios=[1.0, 1.0, 1.0])
fig2_layout = {
    'A': (0, slice(None)),
    'B': (1, 0),
    'C': (1, slice(1, None)),
    'D': (2, slice(None)),
}
fig2_subfigures = {
    label: fig2.add_subfigure(fig2_gridspec[cell])
    for label, cell in fig2_layout.items()
}
# Bold panel letter pinned to the top-left corner of each subfigure.
for label, subfig in fig2_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.', fontsize=13, fontweight='bold',
        verticalalignment='top', transform=subfig.transSubfigure)
    subfig.add_artist(panel_label)
# Colours keyed by the Figure 2 condition strings, plus two neutral grays.
fig2_palette = {
    'fig2.tandem_down': main_palette['tandem'],
    'fig2.tandem_up': '#f1be64',
    'fig2.convergent': main_palette['convergent'],
    'fig2.divergent': main_palette['divergent'],
    'gray': main_palette['gray'],
    'background_gray': '#DDDDDD'
    }

In [None]:
# Prep Fig2a
# For every condition present in the data, pick the first run index ('idx') whose
# induction is within 0.01 of 1.0 and whose sigma_squared is exactly 0.02.
fig2a_candidates = (
    (np.abs(fig2_df['induction'] - 1.0) < 0.01)
    & (fig2_df['coeff.sigma_squared'] == 0.02))
run_ids = {}
for condition in fig2_df['condition'].unique():
    condition_rows = fig2_df[(fig2_df['condition']==condition) & fig2a_candidates]
    run_ids[condition] = condition_rows['idx'].iloc[0]

In [None]:
# Plot Fig2a: one example time course per condition, reporter in the condition's
# colour and the inducible gene in gray, with the span from t=10000 onward shaded.
fig2_subfigures['A'].subplots_adjust(left=0.08, bottom=0.225, right=0.9, top=0.8)
fig_2a_axes = fig2_subfigures['A'].subplots(ncols=4, sharey=True)
# The loop variable is `run_idx` rather than `id`, so the builtin id() is not
# shadowed for the rest of the kernel session.
for ax, (cond, run_idx) in zip(fig_2a_axes, run_ids.items()):
    run = fig2_df[fig2_df.idx == run_idx].copy()
    run['normed_constant'] = run['constant_mRNA'] / 380
    run['normed_inducible']= run['inducible_mRNA'] / 380
    ax.axvspan(10000, max(run['time']), color=fig2_palette['background_gray'])
    sns.lineplot(data=run, x='time', y='normed_constant', color=fig2_palette[cond], linewidth=1, legend=False, ax=ax)
    sns.lineplot(data=run, x='time', y='normed_inducible', color=fig2_palette['gray'], linewidth=1, legend=False, ax=ax)
    sns.despine(ax=ax)
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xlim([0,40000])
    ax.set_ylim([-0.02,1.2])
fig_2a_axes[0].set_ylabel('Gene output')
fig2_subfigures['A'].text(0.5, 0.03, "Time (s)", horizontalalignment='center')
# Gene-orientation cartoon above each axis. Arrow positions, directions and which
# arrow takes the condition colour depend on the condition's suffix.
for ax, cond in zip(fig_2a_axes, run_ids.keys()):
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_horizontalalignment('right')
    if cond.endswith('tandem_down'):
        draw_arrows(fig2_subfigures['A'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig2_palette[cond], fig2_palette['gray'])
    elif cond.endswith('tandem_up'):
        # Colours swapped relative to tandem_down: gray first, condition colour second.
        draw_arrows(fig2_subfigures['A'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig2_palette['gray'], fig2_palette[cond])
    elif cond.endswith('convergent'):
        draw_arrows(fig2_subfigures['A'], ax,
            0.28, 0.72, 1.1, 1.25, 0.07, 1, -1,
            fig2_palette[cond], fig2_palette['gray'])
    elif cond.endswith('divergent'):
        draw_arrows(fig2_subfigures['A'], ax,
            0.4, 0.6, 1.1, 1.25, 0.07, -1, 1,
            fig2_palette[cond], fig2_palette['gray'])

fig2.savefig('test.svg')

In [None]:
# Plot Fig2b: standard deviation of the normalized reporter over time, one curve
# per condition, for runs with sigma_squared == 0.02 and induction within 0.01 of 1.0.
fig2b_rows = (
    (fig2_df['coeff.sigma_squared'] == 0.02) &
    (np.abs(fig2_df['induction'] - 1.0) < 0.01)
)
fig2b_df = fig2_df[fig2b_rows].copy()
fig2b_df['normed_constant'] = fig2b_df['constant_mRNA'] / 380
fig2b_grouped = fig2b_df.groupby(['condition', 'time'])
fig2b_std = fig2b_grouped.std().reset_index()
fig2b_mean = fig2b_grouped.mean().reset_index()
# Reference level: average standard deviation over the window 1000 < t < 10000.
fig2b_reference_window = (fig2b_mean.time < 10000) & (fig2b_mean.time > 1000)
pre_induction_noise = fig2b_std[fig2b_reference_window]['normed_constant'].mean()

fig2_subfigures['B'].subplots_adjust(left=0.25, bottom=0.24, right=0.9, top=0.9)
fig_2b_ax = fig2_subfigures['B'].subplots(ncols=1)
fig_2b_ax.axvspan(10000, max(fig2b_df['time']), color=fig2_palette['background_gray'])
sns.lineplot(
    data=fig2b_std, x='time', y='normed_constant', hue='condition',
    palette=fig2_palette, linewidth=1, estimator=None, ax=fig_2b_ax, legend=None)
fig_2b_ax.axhline(y=pre_induction_noise, color=fig2_palette['gray'], linestyle=':')
sns.despine(ax=fig_2b_ax)
fig_2b_ax.set_ylabel('Standard deviation')
fig_2b_ax.set_xlabel('Time (s)')
fig_2b_ax.set_xlim([0, 40000])
fig_2b_ax.set_ylim([0, 0.4])
fig2.savefig('test.svg')

In [None]:
# Plot Fig2d
# Calculate average convolution
def df_convolve_2d(df):
    """Normalized cross-correlation of the two reporter traces in `df`.

    Both the 'constant_mRNA' and 'inducible_mRNA' columns are mean-centered.
    Their full cross-correlation (inducible against constant) is divided by
    the square root of the product of the two zero-lag autocorrelations.
    If that scale is not greater than 1e-10 (e.g. one trace is flat), the
    raw cross-correlation is returned unscaled.

    Returns a DataFrame with one row per lag and the columns 'lags'
    (integer lag indices) and 'cross_corr'.
    """
    const_centered = df['constant_mRNA'] - df['constant_mRNA'].mean()
    induc_centered = df['inducible_mRNA'] - df['inducible_mRNA'].mean()

    # With equal-length inputs, mode='valid' yields exactly one value:
    # the zero-lag sum of products, i.e. the signal energy.
    const_energy = scipy.signal.correlate(const_centered, const_centered, mode='valid')
    induc_energy = scipy.signal.correlate(induc_centered, induc_centered, mode='valid')
    scale = np.sqrt(const_energy * induc_energy)
    if not scale > 1e-10:
        # Avoid dividing by (nearly) zero.
        scale = 1.0

    lag_axis = scipy.signal.correlation_lags(len(const_centered), len(induc_centered))
    raw_cross_corr = scipy.signal.correlate(induc_centered, const_centered)
    return pd.DataFrame({
        'lags': lag_axis,
        'cross_corr': raw_cross_corr / scale,
    })
# Per-run normalized cross-correlations (time > 15000, sigma_squared == 0.02,
# induction within 0.01 of 0.464, 1.0 or 2.61), averaged within each
# (condition, induction, lag) group.
# NOTE(review): 'idx' presumably identifies an individual simulation run -- confirm.
fig2_convolutions = fig2_df[
    (fig2_df.time > 15000)
    & (
        (np.abs(fig2_df.induction - 0.464) < 0.01) |
        (np.abs(fig2_df.induction - 1.0) < 0.01) |
        (np.abs(fig2_df.induction - 2.61) < 0.01)
    )
    & (fig2_df['coeff.sigma_squared'] == 0.02)].groupby([
        'condition', 'coeff.sigma_squared', 'induction', 'idx'
    ]).apply(df_convolve_2d).reset_index().groupby([
        'condition', 'induction', 'lags']).mean().reset_index()
# Convert lag indices to time using the spacing between the first two distinct
# time values (assumes uniformly sampled time points -- TODO confirm).
time_delta = np.diff(fig2_df['time'].unique())[0]
fig2_convolutions['time'] = fig2_convolutions['lags'] * time_delta
fig2_subfigures['D'].subplots_adjust(left=0.1, bottom=0.225, right=0.9, top=0.8)
# Four data panels plus a narrow fifth axis that holds the colorbar.
fig_2d_axes = fig2_subfigures['D'].subplots(ncols=5, sharey=False,
    gridspec_kw={'width_ratios':[1.0,1.0,1.0,1.0,0.2]})
# Manually share the y-axis between the four data panels only; sharey=True
# in subplots() would also tie in the colorbar axis.
# Axes.sharey replaces get_shared_y_axes().join(...), which was deprecated in
# Matplotlib 3.6 and removed in 3.8.
for ax in fig_2d_axes[1:4]:
    ax.sharey(fig_2d_axes[0])
    ax.yaxis.set_tick_params(which='both', labelleft=False, labelright=False)
for condition, ax in zip(
        ['fig2.tandem_down', 'fig2.tandem_up', 'fig2.convergent', 'fig2.divergent'],
        fig_2d_axes):
    sns.lineplot(data=fig2_convolutions[fig2_convolutions.condition == condition],
        x='time', y='cross_corr', linewidth=1.5, palette=no_yellow_viridis,
        hue='induction', hue_norm=matplotlib.colors.LogNorm(vmin=0.4, vmax=2.7),
        legend=None, ax=ax)
    sns.despine(ax=ax)
    ax.axhline(y=0, color='k')
    ax.axvline(x=0, color='#888888', linestyle=':')
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Abbreviate tick labels of magnitude >= 1000 as e.g. '5k'.
    ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(
        lambda x, _: f'{x}' if abs(x) < 1000 else f'{x/1000:.0f}k'))
    # Arrow glyphs above each panel; the arguments differ per orientation.
    if condition.endswith('tandem_down'):
        draw_arrows(fig2_subfigures['D'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig2_palette[condition], fig2_palette['gray'])
    elif condition.endswith('tandem_up'):
        draw_arrows(fig2_subfigures['D'], ax,
            0.35, 0.6, 1.1, 1.25, 0.07, 1, 1,
            fig2_palette['gray'], fig2_palette[condition])
    elif condition.endswith('convergent'):
        draw_arrows(fig2_subfigures['D'], ax,
            0.28, 0.72, 1.1, 1.25, 0.07, 1, -1,
            fig2_palette[condition], fig2_palette['gray'])
    elif condition.endswith('divergent'):
        draw_arrows(fig2_subfigures['D'], ax,
            0.4, 0.6, 1.1, 1.25, 0.07, -1, 1,
            fig2_palette[condition], fig2_palette['gray'])

fig2.savefig('test.svg')
fig_2d_axes[0].set_ylabel('Cross-correlation')
# One x-axis label for all four panels, centered in the subfigure.
fig2_subfigures['D'].text(0.5, 0.03, "Time offset (s)", horizontalalignment='center')
# Colorbar on the fifth axis, using the same log normalization as the line hues.
fig2d_cbar = fig2_subfigures['D'].colorbar(cm.ScalarMappable(
    norm=matplotlib.colors.LogNorm(vmin=0.4, vmax=2.7), cmap=no_yellow_viridis), cax=fig_2d_axes[4],
    label='Fold induction')
fig2d_cbar.set_ticks([0.4,1.0,2.6], labels=['0.4x', '1.0x', '2.6x'])
fig2d_cbar.set_ticks([0.5, 0.6, 0.7, 0.8, 0.9, 2.0], labels=[], minor=True)
fig2.savefig('test.svg')

In [None]:
# Save the Figure 2 matplotlib figure (`fig2`) as an SVG, at a path relative
# to the notebook's working directory.
fig2.savefig('../writeups/figures/modeling_paper/fig_2_mpl.svg')

## Figure 4

In [None]:
# Load up to `n_examples` example trajectories per (condition, temperature)
# pair, using a Parquet cache when one exists and otherwise extracting them
# from the combined HDF5 simulation file (and writing the cache).
fig4_examples_cache = datadir/'projects'/'tangles'/'simulations'/'fig4_examples.gzip'
if fig4_examples_cache.exists():
    fig4_examples = pd.read_parquet(fig4_examples_cache)
    print(f'Loaded {len(fig4_examples)} total example datapoints from Parquet cached file')
else:
    with h5py.File(
        datadir/'projects'/'tangles'/'simulations'/'modeling_fig4_combined.h5', 'r') as fig4_raw:
        n_examples = 5
        example_counts = {}
        fig4_examples = []
        i = 1
        for group in fig4_raw.keys():
            for run in fig4_raw[group].values():
                condition = run.attrs['comment'].decode('utf-8')
                temperature = run.attrs['temperature']
                c_tup = (condition, temperature)
                if example_counts.get(c_tup, 0) >= n_examples:
                    # Already have enough examples for this pair.
                    continue
                # Read each dataset from disk once (first sub-run only)
                # instead of issuing one HDF5 read per output column.
                mrna = run['mRNA'][:, :2, 0]
                components = run['discrete_components'][:, :9, 0]
                fig4_examples.append(pd.DataFrame(data={
                    'condition': condition,
                    'temperature': temperature,
                    # Materialize the time axis as an array rather than
                    # handing pandas an open h5py Dataset handle.
                    'time': run['time'][:],
                    'her1_mRNA': mrna[:, 0],
                    'her7_mRNA': mrna[:, 1],
                    'her1_protein': components[:, 0],
                    'her7_protein': components[:, 1],
                    'hes6_protein': components[:, 2],
                    'her1_promoter_empty': components[:, 3],
                    'her7_promoter_empty': components[:, 4],
                    'her1_promoter_with_11': components[:, 5],
                    'her1_promoter_with_76': components[:, 6],
                    'her7_promoter_with_11': components[:, 7],
                    'her7_promoter_with_76': components[:, 8],
                    'idx': i
                }))
                i += 1
                example_counts[c_tup] = example_counts.get(c_tup, 0) + 1
        fig4_examples = pd.concat(fig4_examples)
        print(f'Loaded {len(fig4_examples)} example datapoints!')
        fig4_examples.to_parquet(fig4_examples_cache, compression='gzip')
# Load one summary row per simulated sub-run, using a Parquet cache when one
# exists and otherwise computing the summaries from the combined HDF5 file
# (and writing the cache).
fig4_summaries_cache = datadir/'projects'/'tangles'/'simulations'/'fig4_summaries.gzip'
if fig4_summaries_cache.exists():
    fig4_df = pd.read_parquet(fig4_summaries_cache)
    print(f'Loaded {len(fig4_df)} simulation runs from Parquet cached file')
else:
    with h5py.File(
        datadir/'projects'/'tangles'/'simulations'/'modeling_fig4_combined.h5', 'r') as fig4_raw:
        fig4_df = []
        i = 0
        # Extract the Spearman correlation between the two mRNA values
        # alongside storing the mean-centered correlation and auto-correlation
        # of the two.
        for group in fig4_raw.keys():
            group_df = []
            for run in fig4_raw[group].values():
                # Everything below is identical for all sub-runs of a run, so
                # read/compute it once here rather than once per sub-run.
                run_time = run['time'][:]
                # Perform correlation after "burn-in" time
                # of 10000 seconds
                after_burn_in = run_time > 10000
                condition = run.attrs['comment'].decode('utf-8')
                temperature = run.attrs['temperature']
                # her1 is species index 0 and her7 is species index 1.
                mrna = run['mRNA'][:, :2, :]
                for subrun in range(mrna.shape[2]):
                    her1_trimmed = mrna[:, 0, subrun][after_burn_in]
                    her7_trimmed = mrna[:, 1, subrun][after_burn_in]
                    mc_her1 = her1_trimmed - np.mean(her1_trimmed)
                    mc_her7 = her7_trimmed - np.mean(her7_trimmed)
                    sc, _ = scipy.stats.spearmanr(her1_trimmed, her7_trimmed)
                    cross_corr = scipy.signal.correlate(mc_her1, mc_her7)
                    # Geometric mean of the two zero-lag autocorrelations;
                    # downstream code divides cross_corr by this value.
                    autocorr_correction = np.sqrt(
                        scipy.signal.correlate(mc_her1, mc_her1, mode='valid')[0] *
                        scipy.signal.correlate(mc_her7, mc_her7, mode='valid')[0])
                    group_df.append(pd.DataFrame(data={
                        'spearman_corr': sc,
                        # Wrapped in a tuple so the whole array is stored in
                        # a single cell of this one-row frame.
                        'cross_corr': (cross_corr,),
                        'autocorr_correction': autocorr_correction,
                        'condition': condition,
                        'temperature': temperature,
                        'idx': i}))
                    i += 1
            fig4_df.append(pd.concat(group_df, ignore_index=True))
            print(f'Loaded {len(fig4_df[-1])} simulations...')
        fig4_df = pd.concat(fig4_df, ignore_index=True)
        print(f'Loaded {len(fig4_df)} total simulations!')
        fig4_df.to_parquet(fig4_summaries_cache, compression='gzip')

In [None]:
# Build the Figure 4 canvas: a 3x2 grid of subfigures in which panel C spans
# the whole middle row, a bold letter label in the top-left corner of every
# panel, and the color palette shared by the Figure 4 plots.
fig4 = plt.figure(figsize=(6.2, 5.12))
fig4_gridspec = gridspec.GridSpec(3, 2, figure=fig4, wspace=0.1, hspace=0.1)
# Panel letter -> gridspec index (row, column); slice(None) means "all columns".
fig4_panel_slots = {
    'A': (0, 0),
    'B': (0, 1),
    'C': (1, slice(None)),
    'D': (2, 0),
    'E': (2, 1),
}
fig4_subfigures = {
    panel: fig4.add_subfigure(fig4_gridspec[slot])
    for panel, slot in fig4_panel_slots.items()
}
for label, subfig in fig4_subfigures.items():
    panel_label = matplotlib.text.Text(
        x=0, y=1, text=f'{label}.',
        fontsize=13, fontweight='bold',
        verticalalignment='top',
        transform=subfig.transSubfigure)
    subfig.add_artist(panel_label)
fig4_palette = {
    # Per-gene line colors
    'her1': '#5254a3',
    'her7': '#9c9ede',
    # Per-condition colors
    'fig4.uncoupled': '#b5cf6b',
    'fig4.fully-coupled': '#637939',
    'fig4.tangles-coupled': main_palette['divergent'],
    # Neutral colors
    'gray': main_palette['gray'],
    'background_gray': '#DDDDDD',
}

In [None]:
# Plot Fig4c: one example her1/her7 mRNA trace per coupling condition
# (temperature == 301.15), plus an inset on the last panel.
fig4_subfigures['C'].subplots_adjust(left=0.12, bottom=0.25, right=0.95, top=0.8)
fig_4c_axes = fig4_subfigures['C'].subplots(ncols=3, sharey=True)
for ax, condition in zip(fig_4c_axes, ['fig4.uncoupled', 'fig4.fully-coupled', 'fig4.tangles-coupled']):
    df = fig4_examples[(fig4_examples.condition == condition) & (fig4_examples.temperature == 301.15)]
    # Use the second example run available for this condition.
    idx = df.idx.unique()[1]
    example_df = df[df.idx == idx]
    if condition.endswith('tangles-coupled'):
        tangles_idx = idx
        # Keep the trace itself for the inset below. Previously the inset
        # re-filtered the loop variable `df`, which was only correct because
        # this condition happens to be the last one iterated.
        tangles_df = example_df
    sns.lineplot(data=example_df, x='time', y='her1_mRNA', ax=ax, color=fig4_palette['her1'])
    sns.lineplot(data=example_df, x='time', y='her7_mRNA', ax=ax, color=fig4_palette['her7'])
    sns.despine(ax=ax)
    ax.set_ylim([0, 80])
    ax.set_xlim([0, 30000])
    ax.set_xticks([0, 15000, 30000])
    ax.set_xlabel('')
    ax.set_ylabel('')
    # Appending '55' turns the '#RRGGBB' palette entry into '#RRGGBBAA',
    # i.e. a translucent tint of the condition color.
    ax.set_facecolor(fig4_palette[condition] + '55')
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_horizontalalignment('right')
fig_4c_axes[0].set_ylabel('mRNA counts')
fig_4c_axes[1].set_xlabel('Time (s)')
fig_4c_axes[0].text(x=0.5, y=1.1, s='Uncoupled', horizontalalignment='center', transform=fig_4c_axes[0].transAxes)
fig_4c_axes[1].text(x=0.5, y=1.1, s='Transcript coupled', horizontalalignment='center', transform=fig_4c_axes[1].transAxes)
fig_4c_axes[2].text(x=0.5, y=1.1, s='Biophysical coupled', horizontalalignment='center', transform=fig_4c_axes[2].transAxes)
# Add inset axes: the same tangles-coupled trace without the axis limits
# applied to the main panel.
fig_4c_inset = fig_4c_axes[2].inset_axes([0.22, 0.78, 0.28, 0.28])
sns.lineplot(data=tangles_df, x='time', y='her1_mRNA',
    ax=fig_4c_inset, color=fig4_palette['her1'])
sns.lineplot(data=tangles_df, x='time', y='her7_mRNA',
    ax=fig_4c_inset, color=fig4_palette['her7'])
sns.despine(ax=fig_4c_inset)
fig_4c_inset.set_facecolor(fig4_palette['background_gray'])
fig_4c_inset.set_xlabel('')
fig_4c_inset.set_ylabel('')
fig_4c_inset.set_xticks([])
fig_4c_inset.set_yticks([])
fig4.savefig('test.svg')

In [None]:
# Plot Fig4d: distribution of the her1/her7 Spearman correlation per
# coupling condition (temperature == 301.15).
# Condition key -> tick label. The same mapping fixes the violin order, so the
# hand-written labels cannot drift out of sync with the order in which the
# conditions happen to appear in fig4_df.
fig4d_condition_labels = {
    'fig4.uncoupled': 'Uncoupled',
    'fig4.fully-coupled': 'Transcript\ncoupled',
    'fig4.tangles-coupled': 'Biophysical\ncoupled',
}
fig4_subfigures['D'].subplots_adjust(left=0.2, bottom=0.23, right=0.95, top=0.9)
fig_4d_ax = fig4_subfigures['D'].subplots(ncols=1)
sns.violinplot(data=fig4_df[fig4_df.temperature == 301.15],
    x='condition', y='spearman_corr', inner='quartile',
    order=list(fig4d_condition_labels),
    palette=fig4_palette, saturation=0.7,
    bw=0.1, ax=fig_4d_ax)
fig_4d_ax.set_xlabel('')
fig_4d_ax.set_ylabel('Correlation\ncoefficient')
fig_4d_ax.set_xticklabels(list(fig4d_condition_labels.values()))
sns.despine(ax=fig_4d_ax)
fig4.savefig('test.svg')

In [None]:
# Prepare Fig4e: mean normalized cross-correlation curve per
# (condition, temperature), in long format with one row per time delay.
# Each run's cross-correlation array is divided by its autocorr_correction,
# then the runs of a group are stacked and averaged lag by lag.
fig_4e_crosscorr = fig4_df.groupby(['condition', 'temperature']).apply(
    lambda df: np.vstack(df.cross_corr / df.autocorr_correction).mean(axis=0)
).reset_index()
# Number of samples that entered each correlation (time points after the
# 10000 s burn-in), and the sampling interval taken from the first two
# distinct time values.
fig4e_times = fig4_examples.time.unique()
signal_length = int(np.count_nonzero(fig4e_times > 10000))
fig4e_time_delay = (
    scipy.signal.correlation_lags(signal_length, signal_length)
    * np.diff(fig4e_times)[0])
# Every group gets the same lag axis. Repeat it once per group row instead of
# hard-coding the group count (it was a literal 6).
fig_4e_crosscorr.insert(2, 'time_delay', len(fig_4e_crosscorr) * [fig4e_time_delay])
fig_4e_crosscorr = fig_4e_crosscorr.rename(columns={0:'cross_correlation'})
fig_4e_crosscorr = fig_4e_crosscorr.explode(['time_delay', 'cross_correlation'])

In [None]:
# Plot Fig4e: averaged cross-correlation curves (one line per condition)
# against time delay, for the temperature == 301.15 rows.
fig4e_subfig = fig4_subfigures['E']
fig4e_subfig.subplots_adjust(left=0.2, bottom=0.23, right=0.95, top=0.9)
fig_4e_ax = fig4e_subfig.subplots(ncols=1)
fig4e_rows = fig_4e_crosscorr[fig_4e_crosscorr.temperature == 301.15]
sns.lineplot(
    data=fig4e_rows,
    x='time_delay', y='cross_correlation',
    hue='condition', palette=fig4_palette,
    estimator=None, legend=None,
    ax=fig_4e_ax)
sns.despine(ax=fig_4e_ax)
fig_4e_ax.set_ylim([-.3, 0.75])
fig_4e_ax.set_yticks(np.arange(-0.25, 1.0, 0.25))
fig_4e_ax.set(xlabel='Time delay (s)', ylabel='Cross-correlation')
# Reference lines: zero correlation and zero delay.
fig_4e_ax.axhline(y=0, color='k')
fig_4e_ax.axvline(x=0, color='#888888', linestyle=':')
fig4.savefig('test.svg')

In [None]:
# Save the Figure 4 matplotlib figure (`fig4`) as an SVG, at a path relative
# to the notebook's working directory.
fig4.savefig('../writeups/figures/modeling_paper/fig_4_mpl.svg')

## Supplemental Figure 1
This can include the full `sigma_squared` coefficient table (as a large part A). We can also include the noise decomposition in the plasmid case, and the spacing-derived coupling in the plasmid case.

We should also include the non-normalized version of Figure 1c.


### Visual description
???

In [None]:
# Quick-look plots of constant-reporter output vs. induction for three slices
# of df_1c: supercoiling-independent rates, and supercoiling-dependent rates
# at sigma_squared == 0.001 and at sigma_squared ~= 0.0316.
sc_dependent = df_1c['rates.sc_dependent']
sigma_squared = df_1c['coeff.sigma_squared']


def _show_constant_vs_induction(subset):
    """Plot constant_mRNA against induction for `subset` (one line per
    condition, ci='sd' spread), despine the axes and display the figure."""
    sns.lineplot(data=subset, x='induction', y='constant_mRNA', hue='condition', ci='sd')
    sns.despine()
    plt.show()


_show_constant_vs_induction(df_1c[sc_dependent == 0.0])
# List the sigma_squared values available for the supercoiling-dependent runs.
print(df_1c[sc_dependent == 1.0]['coeff.sigma_squared'].unique())
_show_constant_vs_induction(
    df_1c[(sc_dependent == 1.0) & (sigma_squared == 0.001)])
_show_constant_vs_induction(
    df_1c[(sc_dependent == 1.0) & (np.abs(sigma_squared - 0.0316) < 0.001)])

## Figure 2

### Visual description
A 160mm x 120mm image (6.30 x 4.72 in).

- **Panel A**: A cartoon of polymerases coming on and off a piece of DNA, showing the binding energy dE, accumulated supercoiling, and the key variables.
- **Panel B**: A diagram of the key tested variables in Figure 1, including circular/linear, different orientations, and delta spacing.
- **Panel C**: Three diagrams that show (normalized) reporter output (y-axis) vs induction (x-axis), with different plots for the different supercoiling initiation values and different curves for each orientation (constant spacing). These plots are all linear BCs.
- **Panel D**: Two subplots which compare linear vs plasmid BCs vs different induction conditions, using one of the sc initiation panels.
- **Panel E**: Three subplots. These are scatterplots (density plots?) of the three conditions (at full induction) that show the noise behavior of the system. Added on is a schematic reminding … (**TODO**: this sentence is unfinished.)

### Caption