In [None]:
import itertools
import re
from pathlib import Path

import h5py
import matplotlib.cm as cm
import matplotlib.collections
import matplotlib.colors
import matplotlib.gridspec as gridspec
import matplotlib.image
import matplotlib.patches
import matplotlib.path
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
import pandas as pd
import scipy.signal
import scipy.stats
import seaborn as sns
import svgutils.transform as sg
from matplotlib import animation
from pandas.api.types import CategoricalDtype
# Notebook-wide plot styling: seaborn's 'ticks' style at 'paper' context scale,
# with Helvetica Neue as the preferred sans-serif font.
sns.set_style('ticks')
sns.set_context('paper',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

import rushd as rd

In [None]:
# Shared colours, keyed by gene orientation (plus neutral grays), used by every figure below.
main_palette = {
    'tandem': '#225A9B',
    'tandem_alt': '#19D2BF',
    'convergent': '#FFB133',
    'divergent': '#FE484E',
    'gray': '#666666',
    'light_gray': '#888888',
    'axis_gray': '#262626'
}
# Point sizes for the different kinds of text placed on figures.
font_sizes = {
    'colorbar_title': 8,
    'subaxis_title': 9,
    'line_annotation': 8,
    'data_annotation': 8,
}
# Same colours with the two hex digits '28' appended as an alpha channel (#RRGGBBAA).
light_palette = {k:v + '28' for k, v in main_palette.items()}
# Viridis restricted to its first 80%, which drops the yellow end of the map.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9.
no_yellow_viridis = matplotlib.colors.ListedColormap(plt.get_cmap('viridis', 256)(np.linspace(0,0.8,256)))
# Tick formatters. Each receives (value, position) and ignores the position.
# Whole numbers below 1000 in magnitude, otherwise thousands with a 'k' suffix.
k_formatter = matplotlib.ticker.FuncFormatter(lambda x, _: f'{x:.0f}' if abs(x) < 1000 else f'{x/1000:.0f}k')
# Appends 'x' to the tick value (fold change).
fold_formatter = matplotlib.ticker.FuncFormatter(lambda x, _: f'{x}x')
# Divide by 3600 (seconds -> hours), shown with zero or one decimal place.
hours_formatter = matplotlib.ticker.FuncFormatter(lambda x,_: f'{x/3600:.0f}')
hours_1f_formatter = matplotlib.ticker.FuncFormatter(lambda x,_: f'{x/3600:.1f}')
# Divides by 0.34 and then by 1000 -- presumably nm -> bp (0.34 nm per base pair) -> kb; TODO confirm units.
kb_formatter = matplotlib.ticker.FuncFormatter(lambda x,_: f'{x/0.34/1000:.1f}')

In [None]:
# The base-model summary is stored as eight Parquet shards with suffixes .0 through .7.
# All shards are read and concatenated in one call: DataFrame.append was removed in pandas 2.0,
# and appending inside a loop re-copies the accumulated frame on every iteration.
base_model_df = pd.concat(
    [
        pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/f'fig_base_model_summary.gzip.{shard}'))
        for shard in range(8)
    ],
    ignore_index=True
)
print(f'Loaded {len(base_model_df)} total simulation runs from Parquet cached file')

In [None]:
# Plot BCs
# Conditions in the order they are revealed across the staged figures, paired with their palette key.
bm_condition_order = [
    'fig.bm.tandem_reporter_upstream',
    'fig.bm.tandem_reporter_downstream',
    'fig.bm.convergent',
    'fig.bm.divergent',
]
bm_palette_keys = ['tandem', 'tandem_alt', 'convergent', 'divergent']
fig_bm_palette = {
    bm_condition: main_palette[bm_key]
    for bm_condition, bm_key in zip(bm_condition_order, bm_palette_keys)
}

# Rows with sigma_squared within 0.001 of 0.025, supercoiling-dependent rates, one of the
# four conditions above, and is_plasmid == 0.
bm_matches_sigma = np.abs(base_model_df['coeff.sigma_squared'] - 0.025) < 0.001
bm_sc_dependent = base_model_df['rates.sc_dependent'] == 1.0
bm_in_conditions = base_model_df['condition'].isin(bm_condition_order)
bm_not_plasmid = base_model_df.is_plasmid == 0.0
df_bm = base_model_df[(bm_matches_sigma & bm_sc_dependent) & bm_in_conditions & bm_not_plasmid].copy()
df_bm['condition'] = df_bm['condition'].cat.remove_unused_categories()
# Induction expressed as a fold of 1/120, and reporter mRNA as a fraction of 250.
df_bm['norm_induction'] = df_bm['induction'] / (1.0 / 120.0)
df_bm['constant_mRNA'] = df_bm['constant_mRNA'] / 250.0

# One on-plot label per condition, in the same order as bm_condition_order: (x, y, text).
bm_label_specs = [
    (0.18, 1.12, "Tandem (←)"),
    (0.2, 0.25, "Tandem (→)"),
    (1.1, 0.4, "Convergent"),
    (0.8, 0.9, "Divergent"),
]
for stage in range(1,4):
    # Stage 1 shows both tandem orientations; each later stage reveals one more condition.
    n_conditions_shown = stage + 1
    mask = df_bm['condition'].isin(bm_condition_order[:n_conditions_shown])
    sns.lineplot(data=df_bm[mask],
                x='norm_induction', y='constant_mRNA', hue='condition',
                palette=fig_bm_palette, legend=False, linewidth=4)
    sns.despine()
    plt.xlim([0.0,2.5])
    plt.ylim([0,1.25])
    plt.xticks([0.0,1.0,2.0], ['0x', '1x', '2x'])
    plt.xlabel('Adjacent gene fold induction')
    plt.ylabel('Reporter output')
    for (bm_text_x, bm_text_y, bm_text), bm_key in zip(bm_label_specs[:n_conditions_shown], bm_palette_keys):
        plt.text(bm_text_x, bm_text_y, bm_text, color=main_palette[bm_key])
    plt.savefig(rd.outfile(rd.rootdir/'img'/f'seminar_linear_bc_bulk_{stage}.svg'), bbox_inches='tight')
    plt.show()

In [None]:
# Load the sigma sweep. Each HDF5 dataset is read into memory exactly once, instead of being
# re-read from the file for every alpha inside the comprehension.
with h5py.File(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'sigma_sweep.h5'), 'r') as sigma_h5:
    sweep_sigma = sigma_h5['sweep']['sigma'][:]
    sweep_alphas = sigma_h5['sweep']['s2_coeff'][:]
    sweep_values = sigma_h5['sweep']['values'][:]

# One long-format block per alpha: row a_idx of 'values' holds the energies for alpha a_idx.
# Torque is the energy divided by (1.2 * 2 * pi).
alpha_sweep_df = pd.concat([
    pd.DataFrame({
        'sigma': sweep_sigma,
        'alpha': sweep_alphas[a_idx],
        'energy': sweep_values[a_idx,:],
        'torque': sweep_values[a_idx,:] / (1.2 * 2 * np.pi)
    }) for a_idx in range(len(sweep_alphas))
])

In [None]:
# Torque vs. supercoiling density for the alpha = 0 sweep, drawn with the axes crossing at the origin.
plt.figure(figsize=(8,2.5))
for alpha, color in [(0.0, main_palette['gray'])]:
    # Select the sweep whose alpha is nearest the requested value. The distance is computed once
    # and reduced with the vectorised Series.min() rather than Python's element-by-element min().
    alpha_distance = np.abs(alpha_sweep_df.alpha - alpha)
    closest_alpha_df = alpha_sweep_df[alpha_distance == alpha_distance.min()].copy()
    plt.plot(closest_alpha_df['sigma'], closest_alpha_df['torque'], color=color)
sns.despine(left=False, bottom=True, top=False, right=True)
plt.xlim([-0.2,0.2])
plt.ylim([-35,35])
plt.xticks([-0.2,0.2])
plt.yticks([-30,30])
alpha_ax = plt.gca()
alpha_ax.tick_params(direction='inout', length=10, pad=1)
alpha_ax.yaxis.tick_left()
alpha_ax.xaxis.tick_top()
# Move the top and left spines to data coordinate zero so they act as centred x and y axes.
alpha_ax.spines['top'].set_position('zero')
alpha_ax.spines['left'].set_position('zero')
# Axis titles are placed by hand just outside the plotted range.
alpha_ax.text(0.22, 0, 'σ', verticalalignment='center')
alpha_ax.text(0, 40, 'Torque (pN nm)', horizontalalignment='center')
plt.savefig(rd.outfile(rd.rootdir/'img'/'seminar_torque.svg'), bbox_inches='tight')
plt.show()

In [None]:
# Energy vs. supercoiling density in two builds: alpha = 0 alone ('energy_initial'), then with the
# alpha = 0.025 curve added in black ('energy_corrected').
for alphas, label in zip([[0.0], [0.0, 0.025]], ['energy_initial', 'energy_corrected']):
    plt.figure(figsize=(8,2.5))
    for alpha, color in zip(alphas, [main_palette['light_gray'], 'k']):
        # Select the sweep whose alpha is nearest the requested value. The distance is computed once
        # and reduced with the vectorised Series.min() rather than Python's element-by-element min().
        alpha_distance = np.abs(alpha_sweep_df.alpha - alpha)
        closest_alpha_df = alpha_sweep_df[alpha_distance == alpha_distance.min()].copy()
        plt.plot(closest_alpha_df['sigma'], closest_alpha_df['energy'], color=color)
    sns.despine(left=False, bottom=True, top=False, right=True)
    plt.xlim([-0.2,0.2])
    plt.ylim([-250,250])
    plt.xticks([-0.2,0.2])
    # Ticks sit at +/-208.4 but are labelled +/-30.
    # NOTE(review): presumably a unit conversion of the plotted values -- confirm the factor.
    plt.yticks([-208.4,208.4], ['-30','30'])
    alpha_ax = plt.gca()
    alpha_ax.tick_params(direction='inout', length=10, pad=1)
    alpha_ax.yaxis.tick_left()
    alpha_ax.xaxis.tick_top()
    # Move the top and left spines to data coordinate zero so they act as centred x and y axes.
    alpha_ax.spines['top'].set_position('zero')
    alpha_ax.spines['left'].set_position('zero')
    # Axis titles are placed by hand just outside the plotted range.
    alpha_ax.text(0.22, 0, 'σ', verticalalignment='center')
    alpha_ax.text(0, 280, 'Energy (kcal / mol)', horizontalalignment='center')
    plt.savefig(rd.outfile(rd.rootdir/'img'/f'seminar_{label}.svg'), bbox_inches='tight')
    plt.show()

In [None]:
# Load the cached bursting simulations.
bursting_df = pd.read_parquet(rd.infile(rd.datadir/'projects'/'tangles'/'simulations'/'fig_bursting.gzip'))
print(f'Loaded {len(bursting_df)} total simulation runs from Parquet cached file')

# Three induction levels are kept: the lowest and highest available for the convergent
# condition, plus 1/120. Matching is by exact float equality.
available_induction = bursting_df[bursting_df.condition == 'fig.bm.sc_bursts.convergent'].induction
selected_induction = [np.min(available_induction), 1/120.0, np.max(available_induction)]
at_selected_induction = bursting_df.induction == selected_induction[0]
for induction_level in selected_induction[1:]:
    at_selected_induction = at_selected_induction | (bursting_df.induction == induction_level)
bursting_df_selected = bursting_df[at_selected_induction]

conditions = ['fig.bm.sc_bursts.' + orientation for orientation in ('convergent', 'divergent')]

# Keep convergent/divergent runs at burst_threshold 30 and inter_rnap_gene 2 whose three burst
# statistics are all strictly positive.
uses_burst_settings = (bursting_df_selected.burst_threshold == 30) & (bursting_df_selected.inter_rnap_gene == 2)
has_positive_burst_stats = (
    (bursting_df_selected.burst_size > 0) &
    (bursting_df_selected.burst_duration > 0) &
    (bursting_df_selected.inter_burst_time > 0)
)
in_burst_conditions = bursting_df_selected.condition.isin(conditions)
bm_bursting_df = bursting_df_selected[uses_burst_settings & has_positive_burst_stats & in_burst_conditions].copy()
bm_bursting_df['condition'] = bm_bursting_df['condition'].cat.remove_unused_categories()

In [None]:
# Burst-size densities, built up one curve per stage.
# Each layer: (condition, induction, palette key, (label x, label y), label text).
# Stage k draws the first k layers, in this order.
burst_size_layers = [
    ('fig.bm.sc_bursts.convergent', 0.0, 'gray', (10, 1.1), "Reporter only"),
    ('fig.bm.sc_bursts.convergent', 2.0 / 120.0, 'convergent', (3, 1.5), "Convergent"),
    ('fig.bm.sc_bursts.divergent', 2.0 / 120.0, 'divergent', (200, 0.55), "Divergent"),
]
for stage in range(1,4):
    for layer_condition, layer_induction, layer_color, layer_xy, layer_text in burst_size_layers[:stage]:
        layer_rows = (bm_bursting_df.condition == layer_condition) & (bm_bursting_df.induction == layer_induction)
        sns.kdeplot(data=bm_bursting_df[layer_rows], x='burst_size',
            common_norm=False, color=main_palette[layer_color], linewidth=3,
            legend=None, log_scale=True)
        plt.text(layer_xy[0], layer_xy[1], layer_text, color=main_palette[layer_color])
    plt.xlim([1, 3e3])
    plt.ylim([0,1.8])
    plt.xlabel('Burst size')
    sns.despine()
    plt.savefig(rd.outfile(rd.rootdir/'img'/f'seminar_burst_size_{stage}.svg'), bbox_inches='tight')
    plt.show()

In [None]:
# Inter-burst-time densities, built up one curve per stage.
# Each layer: (condition, induction, palette key, (label x, label y), label text).
# Stage k draws the first k layers, in this order.
inter_burst_layers = [
    ('fig.bm.sc_bursts.convergent', 0.0, 'gray', (100, 1.1), "Reporter only"),
    ('fig.bm.sc_bursts.convergent', 2.0 / 120.0, 'convergent', (400, 0.5), "Convergent"),
    ('fig.bm.sc_bursts.divergent', 2.0 / 120.0, 'divergent', (30, 1.45), "Divergent"),
]
for stage in range(1,4):
    for layer_condition, layer_induction, layer_color, layer_xy, layer_text in inter_burst_layers[:stage]:
        layer_rows = (bm_bursting_df.condition == layer_condition) & (bm_bursting_df.induction == layer_induction)
        sns.kdeplot(data=bm_bursting_df[layer_rows], x='inter_burst_time',
            common_norm=False, color=main_palette[layer_color], linewidth=3,
            legend=None, log_scale=True)
        plt.text(layer_xy[0], layer_xy[1], layer_text, color=main_palette[layer_color])
    plt.xlim([15, 5e3])
    plt.ylim([0,1.55])
    plt.xlabel('Inter-burst time (s)')
    sns.despine()
    plt.savefig(rd.outfile(rd.rootdir/'img'/f'seminar_inter_burst_time_{stage}.svg'), bbox_inches='tight')
    plt.show()

In [None]:
# Load the example traces produced by this project's simulator and by the MATLAB model.
# Both files are registered with rd.infile under the 'fig_zinani' tag.
zinani_simulation_dir = rd.datadir/'projects'/'tangles'/'simulations'
fig_zinani_examples = pd.read_parquet(
    rd.infile(zinani_simulation_dir/'fig_zinani_examples.gzip', 'fig_zinani')
)
print(f'Loaded {len(fig_zinani_examples)} total example datapoints from Parquet cached file')
fig_zinani_matlab_examples = pd.read_parquet(
    rd.infile(zinani_simulation_dir/'fig_zinani_matlab_examples.gzip', 'fig_zinani')
)
print(f'Loaded {len(fig_zinani_matlab_examples)} total example datapoints from Parquet cached file')

# The MATLAB unpaired/paired conditions share their greens with the uncoupled/fully-coupled
# simulations; the tangles-coupled condition reuses the divergent colour from main_palette.
zinani_light_green = '#b5cf6b'
zinani_dark_green = '#637939'
fig_zinani_palette = {
    'her1': '#5254a3',
    'her7': '#9c9ede',
    'matlab.Unpaired': zinani_light_green,
    'matlab.Paired': zinani_dark_green,
    'fig.zinani.uncoupled': zinani_light_green,
    'fig.zinani.fully-coupled': zinani_dark_green,
    'fig.zinani.tangles-coupled': main_palette['divergent'],
    'gray': main_palette['gray'],
    'background_gray': '#DDDDDD',
}

In [None]:
plot_alpha = 0.025


def plot_zinani_example(example_df, y_columns, facecolor_key, title, filename, ylim=None):
    """Plot one example her1/her7 mRNA time course and save it under ``img/``.

    Replaces three copy-pasted plotting blocks that differed only in the values passed here.

    Parameters
    ----------
    example_df : pandas.DataFrame
        Traces for a single condition, with an 'idx' column identifying each trace, a 'time'
        column, and the columns named in ``y_columns``.
    y_columns : tuple of str
        Column names holding the her1 and her7 counts, in that order.
    facecolor_key : str
        Key into ``fig_zinani_palette``; the panel background is that colour with alpha '55'.
    title : str
        Panel title.
    filename : str
        Output file name inside ``rd.rootdir/'img'``.
    ylim : sequence of float, optional
        Explicit y-limits. When omitted, the limits chosen by the plotting calls are kept.

    Raises
    ------
    IndexError
        If ``example_df`` contains fewer than two distinct 'idx' values.
    """
    # The second trace, in order of first appearance, is the one shown in every panel.
    trace = example_df[example_df.idx == example_df.idx.unique()[1]]
    for y_column, gene in zip(y_columns, ('her1', 'her7')):
        sns.lineplot(data=trace, x='time', y=y_column, color=fig_zinani_palette[gene])
    sns.despine()
    plt_ax = plt.gca()
    if ylim is not None:
        plt_ax.set_ylim(ylim)
    plt_ax.set_xlim([0, 15000])
    # Ticks at 0, 2 and 4 hours; the formatter divides the tick values by 3600.
    plt_ax.set_xticks([0, 3600 * 2, 3600 * 4])
    plt_ax.xaxis.set_major_formatter(hours_1f_formatter)
    plt_ax.set_facecolor(fig_zinani_palette[facecolor_key] + '55')
    plt.ylabel('mRNA counts')
    plt.xlabel('Time (hr)')
    plt.title(title)
    plt.savefig(rd.outfile(rd.rootdir/'img'/filename), bbox_inches='tight')
    plt.show()


# Plot Zinani examples
df = fig_zinani_examples[(fig_zinani_examples.condition == 'fig.zinani.uncoupled') & (fig_zinani_examples.alpha == plot_alpha)]
plot_zinani_example(df, ('her1_mRNA', 'her7_mRNA'), 'fig.zinani.uncoupled',
                    'Dimer coupling only', 'zinani_uncoupled.svg')

matlab_df = fig_zinani_matlab_examples[fig_zinani_matlab_examples.condition == 'matlab.Paired']
plot_zinani_example(matlab_df, ('her1', 'her7'), 'matlab.Paired',
                    'Transcript coupled (Zinani)', 'zinani_transcript_coupled.svg')

df = fig_zinani_examples[(fig_zinani_examples.condition == 'fig.zinani.tangles-coupled') & (fig_zinani_examples.alpha == plot_alpha)]
plot_zinani_example(df, ('her1_mRNA', 'her7_mRNA'), 'fig.zinani.tangles-coupled',
                    'Biophysical and dimer coupling', 'zinani_biophysically_coupled.svg', ylim=[0,90])

In [None]:
def dox_string_to_float(s):
    """Convert a dox concentration label taken from a file name into a float.

    Accepted spellings:

    * a plain decimal: ``'0'`` -> 0.0, ``'0.5'`` -> 0.5
    * a bare power of ten, with either case of the leading letter:
      ``'E-1.5'`` or ``'e-1.5'`` -> 10 ** -1.5
    * mantissa and exponent, where the exponent may be fractional:
      ``'1e-1.5'`` -> 1 * 10 ** -1.5

    Parameters
    ----------
    s : str
        The label. Surrounding whitespace is ignored.

    Returns
    -------
    float

    Raises
    ------
    RuntimeError
        If the label matches none of the spellings above.
    """
    text = s.strip()
    try:
        # Bare exponent: 'E-1.5' and friends. A lowercase 'e' used to fall through to the
        # mantissa branch and fail on float('').
        if text[:1] in ('E', 'e'):
            return 10**float(text[1:])
        parts = text.lower().split('e')
        # Plain decimal, including the uninduced '0'.
        if len(parts) == 1:
            return float(parts[0])
        # BASEeEXP (1e-1.5). float() alone cannot parse a fractional exponent.
        if len(parts) == 2:
            return float(parts[0]) * (10**float(parts[1]))
    except ValueError:
        # Not numeric after all; report it with the same error as any other unparseable label.
        pass
    raise RuntimeError(f"Unable to convert dox string: '{s}' to float.")

In [None]:

# Collect the final mRNA counts of every run in every Julia summary file.
# Row 0 of 'final_mRNA' is stored as eGFP and row 1 as mRuby2; the run's attributes are
# broadcast onto every row of that run.
simulation_runs = []
for filename in Path(rd.rootdir/'output'/'julia_summaries').glob('*'):
    with h5py.File(rd.infile(filename), 'r') as file:
        for run in file.values():
            final_counts = pd.DataFrame(data={'eGFP': run['final_mRNA'][0,:], 'mRuby2': run['final_mRNA'][1,:]})
            simulation_runs.append(final_counts.assign(
                topo_rate=run.attrs['rates.topo'],
                induction=run.attrs['gene.base_rate'][0],
                condition=run.attrs['comment'].decode('utf-8'),
                is_plasmid=run.attrs['bcs.is_circular'],
                sc_dependent=run.attrs['rates.sc_dependent'],
            ))
simulation_df = pd.concat(simulation_runs)

In [None]:
# Import our three datasets
# File stems are parsed with one regex per export naming scheme; files that match none are skipped.
n_gene_regex = re.compile(r'^293T\.(?P<condition>\d-gene(?:\.\w+)?)\.(?P<dox>.*)-dox\.(?P<rep>\d)-rep_Single Cells')
control_regex = re.compile(r'^293T\.(?P<condition>[^.]+)\.(?P<rep>\d)-rep_Single Cells')
cell_transfection_regex = re.compile(r'^export_(?P<celltype>[^.]+)\.(?P<condition>\dg\.\w+)\.(?P<dox>.*)dox\.r(?P<rep>\d)_Single Cells')
# NOTE(review): the '.' in '(.\d)?' is unescaped, so it matches any character followed by a
# digit, not only a literal dot. Confirm against the export names before tightening it.
pb_regex = re.compile(r'^export_(?P<celltype>[^.]+)\.(?P<condition>(\d[gc]\.\w+|\w+))(.\d)?\.(?P<dox>.*)dox\.r(?P<rep>\d)_Single Cells')

# Per-file frames are collected in their own lists, so the *_dataset names only ever hold DataFrames.
first_transfection_frames = []
control_frames = []
cell_transfection_frames = []
pb_frames = []

# 2021.05.07: one scan of the directory sorts each export into the n-gene experiment and/or
# the controls. Control files carry only cell type and condition (no dox or replicate columns).
for file in Path(rd.rootdir/'output'/'flow'/'2021.05.07-T2.0').glob('*.csv'):
    exp_match = n_gene_regex.match(file.stem)
    if exp_match is not None:
        dataset = pd.read_csv(rd.infile(file))
        dataset['celltype'] = '293T'
        dataset['condition'] = exp_match.group('condition')
        dataset['dox'] = dox_string_to_float(exp_match.group('dox'))
        dataset['replicate'] = int(exp_match.group('rep'))
        first_transfection_frames.append(dataset)
    control_match = control_regex.match(file.stem)
    if control_match is not None:
        dataset = pd.read_csv(rd.infile(file))
        dataset['celltype'] = '293T'
        dataset['condition'] = control_match.group('condition')
        control_frames.append(dataset)

# 2021.10.18 transfection.
for file in Path(rd.rootdir/'output'/'flow'/'2021.10.18-transfection'/'export').glob('*.csv'):
    match = cell_transfection_regex.match(file.stem)
    if match is not None:
        dataset = pd.read_csv(rd.infile(file))
        # Exports whose mRuby2-A column was not parsed as int64 are skipped.
        if dataset['mRuby2-A'].dtype != np.int64:
            continue
        dataset['celltype'] = match.group('celltype')
        dataset['condition'] = match.group('condition').replace('g.','-gene.')
        dataset['dox'] = dox_string_to_float(match.group('dox'))
        dataset['replicate'] = int(match.group('rep'))
        cell_transfection_frames.append(dataset)

# 2021.11.07 integration.
for file in Path(rd.rootdir/'output'/'flow'/'2021.11.07-flow_tangles_integration').glob('*.csv'):
    match = pb_regex.match(file.stem)
    if match is not None:
        dataset = pd.read_csv(rd.infile(file))
        dataset['celltype'] = match.group('celltype')
        # Fixup condition: a 'c' in second position becomes 'g' so the '-gene.' rename below applies.
        # Slicing instead of indexing keeps a one-character condition name from raising IndexError.
        # NOTE(review): this also rewrites non-numeric names whose second letter is 'c'.
        condition = match.group('condition')
        if condition[1:2] == 'c':
            condition = condition[0] + 'g' + condition[2:]
        dataset['condition'] = condition.replace('g.','-gene.')
        dataset['dox'] = dox_string_to_float(match.group('dox'))
        dataset['replicate'] = int(match.group('rep'))
        pb_frames.append(dataset)

# pd.concat raises ValueError if a list is empty, i.e. if no file matched that dataset's pattern.
first_transfection_dataset = pd.concat(first_transfection_frames, ignore_index=True)
control_dataset = pd.concat(control_frames, ignore_index=True)
cell_transfection_dataset = pd.concat(cell_transfection_frames, ignore_index=True)
pb_dataset = pd.concat(pb_frames, ignore_index=True)

In [None]:
# Postprocess
# Cells count as iRFP670-positive above this intensity; the same cutoff is applied to every dataset.
irfp670_cutoff = 2e2

# The mRuby2 gate is the value 95% of the way through the sorted mRuby2 intensities of the
# iRFP670-positive cells in the 'iRFP670' control condition.
irfp670_control_cells = control_dataset[
    (control_dataset.condition == 'iRFP670') & (control_dataset['iRFP670-A'] > irfp670_cutoff)
]
mRuby2_control_intensity = sorted(irfp670_control_cells['mRuby2-A'])
mRuby2_gate = mRuby2_control_intensity[int(len(mRuby2_control_intensity) * 0.95)]

# The transfection data is gated on both channels; the other two datasets on iRFP670 only.
transfection_is_irfp_positive = cell_transfection_dataset['iRFP670-A'] > irfp670_cutoff
transfection_is_mruby_positive = cell_transfection_dataset['mRuby2-A'] > mRuby2_gate
gated_transfection = cell_transfection_dataset[transfection_is_irfp_positive & transfection_is_mruby_positive].copy()
gated_initial_dataset = first_transfection_dataset[first_transfection_dataset['iRFP670-A'] > irfp670_cutoff].copy()
gated_pb = pb_dataset[pb_dataset['iRFP670-A'] > irfp670_cutoff].copy()

In [None]:
# Keep cells above 1 in both channels, so both log10 values computed below are positive.
plot_df = gated_transfection[(gated_transfection['mRuby2-A'] > 1) & (gated_transfection['eGFP-A'] > 1)]
# Thin the dox series to every other level (in order of first appearance), always keeping dox == 0.
filter_set = plot_df['dox'].unique()[1::2]
plot_df = plot_df[(plot_df['dox'] == 0) | plot_df['dox'].isin(filter_set)]
# Filter out the EGFP/mRuby2 conditions: only names whose second character is '-' are kept.
# .copy() makes the result an independent frame, so the column assignments below do not write
# into a slice of gated_transfection (which triggers SettingWithCopyWarning).
plot_df = plot_df[plot_df['condition'].str[1] == '-'].copy()
plot_df['log mRuby2-A'] = np.log10(plot_df['mRuby2-A'])
plot_df['log eGFP-A'] = np.log10(plot_df['eGFP-A'])
# Shift each log-intensity axis by a fixed offset.
plot_df['recentered_mRuby2'] = plot_df['log mRuby2-A'] - 2
plot_df['recentered_EGFP'] = plot_df['log eGFP-A'] - 1.2

# Compare the uninduced cells against the highest remaining dox level for the divergent construct.
max_dox = np.max(plot_df.dox)
divergent_extremes = plot_df[(plot_df.condition == '3-gene.Divergent') & ((plot_df.dox == 0.0) | (plot_df.dox == max_dox))]
g = sns.jointplot(data=divergent_extremes,
              x='recentered_EGFP', y='recentered_mRuby2', hue='dox', fill=True, kind='kde', alpha=0.5, legend=None,
              palette = {0.0:main_palette['gray'], max_dox:main_palette['divergent']}, height=1.5)
# Only the right-hand marginal is kept.
g.ax_marg_x.remove()
g.ax_joint.text(0.2, 3.0, 'Reporter only', color=main_palette['gray'])
g.ax_joint.text(2.5, 0.6, 'Adjacent\nexpression', color=main_palette['divergent'])
g.ax_joint.set_ylim([0,3.5])
g.ax_joint.set_xlim([-0.5,4])
g.ax_joint.set_xlabel('Adjacent expression')
g.ax_joint.set_ylabel('Reporter expression')
sns.despine()
plt.savefig(rd.outfile(rd.rootdir/'img'/'exp_divergent_paperscale.svg'), bbox_inches='tight')
plt.show()

In [None]:
# Simulated counterpart: divergent two-gene runs with is_plasmid == 1, supercoiling-dependent
# rates, and induction below 0.008.
# Filtering first and copying second gives an independent frame for the column assignments below
# (no SettingWithCopyWarning) and avoids copying rows that are discarded straight away.
sim_divergent_rows = (
    (simulation_df['condition'] == '2_gene.divergent') &
    (simulation_df['is_plasmid'] == 1.0) &
    (simulation_df['induction'] < .008) &
    (simulation_df['sc_dependent'] == 1.0)
)
plot_df = simulation_df[sim_divergent_rows].copy()
# The +1 keeps zero counts finite under log10.
plot_df['log mRuby2'] = np.log10(plot_df['mRuby2'] + 1)
plot_df['log eGFP'] = np.log10(plot_df['eGFP'] + 1)
plot_df['recentered_EGFP'] = plot_df['log eGFP'] + 0.5

# The two induction levels shown are picked by value, within 1e-4 of each target.
shown_inductions = (np.abs(plot_df.induction - 1.481e-3) < 1e-4) | (np.abs(plot_df.induction - 6.249e-3) < 1e-4)
# NOTE(review): the palette is keyed by position (third and fourth distinct induction, in order
# of first appearance) while the rows above are chosen by value. The two must name the same
# levels -- confirm if the input files change.
induction_levels = plot_df.induction.unique()
g = sns.jointplot(data=plot_df[shown_inductions],
              x='recentered_EGFP', y='log mRuby2', hue='induction', fill=True, kind='kde', alpha=0.5, legend=None,
              palette = {induction_levels[2]:main_palette['gray'], induction_levels[3]:main_palette['divergent']})
# Only the right-hand marginal is kept.
g.ax_marg_x.remove()
g.ax_joint.text(-0.3, 1.6, 'Reporter-only', color=main_palette['gray'])
g.ax_joint.text(2.5, 2.6, 'Adjacent\nexpression', color=main_palette['divergent'])
g.ax_joint.set_ylim([0,3.5])
g.ax_joint.set_xlim([-0.5,4])
g.ax_joint.set_xlabel('Simulated log adjacent gene')
g.ax_joint.set_ylabel('Simulated log reporter gene')
sns.despine()
plt.savefig(rd.outfile(rd.rootdir/'img'/'sim_divergent.svg'), bbox_inches='tight')
plt.show()

In [None]:
# Use the locally downloaded ffmpeg build only where it actually exists. Elsewhere, matplotlib's
# own 'animation.ffmpeg_path' setting is left untouched instead of being pointed at a missing file.
local_ffmpeg = Path(r'C:\Users\ChemeGrad2019\Downloads\ffmpeg-2020-12-09-git-7777e5119a-full_build\bin\ffmpeg.exe')
if local_ffmpeg.exists():
    plt.rcParams['animation.ffmpeg_path'] = str(local_ffmpeg)
#sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})
# 30 fps H.264 at CRF 18 with the yuv420p pixel format.
ffmpeg_writer = animation.FFMpegWriter(fps=30, codec='libx264', extra_args=['-crf', '18', '-pix_fmt', 'yuv420p'])
# Opened explicitly read-only, like the other HDF5 inputs in this notebook; without a mode, older
# h5py versions open the file for appending. The handle stays open because later cells read from it.
movie_examples = h5py.File(rd.infile(rd.rootdir/'output'/'seminar_examples.h5'), 'r')

In [None]:
# Show the top-level names stored in the movie-example file (the run keys indexed further down).
movie_examples.keys()

In [None]:
# Custom marker used for the polymerases: two closed cubic-Bezier outlines in one path.
# raw_points lists the control points of one outline as space-separated 'x,y' pairs.
raw_points = '-1,0 -1,0.5 -0.1,0.5 0,0.5 0.1,0.5 1,0.5 1,0 1,-0.5 0.1,-0.5 0,-0.5 -0.1,-0.5 -1,-0.5 -1,0 0,0'
points = np.array([
    [float(coordinate) for coordinate in pair.split(',')]
    for pair in raw_points.split(' ')
])
# The second outline is a 0.6x copy of the first, shifted down by 0.5.
scaled_points = points * 0.6
scaled_points[:,1] -= 0.5
# Small outline first, then the full-size one; both are then raised by 0.15.
all_points = np.vstack((scaled_points, points))
all_points[:,1] += 0.15
# Per outline: one MOVETO, twelve CURVE4 vertices, one CLOSEPOLY.
single_outline_codes = (
    [matplotlib.path.Path.MOVETO]
    + [matplotlib.path.Path.CURVE4] * 12
    + [matplotlib.path.Path.CLOSEPOLY]
)
ellipse_path = matplotlib.path.Path(all_points, codes=single_outline_codes * 2)
def gen_animation(data_group, gene_colors, ex_offset, filename, n_frames, dpi=120, mRNA_remap=None, tspan=None):
    """Render one simulation run as a three-panel movie and save it with the shared ``ffmpeg_writer``.

    Top left: the DNA with its genes and promoter arrows, the polymerases, and a vertical line
    per polymerase whose height is that polymerase's 'mRNA_length'. Bottom left: the supercoiling
    density between neighbouring polymerases. Right: the mRNA count traces, drawn up to the
    current frame's time.

    Parameters
    ----------
    data_group : HDF5 group (or compatible mapping)
        Must provide the datasets 'time', 'mRNA', 'rnap_location', 'phi' and 'mRNA_length' and
        the attributes 'gene.start', 'gene.end' and 'bcs.length'. The last four datasets are
        indexed as ``[:, time_index]``; entries of 'rnap_location' below zero are treated as
        empty slots.
    gene_colors : sequence of matplotlib colours
        One colour per gene, in the order of the 'gene.start' / 'gene.end' attributes.
    ex_offset : any
        Unused. Kept so existing calls remain valid.
    filename : path-like
        Destination of the movie.
    n_frames : int
        Number of frames; frame ``i`` shows time ``tspan[0] + (tspan[1] - tspan[0]) * i / n_frames``.
    dpi : int, optional
        Resolution used both to size the 1400 x 700 pixel figure and when saving.
    mRNA_remap : mapping of int to int, optional
        When given, mRNA trace ``i`` is coloured with ``gene_colors[mRNA_remap[i]]`` instead of
        ``gene_colors[i]``.
    tspan : (start, end), optional
        Time window to animate. Defaults to ``(0, latest time in the run)``.

    Returns
    -------
    None
        The movie is written to ``filename`` and the figure is closed afterwards.
    """
    genes = list(zip(data_group.attrs['gene.start'], data_group.attrs['gene.end']))
    # +1 when a gene's start precedes its end, -1 when the gene is stored end-before-start.
    gene_directions = [1 - 2 * (gene[0] > gene[1]) for gene in genes]

    # Load the two time series once; they are reused for the axis limits and in every frame.
    times = np.array(data_group['time'])
    mRNA_data = np.array(data_group['mRNA'])
    if tspan is None:
        tspan = (0, times.max())
    max_length = data_group.attrs['bcs.length']
    max_gene_length = max([abs(gene[1] - gene[0]) for gene in genes])

    fig = plt.figure(figsize=(1400 / dpi,700 / dpi))
    plot_ax = plt.subplot2grid((2,2), (0,0))
    sigma_ax = plt.subplot2grid((2,2), (1,0))
    mRNA_ax = plt.subplot2grid((2,2), (0,1), rowspan=2)

    # --- DNA / polymerase panel: x axis only, with unlabelled ticks ---
    plot_ax.set_xlim([0,max_length])
    plot_ax.set_ylim([-20, 100 + max_gene_length])
    plot_ax.spines['top'].set_visible(False)
    plot_ax.spines['right'].set_visible(False)
    plot_ax.spines['left'].set_visible(False)
    plot_ax.tick_params(left=False, labelleft=False)
    # Ticks every 2.5 kb from 0 to 10 kb; the 0.34 factor is presumably nm per base pair -- TODO confirm.
    plot_ax.set_xticks(np.arange(0.0, 12.5, 2.5) * 1000 * 0.34)
    plot_ax.xaxis.set_major_formatter(matplotlib.ticker.NullFormatter())

    # --- Supercoiling panel, sharing the x range of the panel above ---
    sigma_ax.set_xlim([0,max_length])
    sigma_ax.set_ylim([-0.6, 0.6])
    sigma_ax.spines['top'].set_visible(False)
    sigma_ax.spines['right'].set_visible(False)
    sigma_ax.set_xticks(np.arange(0.0, 12.5, 2.5) * 1000 * 0.34)
    sigma_ax.xaxis.set_major_formatter(kb_formatter)
    sigma_ax.set_xlabel('Distance (kb)')
    sigma_ax.set_ylabel('σ')
    sigma_ax.text(50, 0.4, 'Overwound')
    sigma_ax.text(50, -0.5, 'Underwound')

    # --- mRNA count panel ---
    mRNA_ax.set_xlim(tspan)
    mRNA_ax.set_ylim([0,mRNA_data.max()])
    mRNA_ax.spines['top'].set_visible(False)
    mRNA_ax.spines['right'].set_visible(False)
    mRNA_ax.set_xlabel('Time (hr)')
    mRNA_ax.set_ylabel('# mRNAs')
    mRNA_ax.set_xticks([0, 10000, 3600 * 6])
    mRNA_ax.xaxis.set_major_formatter(hours_1f_formatter)

    fig.tight_layout()

    sigma_ax.axhline(0, color=main_palette['axis_gray'])
    sigma_line = sigma_ax.plot([], [], linewidth=3, color='#3d8c7d')[0]

    # Shade everything from t = 10000 to the end of the run.
    mRNA_ax.axvspan(10000, times.max(), color=main_palette['light_gray'] + '55')
    if mRNA_remap is None:
        trace_colors = gene_colors
    else:
        trace_colors = [gene_colors[mRNA_remap[i]] for i in range(len(gene_colors))]
    # One (initially empty) trace per gene.
    gene_lines = [mRNA_ax.plot([], [], color=color)[0] for _, color in zip(genes, trace_colors)]

    # Promoter glyphs: a vertical stem, a horizontal bar and an arrow head per gene. x is in data
    # coordinates and y in axes fractions (the x-axis transform).
    xax_transform = plot_ax.get_xaxis_transform()
    for gene, gene_dir, color in zip(genes, gene_directions, gene_colors):
        base = gene[0] - (max_length * 0.34 * 0.01)
        plot_ax.plot([base, base], [20/(120 + max_gene_length), 0.33], linewidth=5, solid_capstyle='projecting', color=color, transform=xax_transform, clip_on=False)
        plot_ax.plot([base, base + gene_dir * (max_length * 0.34 * 0.06)], [0.33, 0.33], linewidth=5, solid_capstyle='projecting', color=color, transform=xax_transform, clip_on=False)
        plot_ax.add_artist(matplotlib.patches.FancyArrow(base + gene_dir * (max_length * 0.34 * 0.06), 0.33, gene_dir, 0,
            width=0.1, head_width=0.1, head_length=max_length * 0.34 * 0.05, color=color, transform=xax_transform, clip_on=False))

    # The DNA as a thick translucent bar, with each gene drawn over it in its own colour.
    plot_ax.plot([0, max_length], [0, 0], color=main_palette['light_gray']+'55', linewidth=14, zorder=0, solid_capstyle='butt')

    gene_plots = [plot_ax.plot(gene, [0, 0], color=color,linewidth=14, zorder=0, solid_capstyle='butt')[0] for gene, color in zip(genes, gene_colors)]

    mRNA_lines = matplotlib.collections.LineCollection([], colors='#d18cca')

    polymerases = plot_ax.plot([], [], marker=ellipse_path, ls='',
                        mec='#3d8c7d', mfc='#82d4bd', mew=2, ms=15, zorder=2.5)[0]

    def init():
        # Attach the transcript collection; the returned artists are the blitting background set.
        mRNA_lines.set_linewidth(3)
        plot_ax.add_collection(mRNA_lines)
        return (*gene_plots, mRNA_lines)

    def animate(i):
        # Map the frame number onto simulation time, then use the last sample strictly before it
        # (or the first sample when there is none).
        t = tspan[0] + (tspan[1] - tspan[0]) * (i / n_frames)
        possible_idxes = np.where(times < t)[0]
        t_idx = possible_idxes[-1] if len(possible_idxes) > 0 else 0
        # Read only the per-polymerase series this frame needs.
        frame_vals = {k:data_group[k][:,t_idx] for k in ['rnap_location', 'phi', 'mRNA_length']}
        n_polymerases = np.sum(frame_vals['rnap_location'] >= 0)

        # Plot mRNA values (on the right-hand plot)
        gene_time_mask = (times < t)
        for gene_idx, gene_line in enumerate(gene_lines):
            gene_line.set_data(times[gene_time_mask], mRNA_data[gene_idx,gene_time_mask])

        # Set visibility
        polymerases.set_visible(n_polymerases > 0)
        mRNA_lines.set_visible(n_polymerases > 0)

        # Plot polymerases and genes coming off polymerases.
        if n_polymerases > 0:
            # Pad with the two ends of the DNA (location 0 and max_length, phi = 0 at both).
            # The first n_polymerases entries of each series are taken as the active polymerases.
            rnap_locs = np.concatenate((
                np.zeros(1,),
                frame_vals['rnap_location'][:n_polymerases],
                np.ones(1,) * max_length
            ))
            phi = np.concatenate((
                np.zeros(1,),
                frame_vals['phi'][:n_polymerases],
                np.zeros(1,)
            ))
            # One value per interval between neighbouring barriers.
            sigma = np.diff(phi) / np.diff(rnap_locs) / -1.85

            # Set supercoiling line: repeating each interior location and each sigma value twice
            # turns the per-interval values into a step function.
            sigma_line.set_data(
                np.concatenate(([rnap_locs[0]], [loc for loc in rnap_locs[1:-1] for _ in range(2)], [rnap_locs[-1]])),
                [s for s in sigma for _ in range(2)]
            )

            polymerases.set_data(rnap_locs[1:-1], np.zeros_like(rnap_locs[1:-1]))
            # Each transcript is a vertical segment from the DNA up to its current length.
            mRNA_base = np.array([rnap_locs[1:-1], np.zeros(rnap_locs[1:-1].shape)]).T.reshape(-1,1,2)
            mRNA_tail = np.array([rnap_locs[1:-1], frame_vals['mRNA_length'][:n_polymerases]]).T.reshape(-1,1,2)
            mRNA_segments = np.concatenate([mRNA_base, mRNA_tail], axis=1)
            mRNA_lines.set_segments(mRNA_segments)
        else:
            # No polymerases: a flat line at sigma = 0 across the whole DNA.
            sigma_line.set_data([0, max_length], [0,0])
        return [sigma_line, polymerases, mRNA_lines] + list(gene_lines)
    anim = animation.FuncAnimation(fig, animate, init_func=init, frames=n_frames, interval=33, blit=True)
    anim.save(str(filename), dpi=dpi, writer=ffmpeg_writer, progress_callback= lambda i,n: print(f'Saving frame {i} of {n}') if i % 250 == 0 else None)
    # Release the figure once the movie is on disk, so successive calls do not pile up open figures.
    plt.close(fig)

In [None]:
# One movie per gene arrangement: (run key, gene colours, ex_offset, output file name, mRNA colour remap).
# Every movie is 15 * 30 frames long and spans the first six hours (3600 * 6 s) of its run.
movie_jobs = [
    ('tangles_full_run.000018', [main_palette['tandem'], main_palette['gray']], 0, 'test_out_tandem_upstream.mp4', {0:1, 1:0}),
    ('tangles_full_run.000022', [main_palette['gray'], main_palette['tandem_alt']], 1, 'test_out_tandem_downstream.mp4', None),
    ('tangles_full_run.000025', [main_palette['gray'], main_palette['convergent']], 2, 'test_out_convergent.mp4', None),
    ('tangles_full_run.000027', [main_palette['gray'], main_palette['divergent']], 3, 'test_out_divergent.mp4', None),
]
for movie_run_key, movie_colors, movie_offset, movie_name, movie_remap in movie_jobs:
    gen_animation(
        movie_examples[movie_run_key],
        movie_colors,
        movie_offset,
        rd.outfile(rd.rootdir/'output'/movie_name),
        15*30,
        tspan=[0,3600*6],
        mRNA_remap=movie_remap,
    )