In [None]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import pandas as pd
import holoviews as hv
import seaborn as sns
import gc
hv.extension('bokeh')
from holoviews.operation.datashader import datashade
import bokeh
import bokeh.plotting
import bokeh.palettes
import bokeh.models
import scipy

from tqdm import tqdm

import stochastic_sc_model as ssc_model

# Turn every floating point problem into an exception so numerical trouble
# surfaces immediately; underflow alone is allowed to pass silently.
np.seterr(all='raise', under='ignore')

# 20-colour categorical palette shared by the Bokeh figures in this notebook.
colors20 = bokeh.palettes.Category20[20]

The problem is defined by the location of the currently active polymerases. Each polymerase is characterized by:
1. Its location
2. The length of the nascent mRNA tail ($x$)
3. The DNA twist at this location ($\phi$)
4. The RNAC at this location ($\theta$)

We actually only need to track the first three; the fourth is completely determined by the first three.

Given this state information, we can compute the relaxed supercoiling, which in turn gives us information on the 

In [None]:
# Shared parameter set handed to every ssc_model simulation in this notebook.
# NOTE(review): the unit annotations are comments only. Several of them looked
# inconsistent and are hedged below; confirm against stochastic_sc_model.
params = {
    'mRNA_drag': 1/20, # pN nm^(alpha - 1), presumably; the original note read "alpha / 1" -- TODO confirm
    'mRNA_exponent': 1, # the value of alpha
    'DNA_twist_mobility': 10, # s pN nm
    'RNAP_radius': 15, # nm
    'RNAP_velocity': 20, # nm / s
    'RNAP_torque_cutoff': 12, # pN nm
    'RNAP_stall_torque_width': 3, # pN nm, presumably (a width on the torque axis; original note said pN) -- TODO confirm
    'DNA_force': 1, # pN
    'DNA_bend_plength': 50, # nm, presumably (a persistence length; original note said pN) -- TODO confirm
    'DNA_twist_plength': 95, # nm, presumably (a persistence length; original note said pN) -- TODO confirm
    'DNA_plectonome_twist_plength': 24, # nm, presumably (a persistence length; original note said pN) -- TODO confirm
    'temperature': 298, # K
    'base_promoter_initiation_rate': 1 / 120, # 1 / sec
    'topo_rate': 1 / 1200, # 1 / sec
    'mRNA_deg_rate': 1 / 1200 # 1 / sec
}

Using a conversion of 1bp = .34nm, we need to investigate our different elements.
The upstream RFP-CYMR ORF is 975bp. WPRE is 589bp, with the polyA signal being 200bp.
The downstream rTTa/IRES/mVenus is 2161bp.

The tandem case is:
upstream-WPRE-polyA-1212bp spacer (UbC)-rTTA combo

The convergent case is:
upstream-WPRE-polyA-589 bp spacer (WPRE)-rTTA combo

Let's make both of the edges a rigid barrier, with that barrier being ~3000bp away on each side (2000 bp viral LTRs, plus 1kb)



In [None]:
# 5 x 5 grid of promoter strengths, log-spaced between 0.1 and 1 on both axes
promoter_strength_axis = np.logspace(-1, 0, 5)
mesh_x, mesh_y = np.meshgrid(promoter_strength_axis, promoter_strength_axis)

In [None]:
# Convergent layout; every position is in nm (1 bp = .34 nm).
# RFP is transcribed left-to-right. mVenus is transcribed right-to-left, so its
# start coordinate is larger than its end coordinate.
RFP_start = 3000 * .34
RFP_end = RFP_start + (975 + 589 + 200) * .34
mVenus_end = RFP_end + (589 * .34)
mVenus_start = mVenus_end + (2161 * .34)
# The right-hand barrier sits 3000 bp beyond the outermost feature. In this
# convergent layout that is mVenus_start, not mVenus_end: measuring from
# mVenus_end left the barrier only (3000 - 2161) bp from the mVenus promoter,
# while the left barrier is 3000 bp from the RFP promoter.
end_barrier = mVenus_start + (3000 * .34)
# One bulk simulation per (RFP strength, mVenus strength) pair on the mesh.
# The pairs are materialised as a list so tqdm knows the total and can show a real progress bar.
promoter_strength_pairs = list(zip(mesh_x.flatten(), mesh_y.flatten()))
convergent = pd.concat([
    ssc_model.bulk_simulation(params, ((0,0), (end_barrier, 0)),[
                            (RFP_start, RFP_end, rfp_strength),
                            (mVenus_start, mVenus_end, mVenus_strength)],
                ['RFP', 'mVenus'], (0, 12000, 1000), 500)
    for rfp_strength, mVenus_strength in tqdm(promoter_strength_pairs)])
# Feather can only serialise a default index, and concatenating the per-pair
# frames leaves a non-default one. Reset it first, as the other save cells do.
convergent.reset_index().to_feather('convergent_promoter_strength_grid.feather')

In [None]:
# Reload the cached promoter-strength grid; this rebinds `convergent` to the frame stored on disk.
convergent = pd.read_feather('convergent_promoter_strength_grid.feather')

In [None]:
# Derived promoter-strength columns (added to `convergent` in place)
convergent['rfp_venus_stoich_ratio'] = convergent['RFP_promoter_strength'].div(
    convergent['mVenus_promoter_strength'])
convergent['total_promoter_strength'] = convergent['RFP_promoter_strength'].add(
    convergent['mVenus_promoter_strength'])
# Keep only the rows whose time lies within 1 of t = 11051
near_late_timepoint = (convergent['time'] - 11051).abs() < 1
ss_timepoint = convergent.loc[near_late_timepoint]

In [None]:
# Rows where both promoters were given the same strength
strengths_match = ss_timepoint['RFP_promoter_strength'].eq(ss_timepoint['mVenus_promoter_strength'])
equal_promoter_strength = ss_timepoint.loc[strengths_match]
# Table keyed on the shared promoter strength, carrying both expression values
equal_promoter_table = hv.Table(
    equal_promoter_strength,
    kdims=[('RFP_promoter_strength', 'Promoter strength')],
    vdims=['RFP_expression', 'mVenus_expression'])

In [None]:
# RFP vs mVenus expression, one overlaid scatter per promoter strength
equal_scatter = equal_promoter_table.to(hv.Scatter, 'RFP_expression', 'mVenus_expression')
scatter_style = hv.opts.Scatter(color=hv.Cycle('Category10'), alpha=0.5, size=5)
overlay_style = hv.opts.NdOverlay(legend_position='top')
equal_scatter.overlay('RFP_promoter_strength').opts(scatter_style, overlay_style).options(width=700, height=500)

In [None]:
# Histograms of both expression dimensions for the equal-strength scatter
equal_scatter.hist(['RFP_expression', 'mVenus_expression'])

In [None]:
# Mean of every column per (mVenus strength, RFP strength) pair, plus the ratio of mean expressions
ss_mean = (
    ss_timepoint
    .groupby(['mVenus_promoter_strength', 'RFP_promoter_strength'])
    .mean()
    .assign(rfp_venus_ratio=lambda means: means['RFP_expression'] / means['mVenus_expression'])
)

In [None]:
# NOTE: fix_plot_text is defined in a later cell of this notebook; that cell has to be run before this one.
# Expression ratio against promoter-strength ratio, coloured by total promoter strength
ratio_points = hv.Scatter(
    ss_mean.reset_index(),
    ('rfp_venus_stoich_ratio', 'Promoter strength ratio'),
    [('rfp_venus_ratio', 'Expression ratio'), 'total_promoter_strength'],
).opts(color='total_promoter_strength', cmap='Viridis', colorbar=True,
       logx=True, logy=True, size=10)
# y = x reference line
identity_line = hv.Curve([(0.1,0.1), (10,10)]).opts(color=colors20[2])
scatter = (ratio_points * identity_line).options(width=700, height=550)
bokeh.io.show(fix_plot_text(hv.render(scatter)))

In [None]:
# Expression ratio over the promoter-strength grid.
# The value column is 'rfp_venus_ratio', as created when ss_mean is built;
# 'rfp_venus_fold_ratio' is never defined. The index is reset so the two
# promoter-strength keys are ordinary columns, as in the scatter cell.
heatmap = hv.HeatMap(ss_mean.reset_index(), ['mVenus_promoter_strength', 'RFP_promoter_strength'], 'rfp_venus_ratio')
heatmap.opts(colorbar=True, logx=True, logy=True)

In [None]:
# Mean expression values and their ratio for every promoter-strength pair
ss_mean[['RFP_expression', 'mVenus_expression', 'rfp_venus_ratio']]

In [None]:
def _film_single_gene(gene, movie_name):
    """Simulate one gene on a 2000-long domain with topo relaxation on and write its movie.

    gene is the tuple handed to SupercoilingSimulation; movie_name is passed to gen_movie.
    Returns the simulation object and its post-processed run.
    """
    simulation = ssc_model.SupercoilingSimulation(params, ((0, 0), (2000, 0)), [gene])
    simulation.enable_topo_relaxation()
    processed = simulation.postprocess_run(simulation.simulate((0, 2000)))
    ssc_model.gen_movie(processed, [(500, 800, 1)], 30 * 60, movie_name, 'animation_output', True,
                        x_range_override=(500,1250))
    return simulation, processed


# Same gene in three variants. The last two tuple entries presumably configure
# readthrough (an exponential draw with mean 200, and a flag that, judging by the
# movie names, switches between time- and distance-based readthrough) -- TODO confirm
# against ssc_model.
no_readthrough_sim, run = _film_single_gene(
    (500, 800, 1, ['reporter']),
    'no_readthrough')

readthrough_sim, run = _film_single_gene(
    (500, 800, 1, ['reporter'], lambda: np.random.exponential(200), False),
    'readthrough_time')

readthrough_distance, run = _film_single_gene(
    (500, 800, 1, ['reporter'], lambda: np.random.exponential(200), True),
    'readthrough_distance')

In [None]:
# Element lengths of the tandem constructs
to_barrier = 3000 # bp
rfp_cymr = 975 # bp
wpre = 589 # bp
polyA = 200 # bp
tandem_ubc_spacer = 1327 # bp
venus_rtta = 2915 # bp


def _tandem_layout(insert_bp):
    """Landmark positions, in nm, of a tandem construct.

    insert_bp is the number of base pairs sitting between the RFP-CymR ORF and
    the UbC spacer. Lengths are summed in bp first and then converted at
    .34 nm per bp.
    """
    orf_end = to_barrier + rfp_cymr
    insert_end = orf_end + insert_bp
    venus_start = insert_end + tandem_ubc_spacer
    venus_end = venus_start + venus_rtta
    landmarks_bp = {
        'rfp_start': to_barrier,
        'orf_end': orf_end,
        'insert_end': insert_end,
        'venus_start': venus_start,
        'venus_end': venus_end,
        'barrier': venus_end + to_barrier,
    }
    return {landmark: bp * .34 for landmark, bp in landmarks_bp.items()}


# -WPRE: the upstream gene runs all the way through the downstream gene, at strength .3
no_wpre = _tandem_layout(0)
tandem_sim_args = (params, ((0, 0), (no_wpre['barrier'], 0)), [
    (no_wpre['rfp_start'], no_wpre['venus_end'], .3, ['RFP', 'mVenus'], 200, False),
    # Downstream
    (no_wpre['venus_start'], no_wpre['venus_end'], 1, ['mVenus'], 200, False)])
tandem_sim = ssc_model.SupercoilingSimulation(*tandem_sim_args)
tandem_sim.enable_topo_relaxation()

# +WPRE: the upstream gene again runs through the downstream gene.
# NOTE(review): the earlier comment here said "lower efficiency", yet the strength
# is 1 (the -WPRE construct uses .3) -- confirm which is intended.
with_wpre = _tandem_layout(wpre)
tandem_wpre_sim_args = (params, ((0, 0), (with_wpre['barrier'], 0)), [
    (with_wpre['rfp_start'], with_wpre['venus_end'], 1, ['RFP', 'mVenus'], 200, False),
    # Downstream
    (with_wpre['venus_start'], with_wpre['venus_end'], 1, ['mVenus'], 200, False)])
tandem_wpre_sim = ssc_model.SupercoilingSimulation(*tandem_wpre_sim_args)
tandem_wpre_sim.enable_topo_relaxation()

# +WPRE+polyA: the upstream gene stops right after the polyA signal (no readthrough)
with_wpre_polyA = _tandem_layout(wpre + polyA)
tandem_wpre_polyA_sim_args = (params, ((0, 0), (with_wpre_polyA['barrier'], 0)), [
    (with_wpre_polyA['rfp_start'], with_wpre_polyA['insert_end'], 1, ['RFP'], 200, False),
    # Downstream
    (with_wpre_polyA['venus_start'], with_wpre_polyA['venus_end'], 1, ['mVenus'], 200, False)])
tandem_wpre_polyA_sim = ssc_model.SupercoilingSimulation(*tandem_wpre_polyA_sim_args)
tandem_wpre_polyA_sim.enable_topo_relaxation()

# (simulation, label, [(gene start, gene end), ...]) triples; the gene spans are what gets passed to gen_movie
tandem_sims = [
    (simulation, label, [(layout['rfp_start'], layout['orf_end']),
                         (layout['venus_start'], layout['venus_end'])])
    for simulation, label, layout in [
        (tandem_sim, '-WPRE', no_wpre),
        (tandem_wpre_sim, '+WPRE', with_wpre),
        (tandem_wpre_polyA_sim, '+WPRE+polyA', with_wpre_polyA)]]

In [None]:
# One 12000-long run and one movie per tandem construct
for sim, name, genes in tandem_sims:
    raw_run = sim.simulate((0, 12000))
    single_result = sim.postprocess_run(raw_run)
    print('Success on {}, writing movie'.format(name))
    movie_name = 'tandem' + name
    ssc_model.gen_movie(single_result, genes, 60 * 60, movie_name, 'animation_output', True)

In [None]:
wpre_sims.reset_index().to_feather('tandem_wpre.feather')
wpre_polyA_sims.reset_index().to_feather('tandem_wpre_polya.feather')

In [None]:
# Make mass dataframe
wpre_sims = ssc_model.bulk_simulation(*tandem_wpre_sim_args, ['RFP', 'mVenus'], (0, 10000, 1000), 10000)
wpre_polyA_sims = ssc_model.bulk_simulation(*tandem_wpre_polyA_sim_args, ['RFP', 'mVenus'], (0, 10000, 1000), 10000)
wpre_sims.reset_index().to_feather('tandem_wpre.feather')
wpre_polyA_sims.reset_index().to_feather('tandem_wpre_polya.feather')

In [None]:
def density_plot(df):
    """Return a HoloViews Bivariate element over the RFP and mVenus expression columns of df."""
    return hv.Bivariate(df, kdims=['RFP_expression', 'mVenus_expression'])

def ci_plot(df, fig, colors, legend, num_bootstraps=1000, confint=.99, x_max=10):
    """
    Plots onto a Bokeh plot.

    Draws the (RFP_expression, mVenus_expression) points of df, their least-squares
    line, and a bootstrap confidence band for that line onto fig.

    Parameters:
        df: DataFrame with 'RFP_expression' and 'mVenus_expression' columns.
        fig: figure to draw on; its scatter, line and varea methods are called.
        colors: three colours, used for the points, the fitted line and the band.
        legend: legend label of the points; the line and band use legend + ' linear fit'.
        num_bootstraps: number of bootstrap resamples used for the band.
        confint: two-sided confidence level of the band.
        x_max: the line and band are drawn for x in [0, x_max].

    Returns None; the figure is modified in place.
    """
    # `import scipy` alone does not guarantee that the stats submodule is loaded
    import scipy.stats

    # Pull the columns out once so each resample is a plain array index
    rfp = np.asarray(df['RFP_expression'])
    venus = np.asarray(df['mVenus_expression'])
    bestfit = scipy.stats.linregress(rfp, venus)

    fig.scatter('RFP_expression', 'mVenus_expression', source=df, color=colors[0], legend_label=legend,
                 radius=.07)
    fig.line([0, x_max], [bestfit.intercept, bestfit.intercept + bestfit.slope * x_max],
             color=colors[1], legend_label=legend + ' linear fit')

    # Refit the line on resamples drawn with replacement
    num_points = rfp.shape[0]
    slopes = np.empty(num_bootstraps)
    intercepts = np.empty(num_bootstraps)
    for i in range(num_bootstraps):
        resample = np.random.choice(num_points, num_points)
        result = scipy.stats.linregress(rfp[resample], venus[resample])
        slopes[i] = result.slope
        intercepts[i] = result.intercept

    # Every bootstrap line evaluated at every x, sorted per x (one column per x)
    sample_x = np.linspace(0, x_max, 200)
    predictions = np.sort(intercepts[:, None] + sample_x[None, :] * slopes[:, None], axis=0)

    # Order-statistic indices of the band edges, clamped so a confint close to 1
    # cannot index past the last bootstrap fit
    last = num_bootstraps - 1
    lower_idx = min(int(np.floor(num_bootstraps * (1 - confint) / 2)), last)
    upper_idx = min(int(np.floor(num_bootstraps * (1 + confint) / 2)), last)
    fig.varea(sample_x, predictions[lower_idx], predictions[upper_idx],
              color=colors[2], alpha=.6, legend_label=legend + ' linear fit')

def fix_plot_text(plot):
    """Apply the notebook's text styling to a Bokeh plot and return that same plot.

    The title and axis labels are set to Arial, axis labels to normal style at
    12pt and tick labels to 10pt. When the plot has a legend, its labels get
    Arial 12pt and clicking an entry hides the corresponding glyph.
    """
    plot.title.text_font = 'Arial'
    for axis in (plot.xaxis, plot.yaxis):
        axis.axis_label_text_font_style = 'normal'
        axis.axis_label_text_font = 'Arial'
        axis.axis_label_text_font_size = '12pt'
        axis.major_label_text_font_size = '10pt'
    legends = plot.legend
    if len(legends) != 0:
        legends.label_text_font_size = '12pt'
        legends.label_text_font = 'Arial'
        legends.click_policy = 'hide'
    return plot

In [None]:
# Final-timepoint expression scatter with fitted line and confidence band, one series per construct
fig = bokeh.plotting.figure(
    width=400, height=400,
    x_axis_label='RFP expression', y_axis_label='mVenus expression',
    x_range=[0, 10], y_range=[0, 10])
for construct_sims, palette_slots, construct_label in [
        (wpre_sims, (0, 0, 1), '+WPRE'),
        (wpre_polyA_sims, (16, 16, 17), '+WPRE, +polyA')]:
    ci_plot(construct_sims[construct_sims['time'] == 10000.0], fig,
            [colors20[slot] for slot in palette_slots], construct_label)
fig.legend.location = 'bottom_right'
bokeh.io.show(fix_plot_text(fig))

In [None]:
# Final-timepoint density for +WPRE, with the x axis fixed to [0, 7]
wpre_final = wpre_sims.loc[wpre_sims['time'] == 10000.0]
density = hv.render(density_plot(wpre_final))
density.x_range = bokeh.models.Range1d(start=0, end=7)
bokeh.io.show(fix_plot_text(density))

# Same plot for +WPRE+polyA, leaving the axis range as rendered
wpre_polyA_final = wpre_polyA_sims.loc[wpre_polyA_sims['time'] == 10000.0]
density = hv.render(density_plot(wpre_polyA_final))
bokeh.io.show(fix_plot_text(density))

In [None]:
# Convergent layout; every position is in nm (1 bp = .34 nm).
# RFP is transcribed left-to-right. mVenus is transcribed right-to-left, so its
# start coordinate is larger than its end coordinate.
RFP_start = 3000 * .34
RFP_end = RFP_start + (975 + 589 + 200) * .34
mVenus_end = RFP_end + (589 * .34)
mVenus_start = mVenus_end + (2915 * .34)
# The right-hand barrier sits 3000 bp beyond the outermost feature. In this
# convergent layout that is mVenus_start, not mVenus_end: measuring from
# mVenus_end left the barrier only (3000 - 2915) bp from the mVenus promoter,
# while the left barrier is 3000 bp from the RFP promoter.
end_barrier = mVenus_start + (3000 * .34)

In [None]:
# Two convergent runs that differ only in the mVenus promoter strength:
# 'equal' gives both genes .3, 'unequal' gives mVenus 1.
for venus_strength, movie_name in [(.3, 'equal'), (1, 'unequal')]:
    sim = ssc_model.SupercoilingSimulation(params, ((0,0), (end_barrier, 0)), [
        (RFP_start, RFP_end, .3),
        (mVenus_start, mVenus_end, venus_strength)])
    sim.enable_topo_relaxation()
    single_result = sim.postprocess_run(sim.simulate((0, 12000)))
    movie_genes = [(RFP_start, RFP_end, 1),
                   (mVenus_start, mVenus_end, 1)]
    ssc_model.gen_movie(single_result, movie_genes, 60 * 60, movie_name, 'animation_output', True)

In [None]:
# Convergent pair, both at strength 0.3, each gene tuple carrying the extra (1000, False) entries
readthrough_genes = [
    (RFP_start, RFP_end, 0.3, ['RFP'], 1000, False),
    (mVenus_start, mVenus_end, 0.3, ['mVenus'], 1000, False)]
sim = ssc_model.SupercoilingSimulation(params, ((0,0), (end_barrier, 0)), readthrough_genes)
sim.enable_topo_relaxation()

raw_run = sim.simulate((0, 40000))
single_result = sim.postprocess_run(raw_run)
print('Writing movie')
movie_genes = [(RFP_start, RFP_end, 1),
               (mVenus_start, mVenus_end, 1)]
ssc_model.gen_movie(single_result, movie_genes, 60 * 60, 'convergent_readthrough', 'animation_output', True)

In [None]:
convergent_readthrough['rel_expression'] = convergent_readthrough['RFP_expression'] - convergent_readthrough['mVenus_expression']
convergent_no_readthrough['rel_expression'] = convergent_no_readthrough['RFP_expression'] - convergent_no_readthrough['mVenus_expression']

In [None]:
convergent_args = (params, ((0,0), (end_barrier, 0)),[
                            (RFP_start, RFP_end, .3, ['RFP'], 1000, False),
                            (mVenus_start, mVenus_end, .3, ['mVenus'], 1000, False)])
convergent_readthrough = ssc_model.bulk_simulation(*convergent_args, ['RFP', 'mVenus'], (0, 10000, 1000), 10000)
convergent_readthrough.reset_index().to_feather('convergent_readthrough_10000.feather')

In [None]:
convergent_no_readthrough_args = (params, ((0,0), (end_barrier, 0)),[
                            (RFP_start, RFP_end, .3, ['RFP']),
                            (mVenus_start, mVenus_end, .3, ['mVenus'])])
convergent_no_readthrough = ssc_model.bulk_simulation(*convergent_no_readthrough_args, ['RFP', 'mVenus'], (0, 10000, 1000), 10000)
convergent_no_readthrough.reset_index().to_feather('convergent_no_readthrough_10000.feather')

In [None]:
sns.set(color_codes=True)
# Final-timepoint relative expression of each condition
readthrough_rel = convergent_readthrough.loc[
    convergent_readthrough['time'] == 10000.0, 'rel_expression']
no_readthrough_rel = convergent_no_readthrough.loc[
    convergent_no_readthrough['time'] == 10000.0, 'rel_expression']
# distplot(..., hist=False) only draws a kernel density estimate and distplot is
# deprecated in seaborn, so draw the KDE directly.
sns.kdeplot(readthrough_rel, label='Readthrough')
sns.kdeplot(no_readthrough_rel, label='No readthrough')
plt.xlabel('Relative expression')
plt.ylabel('Probability density')
plt.legend()
plt.savefig('convergent_readthrough.png')

In [None]:
# Final-timepoint RFP vs mVenus expression, both conditions on the same axes
for condition_sims, condition_label in [
        (convergent_readthrough, 'Readthrough'),
        (convergent_no_readthrough, 'No readthrough')]:
    sns.scatterplot(x="RFP_expression", y="mVenus_expression",
                    data=condition_sims[condition_sims['time'] == 10000.0],
                    alpha=.5, label=condition_label)
plt.legend()
plt.show()

In [None]:
# Overlaid distributions of final-timepoint relative expression (no readthrough first)
no_readthrough_final = convergent_no_readthrough.loc[convergent_no_readthrough['time'] == 10000.0]
readthrough_final = convergent_readthrough.loc[convergent_readthrough['time'] == 10000.0]
hv.Distribution(no_readthrough_final, 'rel_expression') * hv.Distribution(readthrough_final, 'rel_expression')

In [None]:
# A throwaway simulation, built only to reach the model's response functions
sim = ssc_model.SupercoilingSimulation(params, ((0,0), (15000, 0)), [(100, 200, 1)])

# Two candidate cutoff curves, both centred on 12
response_x = np.linspace(0, 20, 100)
plt.plot(response_x, 1 / (1 + np.exp((response_x - 12)/.4)), linewidth=3)
plt.plot(response_x, 1 / (1 + (response_x/12)**27), '--', linewidth=3)
plt.legend(('Exponential', 'Hill coefficent'), prop={'size':13})
plt.gca().tick_params(axis='both', which='major', labelsize=13)
plt.savefig('response_funcs.svg')
plt.show()

# Polymerase velocity as a function of supercoiling density
density_x = np.linspace(0, .1, 100)
plt.plot(density_x, sim.model.polymerase_velocity(np.zeros((100,)), density_x), linewidth=3)
plt.xlabel('Supercoiling density (unitless)', fontsize=15)
plt.ylabel('Polymerase velocity (nm/s)', fontsize=15)
plt.gca().tick_params(axis='both', which='major', labelsize=13)
plt.savefig('polymerase_velocity.svg')
plt.show()

# Torque as a function of supercoiling density, with the span .023 to .0765 shaded
shaded_span = [.023, .0765]
plt.fill_between(shaded_span, sim.model.torque_response(np.array(shaded_span)), alpha=.1, color='k')
plt.plot(density_x, sim.model.torque_response(density_x), linewidth=3)
plt.xlabel('Supercoiling density (unitless)', fontsize=15)
plt.ylabel('Torque (pN nm)', fontsize=15)
plt.gca().tick_params(axis='both', which='major', labelsize=13)
plt.savefig('torque.svg')
plt.show()