# Generating the input-output function $P(m\mid R, C)$ for varying repressor copy number $R$.

In [1]:
# Our numerical workhorses
import numpy as np
from sympy import mpmath
import scipy.optimize
import scipy.special
import scipy.integrate
import pandas as pd
import itertools
# Import libraries to parallelize processes
from joblib import Parallel, delayed

# Import matplotlib stuff for plotting
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib

# Seaborn, useful for graphics
import seaborn as sns

# Pickle is useful for saving outputs that are computationally expensive
# to obtain every time
import pickle

import os
import glob

# Import the utils for this project
# Import the project utils
import sys
sys.path.insert(0, '../continuous_game/')
import evolution_bits_utils as evo_utils

# Plot styling overrides applied to every figure in this notebook
rc = {
    # line and frame widths
    'lines.linewidth': 2,
    'axes.linewidth': 1.2,
    # font sizes
    'axes.labelsize': 16,
    'axes.titlesize': 18,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
    # colors (hex strings without the leading '#')
    'axes.facecolor': 'F4F3F6',
    'axes.edgecolor': '000000',
    'grid.color': 'a6a6a6',
    # grid appearance
    'grid.linestyle': ':',
}

# Hand the same overrides to both the seaborn context and the seaborn style,
# then select the color palette.
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)
sns.set_palette("deep", color_codes=True)

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables SVG graphics inline (only use with static plots (non-Bokeh))
%config InlineBackend.figure_format = 'svg'

# Pre-computing analytical distributions of gene expression.

Since computing the mRNA and protein steady-state probability distributions is computationally expensive, we pre-compute the distributions for different repressor copy numbers and save the results as lookup tables. Any desired quantity, including the channel capacity and the variability in gene expression due to the stochasticity of the allosteric molecules, can then be computed from these tables.

This notebook achieves the simple task of computing the mRNA and protein distributions for different repressor copy numbers and saving the results into csv files that we can read with `numpy`.

The matrices are arranged such that each row index is the mRNA or protein count and each column index is the number of repressors.

## Pre-computing the mRNA distribution

Let's start by saving the distribution for mRNA molecules.

In [3]:
# Parameters for the mRNA distribution.
# Alternative k0 (fit to the O2 - RBS1027 data using the noise), kept for
# reference:
# k0 = 0.000123
# Active value: the one used by Jones and Brewster.
k0 = 2.7E-3

# The MWC parameters come from the global fit to the O2 data
mRNA_params = {
    'ka': 0.199,
    'ki': 0.00064,
    'omega': np.exp(-4.5),
    'k0': k0,
    'gamma': 0.00284,
    'r_gamma': 15.7,
}

In [5]:
# Define the mRNA copy numbers to evaluate.
# The counts 0..49 are split into blocks (one row per block) so that each
# block can be evaluated by a separate parallel job.
mRNA_grid = np.reshape(np.arange(0, 50), [-1, 10])

# Repressor copy numbers at which the distribution is evaluated (0..1000)
R_array = np.arange(0, 1001)

# One kon value per operator, listed in the same order as kon_operators.
# The first argument to kon_fn differs per operator (presumably the operator
# binding energy -- TODO confirm against evo_utils.kon_fn).
kon_array = [evo_utils.kon_fn(-17, mRNA_params['k0']),
             evo_utils.kon_fn(-15.3, mRNA_params['k0']),
             evo_utils.kon_fn(-13.9, mRNA_params['k0']),
             evo_utils.kon_fn(-9.7, mRNA_params['k0'])]
kon_operators = ['Oid', 'O1', 'O2', 'O3']


def keep_normalized_blocks(prob_blocks, tol=1E-5):
    """
    Zero out the blocks that lie past the point where the distribution
    is normalized.

    Parameters
    ----------
    prob_blocks : 2D array-like
        Probabilities arranged as (block, entry within block).
    tol : float
        The cumulative probability counts as normalized when it lies
        within [1 - tol, 1 + tol].

    Returns
    -------
    2D array with the same shape as prob_blocks. Blocks up to AND
    INCLUDING the last block whose cumulative probability is within
    tolerance of 1 are copied; every later block is zero.

    Raises
    ------
    ValueError
        If the cumulative probability is never within tol of 1.
    """
    prob_blocks = np.asarray(prob_blocks)
    # Cumulative probability mass after each block
    cum_mass = np.cumsum(np.sum(prob_blocks, axis=1))
    normalized = np.where((cum_mass <= 1 + tol) &
                          (cum_mass >= 1 - tol))[0]
    if normalized.size == 0:
        raise ValueError('the cumulative probability is never within '
                         'tolerance of 1; the grid may be too short')
    norm_idx = normalized[-1]
    kept = np.zeros_like(prob_blocks)
    # The slice is inclusive of norm_idx: dropping that block could leave
    # the stored distribution short of normalization.
    kept[0:norm_idx + 1, :] = prob_blocks[0:norm_idx + 1, :]
    return kept


compute_matrix = True
if compute_matrix:
    for j, kon in enumerate(kon_array):
        print('operator : ' + kon_operators[j])
        # Set the value for the kon
        mRNA_params['kon'] = kon
        # Initialize the matrix: one row per mRNA count, one column per
        # repressor copy number
        QmR = np.zeros([mRNA_grid.size, len(R_array)])
        for i, r in enumerate(R_array):
            if r%100==0:
                print('repressors : {:d}'.format(r))
            # NOTE(review): 1.66 presumably converts a copy number into a
            # concentration -- confirm the units expected by evo_utils.
            mRNA_params['rep'] = r * 1.66
            # -- Parallel computation of distribution -- #
            # One job per block of mRNA counts; the parameters are passed
            # explicitly so no per-iteration wrapper function is needed.
            lnm = Parallel(n_jobs=7)(
                delayed(evo_utils.log_p_m_mid_C)(C=0, mRNA=m, **mRNA_params)
                for m in mRNA_grid)
            # -- Building and cleaning the matrix column -- #
            p = np.exp(lnm)
            p_norm = keep_normalized_blocks(p, tol=1E-5)
            QmR[:, i] = p_norm.ravel()
        np.savetxt('../../tmp/QmR_' + kon_operators[j] +\
                   '_0_1000_literature_param.csv', QmR, delimiter=",")

operator : Oid
repressors : 0
repressors : 100
repressors : 200
repressors : 300
repressors : 400
repressors : 500
repressors : 600
repressors : 700
repressors : 800
repressors : 900
repressors : 1000
operator : O1
repressors : 0
repressors : 100
repressors : 200
repressors : 300
repressors : 400
repressors : 500
repressors : 600
repressors : 700
repressors : 800
repressors : 900
repressors : 1000
operator : O2
repressors : 0
repressors : 100
repressors : 200
repressors : 300
repressors : 400
repressors : 500
repressors : 600
repressors : 700
repressors : 800
repressors : 900
repressors : 1000
operator : O3
repressors : 0
repressors : 100
repressors : 200
repressors : 300
repressors : 400
repressors : 500
repressors : 600
repressors : 700
repressors : 800
repressors : 900
repressors : 1000


### Pre-computing the protein distribution

In [15]:
# Parameters for the protein distribution.
# k0 is fit to the noise measurement of the O2 - RBS1027 data.
k0 = .000123

prot_params = {
    'ka': 0.199,
    'ki': 0.00064,
    'omega': np.exp(-4.5),
    'k0': k0,
    # mRNA-level parameters
    'gamma_m': 0.00284,
    'r_gamma_m': 15.7,
    # protein-level parameters
    'gamma_p': 0.000277,
    'r_gamma_p': 100,
}

In [22]:
# Define the protein copy numbers to evaluate.
# The counts 0..3999 are split into blocks (one row per block) so that each
# block can be evaluated by a separate parallel job.
prot_grid = np.reshape(np.arange(0, 4000), [-1, 250])

# Repressor copy numbers at which the distribution is evaluated (0..1049)
R_array = np.arange(0, 1050)

# One kon value per operator, listed in the same order as kon_operators
kon_array = [evo_utils.kon_fn(-9.7, prot_params['k0']),
             evo_utils.kon_fn(-17, prot_params['k0'])]
kon_operators = ['O3', 'Oid']

compute_matrix = True
if compute_matrix:
    for j, kon in enumerate(kon_array):
        print('operator : ' + kon_operators[j])
        # Set the value for the kon
        prot_params['kon'] = kon
        # Initialize the matrix: one row per protein count, one column per
        # repressor copy number
        QpR = np.zeros([prot_grid.size, len(R_array)])
        for i, r in enumerate(R_array):
            if r%10==0:
                print('repressors : {:d}'.format(r))
            # NOTE(review): 1.66 presumably converts a copy number into a
            # concentration -- confirm the units expected by evo_utils.
            prot_params['rep'] = r * 1.66
            # -- Parallel computation of distribution -- #
            # One job per block of protein counts; the parameters are passed
            # explicitly so no per-iteration wrapper function is needed.
            lnp = Parallel(n_jobs=7)(
                delayed(evo_utils.log_p_p_mid_C)(C=0, protein=block,
                                                 **prot_params)
                for block in prot_grid)
            p = np.exp(lnp)
            # The probabilities are stored as computed: no normalization
            # check or truncation is applied to the protein distribution.
            QpR[:, i] = p.ravel()
        # Save the protein matrix (QpR) for this operator
        np.savetxt('../../tmp/QpR_' + kon_operators[j] +\
                   '_0_1050_noise_fit.csv', QpR, delimiter=",")

operator : O3
repressors : 0


KeyboardInterrupt: 

In [21]:
# Display whatever array is currently bound to `p` (assigned from np.exp(...)
# in the computation cells above).
# NOTE(review): this looks like a leftover inspection cell -- confirm whether
# it is still needed.
p

array([[  1.27749753e-88,   8.43422984e-87,   2.82437709e-85, ...,
          1.19728587e-23,   1.45085433e-23,   1.75657728e-23],
       [  2.12486285e-23,   2.56813232e-23,   3.10119712e-23, ...,
          1.74528728e-10,   1.87945162e-10,   2.02341395e-10],
       [  2.17785062e-10,   2.34348181e-10,   2.52107420e-10, ...,
          3.97214337e-05,   4.09207721e-05,   4.21510966e-05],
       ..., 
       [  1.52747135e-37,   1.44397431e-37,   1.36502693e-37, ...,
          1.04359012e-43,   9.84099295e-44,   9.27991047e-44],
       [  8.75073547e-44,   8.25165821e-44,   7.78097131e-44, ...,
          3.33527488e-50,   3.13815684e-50,   2.95266392e-50],
       [  2.77811200e-50,   2.61385712e-50,   2.45929316e-50, ...,
          6.30475840e-57,   5.92038942e-57,   5.55941176e-57]])