# Generating the input-output function $P(g\mid R, c)$ for varying repressor copy number $R$.

In [1]:
import pickle
import os
import glob
import datetime

# Our numerical workhorses
import numpy as np
from sympy import mpmath
import scipy.optimize
import scipy.special
import scipy.integrate
import pandas as pd
import itertools

# Import libraries to parallelize processes
from joblib import Parallel, delayed

# Import the utils for this project
import chann_cap_utils as chann_cap

# Pre-computing analytical distributions of gene expression.

Since computing the mRNA and protein steady-state probability distributions is computationally expensive, we can pre-compute the distributions for different repressor copy numbers and save the results as lookup tables. Any desired quantity can then be computed from these distributions, including the channel capacity and the variability in gene expression due to the stochasticity of the allosteric molecules.

This notebook achieves the simple task of computing the mRNA and protein distributions for different repressor copy numbers, saving the results into csv files that we can read with `numpy`.

The matrices are arranged such that each row index is the mRNA or protein count and each column index corresponds to a repressor copy number (the position of that copy number in `R_array`).

## Pre-computing the mRNA distribution

Let's start by saving the distribution for mRNA molecules.

In [None]:
# k0 value used by Jones and Brewster; shared by the parameter set below
k0 = 2.7E-3

# Parameter set forwarded as keyword arguments to the mRNA distribution
# function. The MWC parameters come from the global fit to the O2 data.
mRNA_params = {
    'ka': 0.199,
    'ki': 0.00064,
    'omega': np.exp(-4.5),
    'k0': k0,
    'gamma': 0.00284,
    'r_gamma': 15.7,
}

In [None]:
# Define the mRNA copy numbers to evaluate.
# The counts 0..49 are broken up into blocks of 10 (one block per row of the
# grid) so that the blocks can be evaluated in parallel.
mRNA_grid = np.reshape(np.arange(0, 50), [-1, 10])

# Define the array of repressor copy numbers at which to evaluate the function
R_array = np.arange(0, 1001)

# kon for each operator, computed from its binding energy and k0.
# kon_array and kon_operators are parallel lists (same order).
kon_array = [chann_cap.kon_fn(-17, mRNA_params['k0']),
             chann_cap.kon_fn(-15.3, mRNA_params['k0']),
             chann_cap.kon_fn(-13.9, mRNA_params['k0']),
             chann_cap.kon_fn(-9.7, mRNA_params['k0'])]
kon_operators = ['Oid', 'O1', 'O2', 'O3']

# Tolerance used to decide that the cumulative probability has reached 1
NORM_TOL = 1E-5

compute_matrix = True
if compute_matrix:
    for j, kon in enumerate(kon_array):
        print('operator : ' + kon_operators[j])
        # Set the value for the kon
        mRNA_params['kon'] = kon
        # Initialize the output matrix: row index = mRNA count,
        # column index = position of the repressor copy number in R_array
        QmR = np.zeros([mRNA_grid.size, len(R_array)])
        for i, r in enumerate(R_array):
            if r%100==0:
                print('repressors : {:d}'.format(r))
            # NOTE(review): the 1.66 factor presumably converts copies per
            # cell into a concentration -- confirm against chann_cap
            mRNA_params['rep'] = r * 1.66

            # -- Parallel computation of distribution -- #
            def lnm_parallel(m):
                """Evaluate chann_cap.log_p_m_mid_C for one block of mRNA counts.

                Reads the notebook-level `mRNA_params` at call time, so it
                picks up the `kon` and `rep` values set just above.
                """
                lnm = chann_cap.log_p_m_mid_C(C=0, mRNA=m, **mRNA_params)
                return lnm

            # One result per block (row) of mRNA_grid
            lnm = Parallel(n_jobs=7)(delayed(lnm_parallel)(m)
                                     for m in mRNA_grid)

            # -- Building and cleaning the distribution -- #
            p = np.exp(lnm)
            # Matrix of zeros where the cleaned distribution will live; blocks
            # that are not copied over below stay at zero
            p_norm = np.zeros_like(p)
            # Cumulative probability after each block of mRNA counts
            p_sum = np.cumsum(np.sum(p, axis=1))
            # Blocks at which the cumulative probability equals 1 within
            # the tolerance
            normalized = np.where((p_sum <= 1 + NORM_TOL) &
                                  (p_sum >= 1 - NORM_TOL))[0]
            if normalized.size == 0:
                # Previously this surfaced as a bare IndexError
                raise ValueError(
                    'distribution not normalized within tolerance for '
                    'operator {:s}, repressors {:d}'.format(kon_operators[j],
                                                            int(r)))
            # Keep everything up to and INCLUDING the last block whose
            # cumulative sum is still within tolerance. The earlier slice
            # `0:norm_idx` silently dropped that block.
            norm_idx = normalized[-1]
            p_norm[0:norm_idx + 1, :] = p[0:norm_idx + 1, :]
            # By construction this column sums to p_sum[norm_idx], i.e. it is
            # normalized within NORM_TOL
            QmR[:, i] = p_norm.ravel()
        np.savetxt('../../tmp/QmR_' + kon_operators[j] +\
                   '_0_1000_literature_param.csv', QmR, delimiter=",")

### Pre-computing the protein distribution

In [3]:
# Parameter set forwarded as keyword arguments to the protein distribution
# function.
k0 = 2.7E-3  # From Jones & Brewster
prot_params = {
    'ka': 141.52,
    'ki': 0.56061,
    'epsilon': 4.5,
    # Initial kon evaluated at an energy of -9.7; the computation cell
    # overwrites this entry for each operator it processes
    'kon': chann_cap.kon_fn(-9.7, k0),
    'k0': k0,
    'gamma_m': 0.00284,
    'r_gamma_m': 15.7,
    'gamma_p': 0.000277,
    'r_gamma_p': 100,
}

In [68]:
# Define the protein copy numbers to evaluate.
# The counts 0..3999 are broken into blocks of 250 (one block per row of the
# grid) so that the blocks can be computed in parallel.
prot_grid = np.reshape(np.arange(0, 4000), [-1, 250])

# Define the array of repressor copy numbers at which to evaluate the function
R_array = np.arange(0, 1050)

# Setting the kon parameter based on k0 and the binding energies from
# stat. mech. O1 uses -15.3 so that it agrees with the O1 energy used in the
# mRNA section of this notebook (this cell previously had -15.9).
kon_array = [chann_cap.kon_fn(-15.3, prot_params['k0']),
             chann_cap.kon_fn(-13.9, prot_params['k0']),
             chann_cap.kon_fn(-9.7, prot_params['k0']),
             chann_cap.kon_fn(-17, prot_params['k0'])]
kon_operators = ['O1', 'O2', 'O3', 'Oid']
kon_dict = dict(zip(kon_operators, kon_array))

compute_matrix = True
if compute_matrix:
    for op, kon in kon_dict.items():
        print('operator : ' + op)
        # Set the value for the kon
        prot_params['kon'] = kon
        # NOTE(review): QpR is allocated here but never filled or saved in
        # this cell; the results are written to the csv file below instead
        QpR = np.zeros([prot_grid.size, len(R_array)])
        # One long-format csv per operator.
        # NOTE: rows are appended, so re-running this cell against an
        # existing file duplicates its contents.
        file = '../../data/csv_protein_dist/lnp_' + op + '_DJ_RB.csv'
        for i, r in enumerate(R_array):
            if r%50==0:
                print('repressors : {:d}'.format(r))
            prot_params['rep'] = r * 1.66

            # -- Parallel computation of distribution -- #
            def lnp_parallel(p):
                """Evaluate chann_cap.log_p_p_mid_C for one block of protein counts.

                Reads the notebook-level `prot_params` and `r` at call time.
                Returns a DataFrame indexed by protein count with columns
                'repressor', 'protein' and 'lnp'. The worker does NOT touch
                the output file: letting every worker test for the file and
                append to it concurrently could duplicate the header and
                interleave rows.
                """
                lnp = chann_cap.log_p_p_mid_C(C=0, protein=p, **prot_params)
                df = pd.DataFrame([r] * len(p), index=p, columns=['repressor'])
                df.loc[:, 'protein'] = pd.Series(p, index=df.index)
                df.loc[:, 'lnp'] = lnp
                return df

            blocks = Parallel(n_jobs=7)(delayed(lnp_parallel)(p)
                                        for p in prot_grid)
            # Write from the parent process only, in prot_grid order; the
            # header is emitted just once, when the file does not exist yet
            pd.concat(blocks).to_csv(file, mode='a',
                                     header=not os.path.isfile(file))

operator : Oid
repressors : 0


Process ForkPoolWorker-349:
Process ForkPoolWorker-346:
Process ForkPoolWorker-347:
Process ForkPoolWorker-348:
Process ForkPoolWorker-350:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/razo/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.ru

KeyboardInterrupt: 

In [67]:
# Inspect the second block (row) of protein counts handed to a worker
prot_grid[1]

array([250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262,
       263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275,
       276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288,
       289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301,
       302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
       315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327,
       328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
       341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353,
       354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366,
       367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379,
       380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392,
       393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405,
       406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418,
       419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 43