# Fitting $\Delta\varepsilon_{AI}$ to inducer mutants 

In [3]:
# Third-party stack: data handling, numerics, Stan interface, and plotting.
import sys
import pandas as pd
import numpy as np
import pystan
import bokeh.io
import bokeh.plotting
# Prepend the directory two levels up to the import path. This has to run
# before the `mut` imports below (presumably that is where `mut` lives).
sys.path.insert(0, '../../')
import mut.thermo
import mut.stats
import mut.bayes
# Direct Bokeh output to the notebook rather than a standalone HTML file.
bokeh.io.output_notebook()

In this notebook, we investigate a method to determine whether particular mutations in the inducer-binding domain alter the allosteric energy difference $\Delta\varepsilon_{AI}$.

In [9]:
# Read the compiled measurements from disk.
data = pd.read_csv('../../data/csv/compiled_data.csv')

# Keep only the rows belonging to the mutants examined in this notebook.
mutants_of_interest = ['Y20I', 'Q294K', 'Y20I-Q294K']
data = data[data['mutant'].isin(mutants_of_interest)]

In [None]:
# Compile the Stan model stored in DNA_binding_energy.stan (a single model;
# it is used below to sample `epR`).
model = pystan.StanModel(file='../stan/DNA_binding_energy.stan')

In [21]:
# Restrict to the Y20I measurements taken with no IPTG present.
epR_data = data[(data['mutant']=='Y20I') & (data['IPTGuM']==0)]

# Assemble the data dictionary passed to the Stan model. The numeric constants
# (Nns, ka, ki, ep_ai, n_sites) are held fixed while `epR` is sampled.
data_dict = {'N':len(epR_data), 'R':epR_data['repressors'], 'Nns':4.6E6, 'c':epR_data['IPTGuM'],
            'ka':139E-6, 'ki':0.53E-6, 'ep_ai':4.5, 'n_sites':2, 'fc':epR_data['fold_change']}

# Fix the sampler seed so a fresh "Restart & Run All" reproduces the same
# chains (and therefore the same summary statistics below).
samples = model.sampling(data_dict, iter=5000, chains=4, seed=42)

# Convert the chains to a dataframe and compute the summary statistics.
epR_df = mut.bayes.chains_to_dataframe(samples, varnames=['epR'])
epR_stats = mut.stats.compute_statistics(epR_df)
epR_stats

  elif np.issubdtype(np.asarray(v).dtype, float):


Unnamed: 0,parameter,mode,hpd_min,hpd_max
0,epR,-9.834516,-9.979688,-9.673158
1,sigma,0.185278,0.138482,0.259845


In [33]:
# Evaluate the theoretical fold-change at the most probable binding energy.
rep_range = np.logspace(0, 4, 500)
epR_mode = epR_stats.loc[epR_stats['parameter']=='epR', 'mode'].values[0]
theo = mut.thermo.SimpleRepression(R=rep_range, ep_r=epR_mode).fold_change()

# Summarize the measurements at each repressor copy number (mean and sem).
summary = epR_data.groupby('repressors').apply(mut.stats.compute_mean_sem)
grouped = pd.DataFrame(summary).reset_index()

# Set up the log-log canvas and draw the mean of each group.
p = bokeh.plotting.figure(plot_width=600, plot_height=400, x_axis_type='log',
                         y_axis_type='log', x_axis_label='repressors per cell',
                         y_axis_label='fold-change')
p.circle(grouped['repressors'], grouped['mean'], color='dodgerblue', size=6)

# Build one vertical segment per group spanning mean - sem to mean + sem.
yerrs = [(avg - err, avg + err)
         for avg, err in zip(grouped['mean'], grouped['sem'])]
xpos = [(rep, rep) for rep in grouped['repressors']]

# Error bars first, then the theory curve on top.
p.multi_line(xpos, yerrs, color='dodgerblue', line_width=2)
p.line(rep_range, theo, color='dodgerblue', line_width=2)
bokeh.io.show(p)

## Fitting $\Delta\varepsilon_{AI}$ 

Using only the $c = 0$ data, we fit the allosteric energy difference $\Delta\varepsilon_{AI}$ for the inducer mutant Q294K.


In [52]:
# Compile the Stan model stored in allosteric_energy.stan; it is used below
# to sample `ep_ai`.
allo_model = pystan.StanModel(file='../stan/allosteric_energy.stan')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_77654e917c209fa9a235a319946cc0e9 NOW.


In file included from /var/folders/2q/lvh2zsws3lxckq8xtkn_84z80000gn/T/tmpin3yaf7k/stanfit4anon_model_77654e917c209fa9a235a319946cc0e9_8362434532802113004.cpp:599:
In file included from /anaconda3/lib/python3.6/site-packages/numpy/core/include/numpy/arrayobject.h:4:
In file included from /anaconda3/lib/python3.6/site-packages/numpy/core/include/numpy/ndarrayobject.h:18:
In file included from /anaconda3/lib/python3.6/site-packages/numpy/core/include/numpy/ndarraytypes.h:1816:
 ^
    __pyx_t_12 = ((__pyx_t_9 != __pyx_v_fitptr->param_names_oi().size()) != 0);
                   ~~~~~~~~~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from /var/folders/2q/lvh2zsws3lxckq8xtkn_84z80000gn/T/tmpin3yaf7k/stanfit4anon_model_77654e917c209fa9a235a319946cc0e9_8362434532802113004.cpp:603:
In file included from /anaconda3/lib/python3.6/site-packages/pystan/stan_fit.hpp:22:
In file included from /anaconda3/lib/python3.6/site-packages/pystan/stan/src/stan/services/diagnose/diagnose.hpp:10:

In [53]:
# Restrict to the Q294K measurements taken with no IPTG present.
epAI_data = data[(data['mutant']=='Q294K') & (data['IPTGuM']==0)]

# Define the data dictionary. `epR` and `Nns` are supplied as fixed inputs
# here, while `ep_ai` is the quantity being sampled.
data_dict = {'N':len(epAI_data), 'R':epAI_data['repressors'], 'epR':-13.9,
            'Nns':4.6E6, 'fc':epAI_data['fold_change']}

# Sample with a fixed seed so a fresh "Restart & Run All" reproduces the same
# chains, then convert them to a dataframe and summarize.
allo_samples = allo_model.sampling(data_dict, iter=10000, chains=4, seed=42)
allo_df = mut.bayes.chains_to_dataframe(allo_samples, varnames=['ep_ai'])
allo_stats = mut.stats.compute_statistics(allo_df)

  elif np.issubdtype(np.asarray(v).dtype, float):


In [54]:
allo_samples

Inference for Stan model: anon_model_77654e917c209fa9a235a319946cc0e9.
4 chains, each with iter=10000; warmup=5000; thin=1; 
post-warmup draws per chain=5000, total post-warmup draws=20000.

            mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ep_ai      -2.49  1.4e-3    0.1  -2.68  -2.54  -2.49  -2.44  -2.29   5005    1.0
sigma       0.16  1.1e-3   0.08   0.08   0.11   0.14   0.18   0.35   5155    1.0
log_fc[0]  -1.65 4.7e-166.7e-16  -1.65  -1.65  -1.65  -1.65  -1.65      2    nan
log_fc[1]  -1.87     0.0    0.0  -1.87  -1.87  -1.87  -1.87  -1.87      2    nan
log_fc[2]  -1.68 4.7e-166.7e-16  -1.68  -1.68  -1.68  -1.68  -1.68      2    nan
log_fc[3]  -1.59 3.1e-164.4e-16  -1.59  -1.59  -1.59  -1.59  -1.59      2    nan
log_fc[4]  -1.83 3.1e-164.4e-16  -1.83  -1.83  -1.83  -1.83  -1.83      2    nan
log_fc[5]  -1.84 4.7e-166.7e-16  -1.84  -1.84  -1.84  -1.84  -1.84      2    nan
lp__        7.05    0.02   1.23   3.76   6.57   7.42   7.92   8.24   4563    1.0

In [55]:
# Canvas for the sampled distribution of ep_ai.
p = bokeh.plotting.figure(plot_width=600, plot_height=400,
                          x_axis_label='allosteric energy',
                          y_axis_label='frequency')

# Bin the samples into 100 bins, normalized as a density.
hist, edges = np.histogram(allo_df['ep_ai'], bins=100, density=True)
left_edges, right_edges = edges[:-1], edges[1:]

# One rectangle per bin, rising from zero to the bin's density.
p.quad(left=left_edges, right=right_edges, bottom=0, top=hist,
       fill_color="dodgerblue")
bokeh.io.show(p)

In [63]:
# Log-log canvas. `plot_height` is used (rather than `height`) so the sizing
# keywords match `plot_width` and the other figures in this notebook.
p = bokeh.plotting.figure(plot_width=600, plot_height=400, x_axis_type='log',
                          y_axis_type='log', x_axis_label='repressors per cell',
                          y_axis_label='fold-change')

# Compute the theoretical curves for two allosteric energies: -2.5 (close to
# the sampled ep_ai summarized above) and +4.5 (the value supplied as `ep_ai`
# when fitting epR earlier). All other arguments are shared between the two.
shared_kwargs = dict(effector_conc=0, n_sites=2, ka=0.1, ki=0.1)
theo = mut.thermo.SimpleRepression(rep_range, -13.9, ep_ai=-2.5,
                                   **shared_kwargs).fold_change()
theo2 = mut.thermo.SimpleRepression(rep_range, -13.9, ep_ai=4.5,
                                    **shared_kwargs).fold_change()

# Overlay both predictions and the individual Q294K measurements.
p.line(rep_range, theo, line_width=2, color='dodgerblue', legend = 'Δε = -2.5')
p.line(rep_range, theo2, line_width=2, color='tomato', legend = 'Δε = +4.5')
p.circle(epAI_data['repressors'], epAI_data['fold_change'], color='slategray', legend='data')
p.title.text = 'Q294K leakiness'
bokeh.io.show(p)

## Fit $K_A/K_I$ with new $\Delta\varepsilon_{AI}$ 