# Match Isobutanol Data
6.25.2020

In [1]:
# Import cell (refined)
import numpy as np
import pandas as pd
import scipy.special as sp
import scipy.optimize
from scipy.optimize import curve_fit
import scipy.integrate
from scipy.integrate import odeint

import panel as pn

import bokeh.io
import bokeh.plotting


import bebi103
import bokeh_catplot
from bokeh.layouts import row

import cmdstanpy
import arviz as az



bokeh.io.output_notebook()
pn.extension()
 

In [3]:
df_iso = pd.read_csv('../bowie_data_isobutanol.csv')
x0_iso = float(df_iso.columns[0])
x1_iso = float(df_iso.columns[1])
data_iso = [x0_iso, x1_iso]
new_row_iso = pd.DataFrame({'x':x0_iso, 'y':x1_iso}, index =[0])
df_col_iso = df_iso
df_col_iso.columns = ['x', 'y']
df_new_iso = pd.concat([new_row_iso, df_col_iso], ignore_index=True)

In [4]:
p_iso = bokeh.plotting.figure(width = 450, height = 350,
        
                          title = 'Bowie Lab Isobutanol Experimental Data',
                         y_axis_label = 'Isobutanol concentration (mM)',
                         x_axis_label = 'Time (hrs)')
p_iso.circle(df_new_iso['x'].values, df_new_iso['y'].values)
#p_iso.circle(x_add, y_add, color = 'orange', size = 7)
bokeh.io.show(p_iso)

## Model 1: Binding rates are normally distributed

In [6]:
df_new_iso = df_new_iso.rename(columns = {'x': 'iso produced (mM)', 'y': 'time (hrs)'})
df_new_iso.head()

Unnamed: 0,iso produced (mM),time (hrs)
0,0.273224,3.511325
1,0.270808,10.541224
2,0.681006,16.864508
3,0.953989,22.486011
4,1.364187,28.809296


Compute the rates

In [7]:
iso_vals = df_new_iso['iso produced (mM)'].values
times = df_new_iso['time (hrs)'].values

In [8]:
rates = []
for i,val in enumerate(iso_vals):
    if i+1 == len(iso_vals):
        break
    rates.append((iso_vals[i+1]- iso_vals[i]) /(times[i+1] - times[i]))

In [10]:
rates_pos = [r for r in rates if r>0]

Let's do the bayesian stuff now

In [12]:
sm = cmdstanpy.CmdStanModel(stan_file='normal_posterior_isob.stan')

INFO:cmdstanpy:compiling stan program, exe file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_isobutanol/normal_posterior_isob
INFO:cmdstanpy:compiler options: stanc_options=None, cpp_options=None
INFO:cmdstanpy:compiled model file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_isobutanol/normal_posterior_isob


In [30]:
rates_pos

[0.06487106017191861,
 0.04856037816931696,
 0.06487106017191913,
 0.09750644883921132,
 0.0431245225362865,
 0.02410311493018299,
 0.13016055045871425,
 0.032254368375823,
 0.024103114930183314,
 0.1394937331039571,
 0.02759636631475334,
 0.0835175632522748,
 0.07792297111416514,
 0.07792297111416799,
 0.04856037816931792,
 0.05555009823182729,
 0.13949373310395569,
 0.0975064488392077,
 0.04856037816931852,
 0.04856037816931652,
 0.11149877149877153,
 0.0835175632522717,
 0.12199526983444431,
 0.11149877149877056,
 0.09750644883920974,
 0.1710045171004501,
 0.08351756325227194,
 0.09750644883920857,
 0.0485603781693168,
 0.12199526983444507,
 0.16750245821042217,
 0.1464946236559145,
 0.1083890693674762,
 0.14649462365591323,
 0.16283338112991275,
 0.08351756325227218,
 0.19552495697074104,
 0.12199526983444414,
 0.1114987714987716,
 0.23477961432507158,
 0.19552495697073805,
 0.1394937331039586,
 0.12199526983444102,
 0.16750245821042456,
 0.13949373310395788,
 0.2740613158801223,
 

In [13]:
data = rates_pos
N_ppc = 200
data_dict = {
    'N': len(data),
    'k': data,
    "N_ppc":N_ppc
}

In [14]:
samples_exp_post = sm.sample(data=data_dict, iter_sampling=1000, chains=4)

samples_exp_post = az.from_cmdstanpy(posterior=samples_exp_post, posterior_predictive=['k_ppc'])

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:start chain 2
INFO:cmdstanpy:finish chain 2
INFO:cmdstanpy:finish chain 1
INFO:cmdstanpy:start chain 3
INFO:cmdstanpy:start chain 4
INFO:cmdstanpy:finish chain 3
INFO:cmdstanpy:finish chain 4


get parameter estimate for mu, sigma

In [15]:
df_mcmc_exp = samples_exp_post.posterior.to_dataframe()
df_mcmc_exp.mean()

mu       0.317867
sigma    0.403421
dtype: float64

In [16]:
mu_ecdf = bokeh_catplot.ecdf(df_mcmc_exp, val='mu', title = 'mu')
sigma_ecdf = bokeh_catplot.ecdf(df_mcmc_exp, val='sigma', title = 'sigma')
bokeh.io.show(row(mu_ecdf, sigma_ecdf))

In [18]:
k_ppc_exp = samples_exp_post.posterior_predictive['k_ppc'].stack(
    {"sample": ("chain", "draw")}
).transpose("sample", "k_ppc_dim_0")

bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        k_ppc_exp,
        percentiles=[30, 50, 70, 99],
        data=data,
        x_axis_label='Isobutanol production rates (mM/hr)',
        title = 'Isobutanol Production Rates as a Normal Distribution'
    )
)

## Do it with 5 data points

In [31]:
times = [0.3375695896986812, 
7.287510378903718, 
26.716636645356843, 
49.482257179025325, 
72.72809767739419]
iso_vals = [0.8657026574530278,106.97507701940572,251.14995984555284,
        319.49872730820016,328.36991093426013]

In [32]:
rates = []
for i, val in enumerate(iso_vals):
    if i+1 == len(iso_vals):
        break
    rates.append((iso_vals[i+1] - iso_vals[i]) / (times[i+1] - times[i]))



In [33]:
rates

[15.26766595289079, 7.42055411287761, 3.002280010842011, 0.38162455888323144]

In [34]:
sm = cmdstanpy.CmdStanModel(stan_file='normal_posterior_isob.stan')

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_isobutanol/normal_posterior_isob


In [35]:
data = rates
N_ppc = 200
data_dict = {
    'N': len(data),
    'k': data,
    "N_ppc":N_ppc
}

In [36]:
samples_exp_post = sm.sample(data=data_dict, iter_sampling=1000, chains=4)

samples_exp_post = az.from_cmdstanpy(posterior=samples_exp_post, posterior_predictive=['k_ppc'])

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:start chain 2
INFO:cmdstanpy:finish chain 1
INFO:cmdstanpy:start chain 3
INFO:cmdstanpy:finish chain 2
INFO:cmdstanpy:start chain 4
INFO:cmdstanpy:finish chain 4
INFO:cmdstanpy:finish chain 3


In [37]:
df_mcmc_exp = samples_exp_post.posterior.to_dataframe()
df_mcmc_exp.mean()

mu        0.009230
sigma    11.140681
dtype: float64

In [38]:
mu_ecdf = bokeh_catplot.ecdf(df_mcmc_exp, val='mu', title = 'mu')
sigma_ecdf = bokeh_catplot.ecdf(df_mcmc_exp, val='sigma', title = 'sigma')
bokeh.io.show(row(mu_ecdf, sigma_ecdf))

In [39]:
k_ppc_exp = samples_exp_post.posterior_predictive['k_ppc'].stack(
    {"sample": ("chain", "draw")}
).transpose("sample", "k_ppc_dim_0")

bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        k_ppc_exp,
        percentiles=[30, 50, 70, 99],
        data=data,
        x_axis_label='Isobutanol production rates (mM/hr)',
        title = 'Isobutanol Production Rates as a Normal Distribution (5 data points)'
    )
)

# Isobutanol, NORMAL, all data, with log likelihood

In [54]:
sm_norm_postlog = cmdstanpy.CmdStanModel(stan_file = "../stan_glucose/normal_loglik_posterior.stan")

print(sm_norm_postlog.code())

INFO:cmdstanpy:compiling stan program, exe file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_glucose/normal_loglik_posterior
INFO:cmdstanpy:compiler options: stanc_options=None, cpp_options=None
INFO:cmdstanpy:compiled model file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_glucose/normal_loglik_posterior


data {
  int<lower=0> N;
  real k[N];
  int N_ppc;
}

parameters {
  real<lower=0> mu;
  real<lower=0> sigma;
}

model {

    //Priors
  mu ~ lognormal(0, 10);
  sigma ~ lognormal(0,10);
  //Likelihood
  
  //k ~ normal(0, 1);
  
  k ~ normal(mu, sigma);
}

generated quantities{

    real k_ppc[N_ppc];
    real log_lik[N];
    
    for (i in 1:N_ppc){
        k_ppc[i] = normal_rng(mu, sigma);
        }
        
    for (i in 1:N) {
        log_lik[i] = normal_lpdf(k[i] | mu, sigma);
        }
}




In [55]:
data = rates_pos
N_ppc = 200
data_norm = {
    "N": len(data),
    "k": data,
    "N_ppc": N_ppc
}
samples_norm_postlog = sm_norm_postlog.sample(data=data_norm, iter_sampling=1000, chains=4)

samples_norm_postlog = az.from_cmdstanpy(posterior=samples_norm_postlog, posterior_predictive=['k_ppc'],
                                        log_likelihood = 'log_lik')
df_mcmc_norm_log = samples_norm_postlog.posterior.to_dataframe()
df_mcmc_norm_log.mean()

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:start chain 2
INFO:cmdstanpy:finish chain 2
INFO:cmdstanpy:start chain 3
INFO:cmdstanpy:finish chain 3
INFO:cmdstanpy:start chain 4
INFO:cmdstanpy:finish chain 4
INFO:cmdstanpy:finish chain 1


mu       0.316398
sigma    0.402141
dtype: float64

In [56]:
k_ppc_normlog = samples_norm_postlog.posterior_predictive['k_ppc'].stack(
    {"sample": ("chain", "draw")}
).transpose("sample", "k_ppc_dim_0")

bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        k_ppc_normlog,
        percentiles=[30, 50, 70, 99],
        data=data,
        x_axis_label='glucose binding rates (mM/hr)',
        title = 'Glucose Binding Rates as a Normal Distribution'
    )
)

In [57]:
az.loo(samples_norm_postlog)



  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Computed from 4000 by 81 log-likelihood matrix

         Estimate       SE
elpd_loo   -45.36    18.10
p_loo        7.28        -


# Glucose, GAMMA, all data, with log likelihood

In [50]:
sm_gamma_post = cmdstanpy.CmdStanModel(stan_file = "../stan_glucose/gamma_posteriorlog.stan")

print(sm_gamma_post.code())

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /Users/ankitaroychoudhury/Documents/MURRAY/simulations/bowie_data/stan_glucose/gamma_posteriorlog


data {
  int<lower=0> N;
  real k[N];
  int<lower=0> N_ppc;
}

parameters {
  real<lower=0> alpha;
  real<lower=0> beta;
}

model {

    //Priors
  alpha ~ lognormal(0, 2);
  beta ~ lognormal(0,3);
  //Likelihood
  
  //k ~ normal(0, 1);
  
  k ~ gamma(alpha, beta);
}

generated quantities{

    real k_ppc[N_ppc];
    real log_lik[N];


    for (i in 1:N_ppc){
        k_ppc[i] = gamma_rng(alpha, beta);
        }
        
    for (i in 1:N) {
        log_lik[i] = gamma_lpdf(k[i] | alpha, beta);
        }
}



In [51]:
data = rates_pos
N_ppc = 200
data_norm = {
    "N": len(data),
    "k": data,
    "N_ppc": N_ppc
}
samples_gamma_post = sm_gamma_post.sample(data=data_norm, iter_sampling=1000, chains=4)

samples_gamma_post = az.from_cmdstanpy(posterior=samples_gamma_post, posterior_predictive=['k_ppc'],
                                      log_likelihood ='log_lik')
df_mcmc_gamma_post = samples_gamma_post.posterior.to_dataframe()
df_mcmc_gamma_post.mean()

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:start chain 2
INFO:cmdstanpy:finish chain 1
INFO:cmdstanpy:start chain 3
INFO:cmdstanpy:finish chain 2
INFO:cmdstanpy:start chain 4
INFO:cmdstanpy:finish chain 3
INFO:cmdstanpy:finish chain 4


alpha    1.058780
beta     3.274756
dtype: float64

In [52]:
k_ppc_gammalog = samples_gamma_post.posterior_predictive['k_ppc'].stack(
    {"sample": ("chain", "draw")}
).transpose("sample", "k_ppc_dim_0")

bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        k_ppc_gammalog,
        percentiles=[30, 50, 70, 99],
        data=data,
        x_axis_label='isobutanol binding rates (mM/hr)',
        title = 'Isobutanol Binding Rates as a Gamma Distribution'
    )
)

In [58]:
az.compare({'normal': samples_norm_postlog, 'gamma': samples_gamma_post}, ic='loo')

  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
gamma,0,8.1626,2.56171,0.0,1.0,18.2344,0.0,False,log
normal,1,-45.3551,7.27519,53.5177,2.10422e-16,11.879,10.3133,True,log
