This notebook implements a simplified estimation problem in Stan for the hydrolysis data, where the underlying model is assumed to be an exponential decay.

In [2]:
# Imports 
# Quiet cmdstanpy's own logger: attach a do-nothing handler, stop records from
# propagating to the root logger, and only let CRITICAL records through.
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

import numpy as np
import pandas as pd

import cmdstanpy
import arviz as az

import iqplot
import bebi103

import bokeh.io
import bokeh.plotting

# bokeh.io.output_notebook()

# Import seaborn for aesthetic plots 
import seaborn as sns

from tqdm.notebook import tqdm

import pandas as pd
import ast

from bokeh.plotting import figure, show, curdoc
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColorBar
from bokeh.transform import linear_cmap
from bokeh.palettes import Viridis256
from bokeh.themes import Theme
from bokeh.layouts import column, row
bokeh.io.output_notebook()

import scipy as sp
import matplotlib.pyplot as plt

import scipy.stats as st

# Plotting params
# NOTE(review): `size` is not referenced in the cells visible here; presumably a
# figure dimension in pixels -- confirm where it is used, or remove it.
size = 500;





### Import Data

In [3]:
# Read data
# One CSV per data set (ATP, ADP, Phosphate); each is loaded into its own DataFrame.
csv_paths = [
    '../../analyzed_data/atp-hydro/ATP.csv',
    '../../analyzed_data/atp-hydro/ADP.csv',
    '../../analyzed_data/atp-hydro/Phosphate.csv',
]

df1, df2, df3 = [pd.read_csv(csv_path) for csv_path in csv_paths]

# `data_location` ends up pointing at the last file that was read.
data_location = csv_paths[-1]

#### ------------- Load and Read Data ------------- ####
# Every list below ends up flat, with one entry per row of the source table(s).
ATP_conc_list = []
ADP_conc_list = []
P_conc_list = []
ATP_curve_list = []
ratio_curve_list = []
linear_r2_list = []
exponential_r2_list = []
linear_hydrolysis_rate_list = []
exponential_hydrolysis_rate_list = []
times_list = []
data_locations_list = []

# Only the ATP table is used; swap in [df1, df2, df3] to pool all three tables.
for df in [df1]:
    # Initial concentrations (one scalar per row).
    ATP_conc_list.extend(np.array(df["ATP Concentration (uM)"]))
    ADP_conc_list.extend(np.array(df["ADP Concentration (uM)"]))
    P_conc_list.extend(np.array(df["P Concentration (uM)"]))

    # Curves are stored in the CSV as Python-literal strings; parse each cell
    # back into a Python object (presumably a list of numbers).
    ATP_curve_list.extend(map(ast.literal_eval, df["ATP Curve (uM)"]))
    ratio_curve_list.extend(map(ast.literal_eval, df["Ratio (A.U.)"]))
    times_list.extend(map(ast.literal_eval, df["Time Array (s)"]))

    # Goodness of fit of the earlier linear / exponential fits.
    linear_r2_list.extend(np.array(df["r-squared for linear fit"]))
    exponential_r2_list.extend(np.array(df["r-squared for exponential fit"]))

    # Hydrolysis rates from those two fits.
    linear_hydrolysis_rate_list.extend(
        np.array(df["Hydrolysis Rate (uM/s/motor) from Linear Fitting (-abs(Slope)/Motconc)"])
    )
    exponential_hydrolysis_rate_list.extend(
        np.array(df["Hydrolysis Rate (uM/s/motor) from Exponential Curve"])
    )

    # Where each row's raw data came from.
    data_locations_list.extend(df["Data Location"])

### Select "Well Behaved" Data

In [143]:
# For every ATP curve, find the cut-off index j: the first sample at which the
# curve drops below the ATP floor. Only samples [0, j) of each curve are kept
# downstream; below the floor the nondimensionalised curves deviate from a
# straight line.
# NOTE(review): the original comment named a floor of 50 uM while the code has
# always used 70 -- the code's value is kept here; confirm which is intended.
ATP_FLOOR_UM = 70
# Cut-off used when the very first sample is already below the floor.
# NOTE(review): the rationale for 10 is not recorded -- confirm.
FALLBACK_CUTOFF = 10

j_list = []

for i in range(len(ATP_curve_list)):
    below_floor = np.where(np.array(ATP_curve_list[i]) < ATP_FLOOR_UM)[-1]

    if below_floor.size == 0:
        # The curve never reaches the floor, so every sample is usable.
        # (Indexing the empty result used to raise IndexError here.)
        j = len(ATP_curve_list[i])
    else:
        j = int(below_floor[0])
        if j == 0:
            j = FALLBACK_CUTOFF

    j_list.append(j)

In [144]:
# Keep only the experiments whose initial ATP concentration exceeds 100 uM.
high_ATP_curves_indices = np.where(np.array(ATP_conc_list) > 100)[-1]

sliced_ATP = []
sliced_time = []
sliced_ATP0 = []
sliced_ADP0 = []
sliced_P0 = []

# Truncate each selected curve (and its time axis) at that curve's own cut-off.
for i in high_ATP_curves_indices:
    j = j_list[i]
    sliced_ATP.append(ATP_curve_list[i][:j])
    sliced_time.append(times_list[i][:j])
    sliced_ATP0.append(ATP_conc_list[i])
    sliced_ADP0.append(ADP_conc_list[i])
    sliced_P0.append(P_conc_list[i])

# Flatten to one long vector per quantity. The curves are already truncated, so
# they are used as-is: re-slicing them with the `j` left over from the last loop
# iteration (as was done before) cut every curve down to the *last* curve's
# cut-off and silently discarded valid points.
flattened_sliced_ATP = [value for curve in sliced_ATP for value in curve]
flattened_sliced_time = [value for curve in sliced_time for value in curve]

# Repeat each experiment's initial concentrations once per retained ATP sample,
# so every data point carries its own atp0 / adp0 / p0.
flattened_sliced_ATP0 = [atp0 for curve, atp0 in zip(sliced_ATP, sliced_ATP0) for _ in curve]
flattened_sliced_ADP0 = [adp0 for curve, adp0 in zip(sliced_ATP, sliced_ADP0) for _ in curve]
flattened_sliced_P0 = [p0 for curve, p0 in zip(sliced_ATP, sliced_P0) for _ in curve]

# Data dictionary handed to the Stan model.
data_sliced = {
    'N': len(flattened_sliced_ATP),  # total number of datapoints
    'atp': flattened_sliced_ATP,
    'time': flattened_sliced_time,
    'atp0': flattened_sliced_ATP0,
    'adp0': flattened_sliced_ADP0,
    'p0': flattened_sliced_P0,
}


In [145]:
def y_theoretical_simplified(time, ktime, tau, atp0):
    """Simplified theoretical ATP curve: a time-shifted exponential decay.

    When Keff is very large compared to the ATP concentration, the theoretical
    equation reduces to a decaying exponential. This function evaluates

        y = atp0 * exp(-(time + tau) / ktime)

    Parameters
    ----------
    time : scalar or numpy array
        Time point(s) at which to evaluate the curve.
    ktime : scalar
        Decay time constant, in the same units as `time`.
    tau : scalar
        Time offset added to `time` before the decay is applied.
    atp0 : scalar or numpy array
        Initial ATP concentration that scales the curve.

    Returns
    -------
    Scalar or numpy array, following numpy broadcasting of the inputs.

    Notes
    -----
    An earlier (disabled) variant derived the time constant from the initial
    concentrations instead of taking it as an argument:
    ktime = C2 * (1 + (atp0 + adp0) / KD + (atp0 + p0) / KP).
    """
    shifted_time = time + tau
    decay_factor = np.exp(-shifted_time / ktime)
    return atp0 * decay_factor

In [146]:
# Visual sanity check on a log scale: compare the first data points with the
# simplified model evaluated at hand-picked parameter guesses.
n_preview = min(100, len(data_sliced["atp"]))  # never index past the available data
ktime_guess = 5000
tau_guess = 2500

time_preview = np.asarray(data_sliced["time"][:n_preview])
atp_preview = np.asarray(data_sliced["atp"][:n_preview])
atp0_preview = np.asarray(data_sliced["atp0"][:n_preview])

p = figure(x_axis_label="time (s)", y_axis_label="ln ATP (uM)")

# One glyph per series (instead of one glyph per point), in distinct colours so
# that data and model can be told apart.
p.scatter(time_preview, np.log(atp_preview), color="steelblue", legend_label="data")
p.scatter(
    time_preview,
    np.log(y_theoretical_simplified(time_preview, ktime_guess, tau_guess, atp0_preview)),
    color="tomato",
    legend_label="simplified model (guess)",
)

show(p)

### Load Stan Model

In [147]:
# Build the CmdStanModel from the Stan program in `simplified_estimation.stan`
# (the path is resolved relative to the notebook's working directory).
sm = cmdstanpy.CmdStanModel(stan_file='simplified_estimation.stan')
# Uncomment to display the Stan source of the model:
# print(sm.code())

In [148]:
# Draw posterior samples. The seed is fixed so that a Restart & Run All
# reproduces the same draws; change it to obtain an independent run.
mcmc_fit = sm.sample(
    data=data_sliced,
    show_console=False,
    adapt_delta=0.8,
    seed=20240501,
)

# Convert to an ArviZ InferenceData object. The raw cmdstanpy fit stays available
# as `mcmc_fit` instead of being overwritten by the converted object.
samples = az.from_cmdstanpy(mcmc_fit)

chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                


In [149]:
# Run bebi103's sampler diagnostics on the posterior; it prints a report covering
# effective sample size, Rhat, divergences, tree depth and E-BFMI.
# NOTE(review): the returned value is displayed as the cell output -- presumably
# 0 means that no check raised a warning; confirm against the bebi103 docs.
bebi103.stan.check_all_diagnostics(samples)

Effective sample size looks reasonable for all parameters.

Rhat looks reasonable for all parameters.

0 of 4000 (0.0%) iterations ended with a divergence.

0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.

E-BFMI indicated no pathological behavior.


0

In [150]:
# Marginal posterior histograms, one panel per model parameter.
hist_kwargs = dict(rug=False, frame_height=200, frame_width=250)

plots = []
for param in ("ktime", "tau"):
    # Pool the draws of all chains into a single 1-D array.
    draws = samples.posterior[param].values.ravel()
    plots.append(iqplot.histogram(draws, q=param, **hist_kwargs))

grid = bokeh.layouts.gridplot(plots, ncols=4)
bokeh.io.show(grid)


In [151]:
ktime = 7000;
tau = 7;