# Computing the calibration factor (in progress)

In [1]:
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.special
import scipy.stats
import statsmodels.tools.numdiff as smnd
import tqdm

import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting
import bokeh.transform
import bokeh_catplot as bkcat

import mwc.bayes
import mwc.process
import mwc.stats
import mwc.viz
# Fetch the project's Bokeh theme; the helper returns two objects, unpacked here
# as `colors` and `color_list`.
# NOTE(review): presumably this also applies the theme globally — confirm in mwc.viz.
colors, color_list = mwc.viz.bokeh_theme()
# Render Bokeh figures inline in the notebook.
bokeh.io.output_notebook()

In [36]:
# Load the snapshots
snaps = pd.read_csv('../../data/raw_compiled_snaps.csv')

# Load the lineages
lineages = pd.read_csv('../../data/raw_compiled_lineages.csv')

# Morphology-filter settings, named once so the two filters below cannot drift apart.
# NOTE(review): presumably ip_dist is the interpixel distance (µm / pixel) and the
# area bounds are in µm² — confirm against mwc.process.morphological_filter.
ip_dist = 0.065
area_bounds = [0.5, 5]
ar_bounds = [0, 0.8]

# The lineage areas are compared against the bounds divided by ip_dist**2.
min_size = area_bounds[0] / ip_dist**2
max_size = area_bounds[1] / ip_dist**2

# Apply morphology filters to both.
snaps = mwc.process.morphological_filter(snaps, area_bounds=area_bounds,
                                         ar_bounds=ar_bounds, ip_dist=ip_dist)

# `between` is inclusive on both ends, matching the original >= / <= comparisons.
# The explicit copy keeps the later column assignment off a slice of the raw frame.
size_ok = (lineages['area_1'].between(min_size, max_size) &
           lineages['area_2'].between(min_size, max_size))
lineages = lineages[size_ok].copy()

# Drop error frames. Missing entries are treated as "no error" (NaN -> 0).
lineages = lineages.assign(error_frame=np.nan_to_num(lineages['error_frame']))
lineages = lineages[lineages['error_frame'] == 0].copy()

# Load the hierarchical model.
# NOTE(review): force_compile=True presumably rebuilds the Stan model on every run;
# consider dropping it once the .stan file is stable.
model = mwc.bayes.StanModel('../stan/hierarchical_calibration_factor.stan', force_compile=True)

Precompiled model not found. Compiling model...


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_85f803e00ef3604cc4b523f8a3ef2120 NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


finished!


In [37]:
# Start by examining **only** the glucose sample at temp == 37.
samp = lineages[(lineages['carbon']=='glucose') & (lineages['temp']==37)].copy()
auto_samp = snaps[(snaps['carbon']=='glucose') & (snaps['temp']==37)].copy()

# Summary statistics of the autofluorescence to attach to every lineage row.
# (The median entry previously pointed at np.mean, so both columns held the mean.)
funcs = {'mean_auto': np.mean, 'median_auto': np.median}

# Iterate through every (date, run_number) replicate and record the
# autofluorescence statistics measured in the matching snapshots.
for (date, run_number), _ in samp.groupby(['date', 'run_number']):
    _auto = auto_samp[(auto_samp['date']==date) &
                      (auto_samp['run_number']==run_number)]['fluor2_mean_death'].values

    # Rows of `samp` belonging to this replicate; built once per replicate.
    in_rep = (samp['date']==date) & (samp['run_number']==run_number)

    # Compute each summary statistic once and add it to the sample.
    for col, stat in funcs.items():
        samp.loc[in_rep, col] = stat(_auto)


In [38]:
# Perform the background subtraction and compute the integrated intensity
# (area times the mean-autofluorescence-subtracted intensity) for each sibling.
samp['I_1_tot'] = samp['area_1'] * (samp['I_1'] - samp['mean_auto'])
samp['I_2_tot'] = samp['area_2'] * (samp['I_2'] - samp['mean_auto'])

# Remove unphysical (negative) values. Rows with NaN intensities are dropped too,
# since NaN >= 0 is False. Copy so the column assignments below write to an
# independent frame rather than a slice (avoids SettingWithCopyWarning).
samp = samp[(samp['I_1_tot'] >= 0) & (samp['I_2_tot'] >= 0)].copy()

# Add 1-based identifiers for each category.
samp['day_idx'] = samp.groupby(['date']).ngroup() + 1
samp['rep_idx'] = samp.groupby(['date', 'run_number']).ngroup() + 1

# Create the mapping between rep_idx and day: entry k is the day_idx of
# replicate k + 1 (groupby iterates rep_idx in sorted order).
rep_map = [d['day_idx'].unique()[0] for _, d in samp.groupby('rep_idx')]

# Set up the data dictionary.
data_dict = {'J_day': samp['day_idx'].max(),
             'K_rep': samp['rep_idx'].max(),
             'N_fluct': len(samp),
             'day_idx': rep_map,
             'rep_idx': samp['rep_idx'].values,
             'I_1': samp['I_1_tot'].values,
             'I_2': samp['I_2_tot'].values}

# Sample the posterior.
# NOTE(review): iter=500 is a short run; check n_eff / Rhat in the fit summary
# before trusting the samples.
fit, mcmc_samples = model.sample(data_dict, iter=500)

Beginning sampling...




finished sampling!


In [40]:
# Display the Stan fit summary (posterior statistics plus n_eff / Rhat diagnostics).
# NOTE(review): in the saved output n_eff is 2 and Rhat is far from 1 for the
# parameters shown, so these chains have not converged — treat the values as provisional.
fit

Inference for Stan model: anon_model_85f803e00ef3604cc4b523f8a3ef2120.
4 chains, each with iter=500; warmup=250; thin=1; 
post-warmup draws per chain=250, total post-warmup draws=1000.

                  mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
tau_alpha       -48.95   75.09 106.31 -126.8 -117.6 -98.95  31.12  134.9      2  97.15
alpha_1         6670.9  203.81 301.15 6196.0 6463.3 6813.5 6961.1 7022.7      2    5.7
alpha_2_raw[1]   13.77    16.6   23.5 -26.49  -4.88  24.06  29.68   33.3      2 112.28
alpha_2_raw[2]    5.61    4.99   7.06  -6.57   0.34   9.32   10.1  10.54      2 184.54
alpha_2_raw[3]   -2.41    1.36   1.93  -4.15  -3.81  -3.16  -0.85   0.85      2 983.57
alpha_2_raw[4]   -4.68    6.42   9.09 -10.76 -10.49  -9.51   2.14  11.01      2 731.24
alpha_2_raw[5]  -15.72    18.8  26.61 -34.18 -32.67 -29.24   3.94   30.2      2 656.01
alpha_2_raw[6]   -4.78    7.38  10.45 -11.71  -11.2 -10.35   2.82  13.28      2 805.89
alpha_2_raw[7]    5.09     6.6 