# Computing the calibration factor (in progress)

In [52]:
import numpy as np 
import pandas as pd
import bokeh.io 
import bokeh.plotting
import bokeh_catplot as bkcat 
import scipy.optimize
import mwc.stats 
import mwc.bayes 
import mwc.viz
import bokeh.models
import bokeh.transform
import tqdm
import bokeh.palettes
import scipy.stats
import scipy.special
import statsmodels.tools.numdiff as smnd
import tqdm
colors, color_list = mwc.viz.bokeh_theme()
bokeh.io.output_notebook()

In [78]:
# Load the snapshots
snaps = pd.read_csv('../../data/raw_compiled_snaps.csv')

# Load the lineages
lineages = pd.read_csv('../../data/raw_compiled_lineages.csv')

# Apply morphology filters to both. 
min_size = 0.5 / 0.065**2
max_size = 5 / 0.065**2
snaps = mwc.process.morphological_filter(snaps, area_bounds=[0.5, 5], 
                                         ar_bounds=[0, 0.8], ip_dist=0.065)
lineages = lineages[(lineages['area_1'] >= min_size) & (lineages['area_2'] >= min_size) &
                   (lineages['area_1'] <= max_size) & (lineages['area_2'] <= max_size)]

# Drop error frames 
lineages['error_frame'] = np.nan_to_num(lineages['error_frame'])
lineages = lineages[lineages['error_frame'] == 0].copy()


# Load the hierarchical model. 
# model = mwc.bayes.StanModel('../stan/calibration_factor.stan') #, force_compile=True)
model = mwc.bayes.StanModel('../stan/hierarchical_calibration_factor.stan', force_compile=True)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_ec9b6f17a0c5c7c8560abb5a3db50f1e NOW.


Precompiled model not found. Compiling model...


  tree = Parsing.p_module(s, pxd, full_module_name)


finished!


In [86]:
# Start with examining **only** the glucose sample
samp = lineages[(lineages['carbon']=='acetate') & (lineages['temp']==37)].copy()
auto_samp = snaps[(snaps['carbon']=='acetate') & (snaps['temp']==37) & (snaps['strain']=='auto')].copy()

# Iterate through all of the days and run numbers and subratcting the chosen value
funcs = {'mean_auto':np.mean, 'median_auto':np.mean}
for g, d in samp.groupby(['date', 'run_number']):
    _auto = auto_samp[(auto_samp['date']==g[0]) &
                     (auto_samp['run_number']==g[1])]['fluor2_mean_death'].values
    # Compute the summary statistica and add it to the samp
    for v, f in funcs.items():
        auto_mch = f(_auto)
        samp.loc[(samp['date']==g[0]) & (samp['run_number']==g[1]), v] = f(_auto)


In [None]:
# Perform the background subtraction and compute integrated intensity. 
samp['I_1_tot'] = samp['area_1'] * (samp['I_1'].values - samp['median_auto'])
samp['I_2_tot'] = samp['area_2'] * (samp['I_2'].values - samp['median_auto'])

# Remove unphysical values. 
samp = samp[(samp['I_1_tot'] >=0) & (samp['I_2_tot'] >= 0)]

# Add identifiers for each category.
samp['day_idx'] = samp.groupby(['date']).ngroup() + 1
samp['rep_idx'] = samp.groupby(['date', 'run_number']).ngroup() + 1

# Create the mapping between rep_idx and day
rep_map = []
for g, d in samp.groupby('rep_idx'):
    rep_map.append(d['day_idx'].unique()[0])
    
# Set up the data dictionary. 
data_dict = {'J_day':samp['day_idx'].max(), 
             'K_rep': samp['rep_idx'].max(), 
             'N_fluct': len(samp),
             'day_idx': rep_map,
             'rep_idx': samp['rep_idx'].values,
             'I_1':samp['I_1_tot'].values,
             'I_2':samp['I_2_tot'].values}

# Sample the motherfucker
fit, mcmc_samples = model.sample(data_dict, iter=5000)

Beginning sampling...


In [88]:
fit

Inference for Stan model: anon_model_ec9b6f17a0c5c7c8560abb5a3db50f1e.
4 chains, each with iter=500; warmup=250; thin=1; 
post-warmup draws per chain=250, total post-warmup draws=1000.

                  mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
tau_alpha         23.3   14.76  20.99  -3.55   1.46  25.42  43.02  47.39      2  19.48
alpha_1         159.66   11.34  16.66 126.87 147.83 165.24 171.89 177.05      2   5.81
alpha_2_raw[1]   -0.25    0.77   1.09  -1.52  -1.37  -0.25   0.84   1.03      2  75.82
alpha_2_raw[2]    0.38    0.34   0.48  -0.36  -0.09   0.44   0.85   0.89      2  15.42
alpha_2_raw[3]   -1.18    0.87   1.24  -2.75  -2.49  -1.21  -0.17   0.61      2   10.7
alpha_2_raw[4]    0.17    0.58   0.83  -1.01  -0.57   0.17   0.75   1.38      2  10.72
alpha_2_raw[5]   -0.06    0.86   1.23  -2.09  -0.73  -0.07   1.03   1.58      2   8.81
alpha_2_raw[6]    0.16    0.66   0.93  -1.18  -0.65   0.18   0.97   1.44      2 362.37
alpha_2_raw[7]   -0.46    0.86 

In [74]:
stats_dfs = pd.DataFrame([])
for g, d in tqdm.tqdm(samp.groupby(['date', 'run_number'])):
    # Set up the data dict and sample
    data_dict = {'N': len(d), 'I1':d['I_1_tot'], 'I2':d['I_2_tot']}
    _, samples = model.sample(data_dict, iter=2000)

    # Compute the important stats of alpha. 
    mean_alpha = np.median(samples['alpha'])
    alpha_min,alpha_max = mwc.stats.compute_hpd(samples['alpha'], 0.95)
    stats_dfs = stats_dfs.append({'date':g[0], 'run_number':g[1], 
                                  'mean_alpha':mean_alpha,
                                  'alpha_min':alpha_min, 
                                  'alpha_max':alpha_max}, ignore_index=True)

  0%|          | 0/12 [00:00<?, ?it/s]

Beginning sampling...


  8%|▊         | 1/12 [00:01<00:16,  1.49s/it]

finished sampling!
Beginning sampling...


 17%|█▋        | 2/12 [00:07<00:28,  2.85s/it]

finished sampling!
Beginning sampling...


 25%|██▌       | 3/12 [00:10<00:25,  2.83s/it]

finished sampling!
Beginning sampling...


 33%|███▎      | 4/12 [00:13<00:23,  2.92s/it]

finished sampling!
Beginning sampling...


 42%|████▏     | 5/12 [00:15<00:19,  2.78s/it]

finished sampling!
Beginning sampling...


 50%|█████     | 6/12 [00:18<00:16,  2.76s/it]

finished sampling!
Beginning sampling...


 58%|█████▊    | 7/12 [00:19<00:11,  2.28s/it]

finished sampling!
Beginning sampling...


 67%|██████▋   | 8/12 [00:21<00:08,  2.22s/it]

finished sampling!
Beginning sampling...


 75%|███████▌  | 9/12 [00:23<00:05,  1.91s/it]

finished sampling!
Beginning sampling...


 83%|████████▎ | 10/12 [00:23<00:03,  1.59s/it]

finished sampling!
Beginning sampling...


 92%|█████████▏| 11/12 [00:29<00:02,  2.70s/it]

finished sampling!
Beginning sampling...


100%|██████████| 12/12 [00:32<00:00,  2.94s/it]

finished sampling!





In [76]:
stats_dfs

Unnamed: 0,alpha_max,alpha_min,date,mean_alpha,run_number
0,63.711477,61.518947,20181021.0,62.635996,1.0
1,45.788854,43.60825,20181024.0,44.694171,1.0
2,52.606496,50.471268,20181025.0,51.460419,1.0
3,62.058422,59.92162,20181121.0,61.023441,1.0
4,55.151051,52.839682,20181127.0,53.973398,1.0
5,36.746332,34.516709,20190102.0,35.698389,1.0
6,26.631794,24.430548,20190102.0,25.528411,2.0
7,47.121473,44.891318,20190103.0,46.051134,1.0
8,41.079966,38.803902,20190103.0,39.954739,2.0
9,35.985518,33.644575,20190104.0,34.779607,1.0
