# Writing a data-exploring app for the Dilution Experiment

In [8]:
import sys
sys.path.insert(0, '../../')
import mwc.viz
import mwc.stats
import mwc.io
import numpy as np
import pandas as pd
import bokeh.io
import bokeh.plotting
from bokeh.models import ColumnDataSource, Div
from bokeh.layouts import layout, widgetbox
from bokeh.models.widgets import Select
from datetime import datetime
bokeh.io.output_notebook()
import glob

In [86]:
def data_explorer(doc):
    # Load all microscopy data and include determination of accepted or rejected.
    experiments = glob.glob('../processing/microscopy/*dilution')
    flucts = pd.read_csv('../../data/compiled_fluctuations.csv')

    info_df = pd.DataFrame([])
    for exp in experiments:
        date, run_number, temp, carbon, operator, _ = exp.split('/')[-1].split('_')
        status = mwc.io.scrape_frontmatter(exp)
        data = {'date':date, 'run_number':int(run_number.split('r')[1]), 
                'temp': int(temp.split('C')[0]), 'carbon':carbon, 'operator':operator,
               'status':status['status'].lower(), 'reason':status['reason']}
        
        info_df = info_df.append(data, ignore_index=True)
    
    
    fluct_source = ColumnDataSource(dict(x=[], y=[]))
    alpha_source = ColumnDataSource(dict(x=[], y=[]))
    binned_source = ColumnDataSource(dict(x=[], y=[]))
    
    p_fluct = bokeh.plotting.figure(width=400, height=400, 
                              x_axis_type='log',
                              y_axis_type='log')
    p_alpha= bokeh.plotting.figure(width=400, height=400)
    
    p_fluct.circle(x='x', y='y', source=fluct_source, size=1, color='black')
    p_fluct.circle(x='x', y='y', source=binned_source, size=3, color='firebrick')
    p_alpha.step(x='x', y='y', source=alpha_source, line_width=2, color='black')
    # Define the selectors
    def restrict_date(attr, old, new): 
        date.options = list(info_df[info_df['carbon']==new]['date'].unique())
    def restrict_run(attr, old, new):
        run.options = list(info_df[(info_df['carbon']==carbon.value) &
                                   (info_df['date']==new[0])]['run_number'].unique())

    carbon = Select(title='Carbon Source', value='glucose', options=list(info_df['carbon'].unique()))
    carbon.on_change('value', restrict_date)   
    date = Select(title='Date', value='', options=list(info_df['date'].unique()))   
    carbon.on_change('value', restrict_date)   
    run = Select(title='Run Number', value='1.0', options=list(info_df['run_number'].unique().astype(str)))
    
    
    controls = [carbon, date]
    
    def select_experiment():
        # Get the fluctuation plots 
        carbon_val = carbon.value 
        date_val = date.value
        run_val = run.value
        pref = f'{date_val}_r{int(float(run_val))}_37C_{carbon_val}'
        path = f'{pref}_O2_dilution/output/{pref}_O2_cal_factor_samples.csv'
        samples = pd.read_csv(f'../processing/microscopy/{path}')
        hist, bins = np.histogram(samples['alpha'], bins=75)
        fluct_df = flucts[(flucts.carbon==carbon_val) & 
                          (flucts.date == int(float(date_val))) &
                         (flucts.run_no==int(float(run_val)))]
        
            # Get the alpha samples. 

        return [fluct_df, {'bins':bins[:-1], 'hist':hist}]
     
    def _update_binned_events(data):
        binned = mwc.stats.bin_by_events(data, average=['summed', 'sbin_size=50)
        print(binned.keys())
        binned_source.data = dict(x=[binned['summed'], binned['fluct']])
        
    def update():
        fluct_df, step_dict = select_experiment()
        _update_binned_events(fluct_df)
        fluct_source.data = dict(x=fluct_df['summed'], y=fluct_df['sq_fluct'])
        alpha_source.data = dict(x=step_dict['bins'], y=step_dict['hist'])
        

        
    
    controls = [carbon, date, run]
    for control in controls:
        control.on_change('value', lambda attr, old, new: update())
        
    inputs = widgetbox(*controls, sizing_mode='scale_width')
    lay = layout([[inputs, p_fluct, p_alpha]], sizing_mode='fixed')
    doc.add_root(lay) 
    
bokeh.io.show(data_explorer, notebook_handle=True, notebook_url="localhost:8888")
bokeh.io.push_notebook()

  warn("Cannot find a last shown plot to update. Call output_notebook() and show(..., notebook_handle=True) before push_notebook()")
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '23315'}, 'attr': 'value', 'new': 'acetate'}], 'references': []}: FileNotFoundError("File b'../processing/microscopy/_r1_37C_acetate_O2_dilution/output/_r1_37C_acetate_O2_cal_factor_samples.csv' does not exist",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '23316'}, 'attr': 'value', 'new': '20181023'}], 'references': []}: KeyError("['fluct'] not in index",)


In [82]:
pd.read_csv('../processing/microscopy/20181002_r1_37C_glucose_O2_dilution/20181002_r1_37C_glucose_O2_cal_factor_samples.csv')

FileNotFoundError: File b'../processing/microscopy/20181002_r1_37C_glucose_O2_dilution/20181002_r1_37C_glucose_O2_cal_factor_samples.csv' does not exist

In [4]:
../processing/microscopy/20181002_r1_37C_glucose_O2_dilution/

NameError: name 'flucts' is not defined

In [11]:
# Load all microscopy data and include determination of accepted or rejected.
    experiments = glob.glob('../processing/microscopy/*dilution')
    flucts = pd.read_csv('../../data/compiled_fluctuations.csv')
    info_df = pd.DataFrame([])
    for exp in experiments:
        date, run_number, temp, carbon, operator, _ = exp.split('/')[-1].split('_')
        status = mwc.io.scrape_frontmatter(exp)
        data = {'date':date, 'run_number':int(run_number.split('r')[1]), 
                'temp': int(temp.split('C')[0]), 'carbon':carbon, 'operator':operator,
               'status':status['status'].lower(), 'reason':status['reason']}
        
        info_df = info_df.append(data, ignore_index=True)
    

In [46]:
info_df['date'].values

array(['20181005', '20181016', '20181026', '20181021', '20181019',
       '20181002', '20181026', '20181004', '20181025', '20181005',
       '20181026', '20181019', '20181023', '20181017', '20181025',
       '20181016', '20181011', '20181024', '20181025', '20181018',
       '20181017', '20181022'], dtype=object)

In [72]:
ls ../../

LICENSE           [36mdata[m[m/             [36mmwc[m[m/              [36mtemplates[m[m/
README.md         [36mdoc[m[m/              [36mmwc.egg-info[m[m/     [36mtests[m[m/
[36mcode[m[m/             [36mfigs[m[m/             requirements.txt
codecov.yml       [36mmiscellaneous[m[m/    setup.py
