In [180]:
# https://realpython.com/python-data-visualization-bokeh/

import os, json

import pickle as pkl
import pandas as pd

from collections import defaultdict

from bokeh import palettes
from bokeh.io import output_file
from bokeh.plotting import figure, save, reset_output
from bokeh.models import ColumnDataSource, GroupFilter, CDSView, HoverTool

In [190]:
def get_chunk_data(chunk):
    """Summarise each event of ``chunk`` as ``((component, event), time, parameters)``.

    ``component`` is the event's ``phetioID`` with its first two dot-separated
    segments removed. ``parameters`` is ``None`` when the event has no such key.
    """
    summary = []

    for event in chunk:
        # Keep everything after the second dot of the identifier
        component = ".".join(event["phetioID"].split(".")[2:])
        key = (component, event["event"])
        summary.append((key, event["time"], event.get("parameters")))

    return summary

def get_chunk_head(chunk):
    """Return the ``get_chunk_data`` summary of the first event in ``chunk``.

    Only the first event is converted, so the cost no longer grows with the
    chunk length and later events are not inspected at all.

    Raises:
        IndexError: if ``chunk`` contains no events.
    """
    for first_event in chunk:
        return get_chunk_data([first_event])[0]

    # Same exception type that indexing an empty summary list would raise
    raise IndexError("list index out of range")

def flatten(original):
    """Flatten a nested entry into a list of entries in depth-first pre-order.

    Each visited entry is shallow-copied, so the input is not modified. When an
    entry has a non-empty ``"children"`` value, that key is removed from the
    copy and the children are flattened right after their parent. A missing or
    empty ``"children"`` value is left untouched.

    The walk uses an explicit stack instead of recursion, so nesting depth is
    not bounded by the interpreter's recursion limit.
    """
    flat = []
    pending = [original]

    while pending:
        entry = pending.pop().copy()
        flat.append(entry)

        children = entry.get("children")

        if children:
            entry.pop("children")
            # Reverse so the first child ends on top of the stack and is visited next
            pending.extend(reversed(list(children)))

    return flat

def get_chunks_set(data, n_experiments=3):
    """Flatten every recorded event tree, grouped by key and experiment index.

    Args:
        data: mapping whose values can be indexed by experiment number
            (``0 .. n_experiments - 1``); each indexed item is an iterable of
            nested entries accepted by ``flatten``.
        n_experiments: how many experiment indices to read per key. Defaults
            to 3, the value that was previously hard-coded.

    Returns:
        ``defaultdict(dict)`` such that ``result[key][exp]`` is a list holding
        one flattened list per entry of ``data[key][exp]``.
    """
    chunks = defaultdict(dict)

    for key, experiments in data.items():
        for exp in range(n_experiments):
            # flatten already returns a fresh list, no extra copy needed
            chunks[key][exp] = [flatten(tree) for tree in experiments[exp]]

    return chunks

def chunks_to_df(chunks, mapping):
    """Build a per-event DataFrame from the head of every chunk.

    Args:
        chunks: iterable of chunks; only each chunk's head (see
            ``get_chunk_head``) is used.
        mapping: nested lookup, ``mapping[component][event]`` gives the
            ``"object-action"`` name stored in the ``name`` column.

    Returns:
        DataFrame with columns ``name``, ``time``, ``data``, ``object`` and
        ``action``. Rows whose action is exactly ``"drag"`` are removed, and
        ``time`` is shifted to start at zero and divided by 1000 (presumably
        milliseconds to seconds; confirm against the recorder). A leading
        ``drag-*end`` or trailing ``drag-*start`` without its counterpart is
        cut off together with the rows before/after it. An empty input yields
        an empty frame with the same five columns.
    """
    base_columns = ["name", "time", "data"]
    heads = [get_chunk_head(c) for c in chunks]
    rows = [(mapping[p1][p2], time, data) for (p1, p2), time, data in heads]

    if not rows:
        return pd.DataFrame(columns=base_columns + ["object", "action"])

    df = pd.DataFrame(rows, columns=base_columns)

    # Define new columns: split "object-action" on the first dash only
    df[["object", "action"]] = df["name"].str.split("-", n=1, expand=True)

    # Remove useless rows and reset index
    df = df[df["action"] != "drag"].reset_index(drop=True)

    if df.empty:
        return df

    # Set time to seconds and start at zero
    df["time"] = (df["time"] - df["time"].iloc[0]) / 1000

    # Eliminate potential cut drag data
    drags = df[df["action"].str[:5] == "drag-"]

    if drags.empty:
        return df

    first_drag = drags.iloc[0]
    last_drag = drags.iloc[-1]

    if first_drag["action"].endswith("end"):
        df = df.loc[int(first_drag.name) + 1:]

    if last_drag["action"].endswith("start"):
        # .loc slices include the end label, so stop one row earlier to
        # actually drop the unmatched start
        df = df.loc[:int(last_drag.name) - 1]

    return df

def df_split(df):
    """Split an event frame into drag intervals, laser intervals and other events.

    Args:
        df: frame with at least ``object``, ``action``, ``time`` and ``data``
            columns (as produced by ``chunks_to_df``). ``data`` of every
            ``laser`` row must support ``["newValue"]``.

    Returns:
        ``(drags, laser, others)``:

        * ``drags`` - columns ``object``, ``len``, ``mid``; consecutive drag
          rows are paired as (start, end).
        * ``laser`` - columns ``len``, ``mid``, ``object`` (always
          ``"laser"``); consecutive ``laser``/``sim`` events, ordered by time,
          are paired as (start, end).
        * ``others`` - the rows of ``df`` that are neither drag events nor
          ``laser`` events, re-indexed from zero. ``sim`` rows are kept here.
    """
    # Row masks are taken on the untouched frame: the paired frames built
    # below get a fresh 0..n index that says nothing about rows of df.
    is_drag = df["action"].str[:4] == "drag"
    is_laser = df["object"] == "laser"
    is_sim = df["object"] == "sim"

    # Split drag starts and finishes and concatenate
    drag_rows = df[is_drag]
    drag_starts = drag_rows[::2][["object", "time"]].reset_index(drop=True)
    drag_ends = drag_rows[1::2][["time"]].reset_index(drop=True)
    drags = pd.concat([drag_starts, drag_ends], axis=1)
    drags.columns = ["object", "start", "end"]

    # Define new columns
    drags["len"] = drags["end"] - drags["start"]
    drags["mid"] = (drags["end"] + drags["start"]) / 2
    drags = drags[["object", "len", "mid"]]

    # Laser events carry their new state; every sim event counts as "off".
    # Copies avoid writing into a filtered view of df.
    laser_rows = df[is_laser].copy()
    laser_rows["state"] = laser_rows["data"].apply(lambda d: d["newValue"])
    sim_rows = df[is_sim].copy()
    sim_rows["state"] = False
    events = pd.concat([laser_rows, sim_rows]).sort_values("time", ignore_index=True)

    # Only the event times are needed to build the intervals
    times = events["time"].tolist()

    # Deal with first data point oddities: the first event is an "off"
    if times and not events.loc[0, "state"]:
        if times[0]:
            # Open an interval at time zero that this event closes
            times.insert(0, 0)
        else:
            # An "off" at time zero closes nothing
            del times[0]

    # Close a dangling interval at the last recorded time
    if len(times) % 2:
        times.append(df["time"].max())

    # Split starts and finishes
    laser = pd.DataFrame({"start": times[::2], "end": times[1::2]}, dtype=float)

    # Define new columns
    laser["len"] = laser["end"] - laser["start"]
    laser["mid"] = (laser["end"] + laser["start"]) / 2
    laser = laser[["len", "mid"]].copy()
    laser["object"] = "laser"

    # Define others
    others = df[~is_drag & ~is_laser].reset_index(drop=True)

    return drags, laser, others

In [182]:
# Nested name lookup passed to chunks_to_df
with open("entries.json", "r") as entries_file:
    mapping = json.load(entries_file)

# NOTE(review): unpickling can run arbitrary code - only load a trusted data.pkl
with open("data.pkl", "rb") as data_file:
    data = pkl.load(data_file)

# Work on a shallow copy of the loaded mapping and show which keys it holds
data = dict(data)
data.keys()

dict_keys(['David', 'Gr1', 'Gr2', 'Gr3', 'Gr4', 'Gr5'])

In [183]:
# Every component name that gets a colour; a name's position in this list
# decides which palette colour it is paired with.
entries = [
    "laser", "ruler", "concentration", "wavelength", "container",
    "solution", "window", "probe", "mode", "sim",
]

# Components looked up in the drag intervals
drag_entries = ["ruler", "concentration", "wavelength", "container", "probe"]

# Components looked up in the laser intervals
laser_entries = ["laser"]

# Components looked up in the remaining events
other_entries = ["concentration", "wavelength", "solution", "window", "mode", "sim"]

# Component name -> colour
palette = {name: colour for name, colour in zip(entries, palettes.Category10[10])}

# Keyword arguments shared by every figure
figure_kwargs = dict(
    plot_width=1200,
    x_axis_label="Time (seconds)",
    y_axis_label="Groups",
    toolbar_location="below",
)

alpha = 0.7

# Keyword arguments shared by every rectangle glyph; the outline is a little
# more opaque than the fill, capped at fully opaque
common_kwargs = dict(
    height=.2,
    fill_alpha=alpha,
    line_alpha=min(1, alpha + .2),
)

In [191]:
def add_entry_rects(fig, source, entry_names, x, y, width):
    """Add one rectangle glyph per name in ``entry_names`` to ``fig``.

    Each glyph only shows the rows of ``source`` whose ``object`` column equals
    the name, and is coloured and labelled after it. ``x``, ``y`` and
    ``width`` are passed to ``fig.rect`` unchanged, so each may be a column
    name or a fixed value.
    """
    for entry in entry_names:
        fil = [GroupFilter(column_name='object', group=entry)]
        view = CDSView(source=source, filters=fil)

        fig.rect(
            **common_kwargs,
            view=view,
            source=source,
            color=palette[entry],
            legend_label=entry,
            x=x,
            y=y,
            width=width,
            muted_alpha=0.1
        )


# Generate chunks
chunks_set = get_chunks_set(data)

laser_mem = []

# Saving fails on a fresh checkout if the output directory is missing
os.makedirs("bokeh", exist_ok=True)

for exp in range(3):
    # New output, tasks are numbered from 1
    task = str(exp + 1)
    output_file(os.path.join("bokeh", "plot_task_" + task + ".html"))
    fig = figure(**figure_kwargs, title="Beer's Law Lab, Task " + task)

    for i, team in enumerate(chunks_set.keys()):
        # Convert data
        df = chunks_to_df(chunks_set[team][exp], mapping)
        drags, laser, others = df_split(df)

        # Make CDSs
        drag_cds = ColumnDataSource(drags)
        laser_cds = ColumnDataSource(laser)
        other_cds = ColumnDataSource(others)

        # Drag intervals sit on the team's row
        add_entry_rects(fig, drag_cds, drag_entries, x='mid', y=i, width='len')

        # Laser intervals are drawn slightly above the team's row
        add_entry_rects(fig, laser_cds, laser_entries, x='mid', y=i + .2, width='len')

        # Remaining events are thin fixed-width marks at their time
        add_entry_rects(fig, other_cds, other_entries, x='time', y=i, width=.1)

    tooltips = [('Component', "@object")]
    fig.add_tools(HoverTool(tooltips=tooltips))
    fig.legend.click_policy = 'hide'
    save(fig)
    reset_output()
    print("Experiment", exp, "generated")

Experiment 0 generated
Experiment 1 generated
Experiment 2 generated
