# Interactive Plots with Bokeh

## Motivation

When doing research we often try many specifications ...

However, it's extremely difficult to recognize patterns by staring at large regression tables!

## What we'll build today: A plot to compare estimation results

- Plots should be designed to answer questions
- While answering one question, a plot should display only the information that is relevant to that question.
- Interactive plots can display and hide information as needed as we explore the data!

### The questions our plot will be able to answer:

1. Are parameters different across models?
2. Is the difference relevant in comparison to standard errors?
3. Are some models responsible for all outliers? 
4. Are there other patterns across models?
5. Are there patterns across classes of models?

**Credits:**

- Janos for the idea
- Laura for a basic version

In [1]:
from comparison_plot import comparison_plot

In [2]:
# matplotlib inline
# Configure Bokeh to display its output inside the notebook.
from bokeh.plotting import output_notebook
output_notebook()

In [3]:
import pickle
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the two pickled result sets that are used throughout the notebook.
just_tnc = _read_pickle('just_tnc.pickle')
full_res = _read_pickle('full_res.pickle')

# First demonstration of the finished plot, using the smaller result set.
grid, plots = comparison_plot(just_tnc, width=500)

In [4]:
# Color assigned to each model class, keyed by the name of the class.
color_dict = {
    "TNC": "#035096",
    "SLSQP": "firebrick",
    "L-BFGS-B": "forestgreen",
}

# Build the comparison plot for the full result set with these colors.
grid, plots = comparison_plot(
    full_res,
    color_dict,
    height=800,
    width=500,
)

# Introduction to Bokeh

In [5]:
import numpy as np
import pandas as pd
import bokeh

## Our Inputs

In [6]:
from comparison_plot import process_inputs

# Arguments for the preprocessing step, collected in one place.
process_kwargs = {
    "data_dict": full_res,
    "color_dict": color_dict,
    "marker_dict": None,
    "height": 800,
    "width": 500,
}
df, param_groups_and_heights, plot_width = process_inputs(**process_kwargs)

# Columns shown when previewing the data in the following cells.
cols = [
    "model_class",
    "model",
    "color",
    "group",
    "name",
    "start_value",
    "final_value",
]

In [7]:
# Preview of the data: rows 115 to 124, restricted to the columns in `cols`.
df.loc[:, cols].iloc[115:125]

Unnamed: 0,model_class,model,color,group,name,start_value,final_value
115,TNC,scipy_TNC_9,#035096,b,b0,2.581347,-0.003407
116,TNC,scipy_TNC_9,#035096,a,a1,-0.370705,0.011009
117,TNC,scipy_TNC_9,#035096,b,b1,1.192545,-0.0011
118,TNC,scipy_TNC_9,#035096,a,a2,-2.92162,0.002294
119,TNC,scipy_TNC_9,#035096,b,b2,0.095838,-0.000222
120,SLSQP,scipy_SLSQP_0,firebrick,a,a0,-0.096743,-0.005357
121,SLSQP,scipy_SLSQP_0,firebrick,b,b0,-1.090278,0.000536
122,SLSQP,scipy_SLSQP_0,firebrick,a,a1,-2.834522,-0.010051
123,SLSQP,scipy_SLSQP_0,firebrick,b,b1,1.742569,0.001005
124,SLSQP,scipy_SLSQP_0,firebrick,a,a2,-0.336757,-0.033638


In [8]:
# Start with a single group of parameters: keep only the rows of group 'a'.
in_group_a = df['group'] == 'a'
a_data = df.loc[in_group_a]
a_data[cols].head()

Unnamed: 0,model_class,model,color,group,name,start_value,final_value
0,L-BFGS-B,scipy_L-BFGS-B_0,forestgreen,a,a0,-0.096743,0.003724
2,L-BFGS-B,scipy_L-BFGS-B_0,forestgreen,a,a1,-2.834522,-0.004766
4,L-BFGS-B,scipy_L-BFGS-B_0,forestgreen,a,a2,-0.336757,0.00409
6,L-BFGS-B,scipy_L-BFGS-B_1,forestgreen,a,a0,2.240667,0.020769
8,L-BFGS-B,scipy_L-BFGS-B_1,forestgreen,a,a1,1.303548,-0.002299


## Our First Plot with Bokeh: A Simple Scatter Plot

In [9]:
from bokeh.plotting import figure, show

# An empty figure with Bokeh's default settings.
p = figure()

# One marker per row of `a_data`; the string arguments are column names.
p.scatter(x="start_value", y="final_value", color="color", source=a_data)

show(p)

## Styling plots is very similar to matplotlib...

In [10]:
# The same scatter plot as in the previous cell, now with a title,
# explicit dimensions, axis labels and larger, semi-transparent markers.
p = figure(
    title="Dependence of the Final Parameter on the Start Value",
    plot_height=300,
    plot_width=500,
)

# `scatter` is used here (instead of `circle`) so that this cell matches
# every other cell of the notebook; `circle` with a `size` argument is
# deprecated in newer Bokeh releases.
p.scatter(
    source=a_data,
    x="start_value",
    y="final_value",
    color="color",
    size=12,
    alpha=0.5,
)

p.xaxis.axis_label = 'Start Value'
p.yaxis.axis_label = 'Final Value'

show(p)

### When we have a non-numeric axis we need to pass its range explicitly ...

In [11]:
p = figure(
    title="Comparison Plot of A Parameters",
    plot_height=300,
    plot_width=500,

    # =======================================
    # A categorical axis needs its values (and their order) spelled out.
    y_range=sorted(a_data['full_name'].unique(), reverse=True),
    # =======================================
)

p.scatter(
    # Plot the same subset that the y_range and the title refer to
    # (the 'a' parameters only), not the full data set.
    source=a_data,
    x="final_value",

    # =======================================
    y="full_name",
    # =======================================

    color="color",
    size=12,
    alpha=0.5,
)

show(p)

### Add the Confidence Intervals as Whiskers

In [12]:
# Parameter names of group 'a' in reverse alphabetical order; these are the
# categories of the y axis.
a_param_names = sorted(a_data['full_name'].unique(), reverse=True)

p = figure(
    title="Comparison Plot of A Parameters",
    y_range=a_param_names,
    plot_width=500,
    plot_height=300,
)

# Point estimates.
p.scatter(
    x="final_value",
    y="full_name",
    source=a_data,
    size=12,
    color="color",
    alpha=0.5,
)

# New in this step: the confidence intervals, drawn as very thin
# horizontal bars from the lower to the upper bound.
p.hbar(
    y="full_name",
    left="conf_int_lower",
    right="conf_int_upper",
    height=0.01,
    source=a_data,
    color="color",
    alpha=0.2,
)

show(p)

## Our First Interaction: Only Show the Confidence Interval of Selected Point(s)

In [13]:
from bokeh.models import ColumnDataSource

a_param_names = sorted(a_data['full_name'].unique(), reverse=True)

# New in this step: only the tap tool is activated on the figure.
p = figure(
    title="Comparison Plot of A Parameters",
    plot_height=300,
    plot_width=500,
    y_range=a_param_names,
    tools="tap",
)

# New in this step: both glyphs read from one explicit ColumnDataSource.
source = ColumnDataSource(a_data)

# New in this step: every glyph keeps its own color in all three states
# (default, selected, not selected); only the transparency differs.
state_colors = dict(
    color="color",
    selection_color="color",
    nonselection_color="color",
)

# Point estimates: selected points become more opaque, the others fade.
p.scatter(
    source=source,
    x="final_value",
    y="full_name",
    size=12,
    alpha=0.5,
    selection_alpha=0.7,
    nonselection_alpha=0.2,
    **state_colors
)

# Confidence intervals: fully transparent unless the row is selected.
p.hbar(
    source=source,
    left="conf_int_lower",
    right="conf_int_upper",
    y="full_name",
    height=0.01,
    alpha=0.0,
    selection_alpha=1,
    nonselection_alpha=0.0,
    **state_colors
)

show(p)

### Now do this for every group of parameters and arrange them in a grid ...

In [14]:
# We now have an additional input:
# a list of tuples, each holding the name of a parameter group
# and the height allotted to the plot of that group.
param_groups_and_heights

[('a', 400), ('b', 400)]

In [15]:
from bokeh.layouts import gridplot

# One data source with all parameters, shared by every plot of the grid.
source = ColumnDataSource(df)

# Every glyph keeps its own color in all three selection states.
state_colors = dict(
    color="color",
    selection_color="color",
    nonselection_color="color",
)

# New in this step: one figure per parameter group, collected in a list.
# The height that comes with each group is not used yet; all figures get
# the same fixed size.
figures = []
for group_name, _height in param_groups_and_heights:
    in_group = df['group'] == group_name
    group_param_names = sorted(
        df.loc[in_group, "full_name"].unique(), reverse=True
    )

    p = figure(
        title="Comparison Plot of {} Parameters".format(group_name.title()),
        plot_height=300,
        plot_width=600,
        y_range=group_param_names,
        tools="tap",
    )

    # Point estimates.
    p.scatter(
        source=source,
        x="final_value",
        y="full_name",
        size=12,
        alpha=0.5,
        selection_alpha=0.7,
        nonselection_alpha=0.2,
        **state_colors
    )

    # Confidence intervals, only visible for selected rows.
    p.hbar(
        source=source,
        left="conf_int_lower",
        right="conf_int_upper",
        y="full_name",
        height=0.01,
        alpha=0.0,
        selection_alpha=0.25,
        nonselection_alpha=0.0,
        **state_colors
    )

    figures.append(p)

# New in this step: stack the figures in a grid with a single column.
grid = gridplot(figures, ncols=1)

show(grid)

## Fade in Information when Hovering Over a Point

In [18]:
from bokeh.models import HoverTool

# One data source with all parameters, shared by every plot of the grid.
source = ColumnDataSource(df)

# Every glyph keeps its own color in all three selection states.
state_colors = dict(
    color="color",
    selection_color="color",
    nonselection_color="color",
)

# New in this step: (label, value) pairs shown in the tooltip; '@name'
# refers to a column of the data source.
to_display = [('Start Value', '@start_value'), ('Model Class', '@model_class')]

figures = []
for group_name, _height in param_groups_and_heights:
    in_group = df['group'] == group_name
    group_param_names = sorted(
        df.loc[in_group, "full_name"].unique(), reverse=True
    )

    p = figure(
        title="Comparison Plot of {} Parameters".format(group_name.title()),
        plot_height=300,
        plot_width=500,
        y_range=group_param_names,
        tools='tap',
    )

    # Point estimates; the renderer is kept so the hover tool can be
    # attached to it.
    points = p.scatter(
        source=source,
        x="final_value",
        y="full_name",
        size=12,
        alpha=0.5,
        selection_alpha=0.7,
        nonselection_alpha=0.2,
        **state_colors
    )

    # Confidence intervals, only visible for selected rows.
    p.hbar(
        source=source,
        left="conf_int_lower",
        right="conf_int_upper",
        y="full_name",
        height=0.01,
        alpha=0.0,
        selection_alpha=0.25,
        nonselection_alpha=0.0,
        **state_colors
    )

    # New in this step: overlay additional information when hovering over
    # a point. The tool is restricted to the scatter renderer.
    hover = HoverTool(renderers=[points], tooltips=to_display)
    p.tools.append(hover)

    figures.append(p)

grid = gridplot(figures, ncols=1)

show(grid)

## Advanced Interactions

## Only show confidence intervals of selected parameters

This processing of a user's input (usually by updating the data and then the plot) is called a **callback**. 

Up until now, we were using pre-implemented callbacks supplied by Bokeh. 

What they all had in common is that they only needed information from the row of the point that the user was hovering over or clicking on (i.e. they only mark that row as selected or display information from that same row of our data set).

What we want to do next is more complicated: 

From the point the user selected, we want to identify the model that this point belongs to, then mark all points that also belong to this model as selected, and then update our plot.

As this is non-standard we are going to implement a custom callback!

### To implement this we need:

1. a ColumnDataSource that is used by all plots as it tracks for us which points are selected and we can modify this information

2. a customizable implementation of TapTool

3. Code to convert the input from the user to a different selection

### Background Information: Custom Callbacks in Bokeh

- Bokeh is **not** a pure Python package. 

- Python's Bokeh library creates objects and passes them to BokehJS

- It is possible to have Python callbacks but to do that we'd have to set up a Bokeh server. 
    - Have a look at the implementation of the Estimagic Dashboard if you are interested 
    - Bokeh also has many examples using the command line command ``bokeh serve``: https://bokeh.pydata.org/en/latest/docs/gallery.html#server-app-examples
    - for general information on Bokeh Servers see https://bokeh.pydata.org/en/latest/docs/user_guide/server.html


### Today: Pass a JavaScript callback to update the data to our TapTool.

#### Here is the JavaScript code:

```javascript

// Indices of the currently selected rows of the data source.
var chosen = source.selected.indices;
// If a single number was received, wrap it in an array so that the loop
// below can treat both cases alike.
if (typeof(chosen) == "number"){
    var chosen = [chosen]
};

// Collect the model of every selected row.
var chosen_models = [];

for (var i = 0; i < chosen.length; ++ i){
    chosen_models.push(source.data['model'][chosen[i]])
};

// Collect the positions of all rows whose model is one of the chosen models.
// The length of the 'index' column is used as the number of rows.
var chosen_models_indices = [];

for (var i = 0; i < source.data['index'].length; ++ i){
    if (chosen_models.includes(source.data['model'][i])){
        chosen_models_indices.push(i)
    };
};

// Replace the selection by all rows of the chosen models and emit a change
// event on the source.
source.selected.indices = chosen_models_indices;
source.change.emit();
```

In [17]:
from bokeh.models import TapTool
from bokeh.models.callbacks import CustomJS

# One data source shared by all plots: it tracks which rows are selected,
# and the JavaScript callback modifies that selection.
source = ColumnDataSource(df)

# =============================================
# Create the JavaScript callback.
# The file is read and the callback is built once, before the loop,
# because neither depends on the parameter group; the same callback is
# then attached to the TapTool of every plot.
tap_js_kwargs = {"source": source}
with open('tap_callback.js', 'r') as f:
    tap_js_code = f.read()
tap_callback = CustomJS(args=tap_js_kwargs, code=tap_js_code)
# =============================================

figures = []

for param_group_name, height in param_groups_and_heights:
    df_slice = df[df['group'] == param_group_name]
    param_names_to_plot = sorted(df_slice["full_name"].unique(), reverse=True)

    p = figure(
        title="Comparison Plot of {} Parameters".format(param_group_name.title()),
        y_range=param_names_to_plot,
        plot_height=height,
        plot_width=plot_width,
        # =============================================
        # NOT configuring the basic taptool here!
        # =============================================
    )

    # Give a name to each scatter glyph
    # so each can get a TapTool
    scatter_glyph = p.scatter(
        source=source,
        x="final_value",
        y="full_name",
        marker="marker",
        size=12,
        color="color",
        selection_color="color",
        nonselection_color="color",
        alpha=0.5,
        selection_alpha=0.8,
        nonselection_alpha=0.2,
    )

    # Confidence intervals, only visible for selected rows.
    p.hbar(
        source=source,
        left="conf_int_lower",
        right="conf_int_upper",
        y="full_name",
        height=0.01,
        alpha=0.0,
        selection_alpha=0.25,
        nonselection_alpha=0.0,
        color="color",
        selection_color="color",
        nonselection_color="color",
    )

    # =============================================
    # Add a TapTool with the custom callback to each plot,
    # restricted to the scatter glyph of that plot
    tap = TapTool(renderers=[scatter_glyph], callback=tap_callback)
    p.tools.append(tap)
    # =============================================

    figures.append(p)

grid = gridplot(figures, ncols=1)
show(grid)