## Prepping the data

In [3]:
import csv
from collections import defaultdict
from datetime import datetime as dt

def is_float(n):
    """Return True when ``float(n)`` succeeds, False otherwise.

    Args:
        n: Any value; in this notebook it is a raw CSV field (a string).

    Returns:
        bool: True if ``n`` can be converted with ``float()``.
    """
    try:
        float(n)
    except (TypeError, ValueError):
        # ValueError: unparsable text such as "abc" or "".
        # TypeError: non-string, non-numeric input such as None or a list.
        return False
    return True

# Read the whole Kickstarter export into memory as a list of string lists.
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted fields reach the reader untouched.
with open("ks-projects-201612.csv", 'r', encoding='Windows-1252', newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
    rows = list(csvreader)

# The first row is the header: map column position -> column name, with
# trailing whitespace stripped from every name.
header = rows.pop(0)
header_dict = {position: name.rstrip() for position, name in enumerate(header)}

# Positions 13-16 get placeholder names; position 17 is named "index".
for i in range(13, 17):
    header_dict[i] = "col_" + str(i)
header_dict[17] = "index"

# Append each data row's position (header excluded) as its last field.
# NOTE(review): this matches the "index" entry above only if every row has
# exactly 17 fields before the append - confirm against the file.
for i, data_row in enumerate(rows):
    data_row.append(i)

# Reverse lookup: column name -> column position.
col_dict = {v: k for k, v in header_dict.items()}
    
# Keep only the rows whose "state" column holds one of these values.
good_states = ["failed", "successful", "canceled", "live", "suspended"]
clean_rows = []
for row in rows:
    if row[col_dict["state"]] in good_states:
        clean_rows.append(row)

# Per main category: total number of projects and number of successful ones.
cat = defaultdict(int)
cat_success = defaultdict(int)
for row in clean_rows:
    main_category = row[col_dict['main_category']]
    cat[main_category] += 1
    if row[col_dict['state']] == 'successful':
        cat_success[main_category] += 1

# Categories ordered by project count (largest first); those with fewer than
# 10 projects are left out of the bar-chart inputs.
ranked = sorted(cat.items(), key=lambda item: item[1], reverse=True)
kept = [(label, count) for label, count in ranked if count >= 10]

cat_bar_label = [label for label, _ in kept]
cat_bar_values = [count for _, count in kept]
# .get() reads the count without inserting missing keys into the defaultdict.
success_cat_bar_values = [cat_success.get(label, 0) for label in cat_bar_label]
        
        
# Stage 1: rows whose backers field is all digits and whose "usd pledged"
# field parses as a float.
pledge_backers_rows = []
for r in clean_rows:
    if r[col_dict['backers']].isdigit() and is_float(r[col_dict['usd pledged']]):
        pledge_backers_rows.append(r)

# Stage 2: of those, keep projects with at least one backer and more than
# 50000 in "usd pledged".
pledge_backers_clean_rows = []
for r in pledge_backers_rows:
    has_backers = int(r[col_dict['backers']]) > 0
    above_threshold = float(r[col_dict['usd pledged']]) > 50000
    if has_backers and above_threshold:
        pledge_backers_clean_rows.append(r)

# Parallel column lists (one entry per kept row) used by the plots below.
names = list(map(lambda r: r[col_dict['name']], pledge_backers_clean_rows))
backers = list(map(lambda r: int(r[col_dict['backers']]), pledge_backers_clean_rows))
pledges = list(map(lambda r: float(r[col_dict['usd pledged']]), pledge_backers_clean_rows))
category = list(map(lambda r: r[col_dict['main_category']], pledge_backers_clean_rows))

# Launch timestamps are parsed with this format and reduced to dates.
dt_format = '%Y-%m-%d %H:%M:%S'
launched = list(map(lambda r: dt.strptime(r[col_dict['launched']], dt_format).date(),
                    pledge_backers_clean_rows))

# Rows (taken from the unfiltered `rows`) whose country code is in this list.
countries = [ 'AT', 'AU', 'BE', 'CA', 'CH', 'DE', 'DK', 'ES', 'FR', 'GB', 'HK', 'IE', 'IT', 'LU', 'MX', 'NL', 'NO', 'NZ', 'SE', 'SG', 'US']
good_country_rows = list(filter(lambda row: row[col_dict['country']] in countries, rows))
        

In [4]:
# Display the column-position -> column-name mapping.
header_dict

{0: 'ID',
 1: 'name',
 2: 'category',
 3: 'main_category',
 4: 'currency',
 5: 'deadline',
 6: 'goal',
 7: 'launched',
 8: 'pledged',
 9: 'state',
 10: 'backers',
 11: 'country',
 12: 'usd pledged',
 13: 'col_13',
 14: 'col_14',
 15: 'col_15',
 16: 'col_16',
 17: 'index'}

# Bokeh

An interactive visualization library, mixing python and javascript. It is opensource under the BSD license.

It can be used standalone, or embedded in Jupyter and Zeppelin notebooks.

Always refer to the official documentation: https://docs.bokeh.org/en/latest/docs/reference.html

In [5]:
from bokeh.plotting import figure, show, output_notebook, output_file, reset_output
from bokeh.models import ColumnDataSource, CategoricalColorMapper, PrintfTickFormatter, NumeralTickFormatter, Legend, CDSView, GroupFilter, CustomJS, BoxSelectTool, FactorRange
from bokeh.layouts import gridplot, column
from bokeh.models.widgets import Div
from bokeh.palettes import Spectral6, Pastel1, Category20c, Inferno256


# Activate notebook output mode (described in "Output and running mode" below).
output_notebook()

## Output and running mode

Different output modes are provided: to file and to notebook.

The output to file mode is activated with the command `output_file(<filename>)`: in this mode, the output generated by bokeh is saved to a file when `show` is called.

Conversely, in notebook mode (activated with the command `output_notebook()`) the output is directed to a notebook cell. Note that the two modes can be active at the same time.

## Histogram - bar plot

Bar plots can be created in a similar way as in matplotlib/seaborn. The main difference is that the output is an interactive (JavaScript-based) chart with a default toolbar to zoom, pan, select, save, etc. All options can be customized.

The bar plot creation is composed of 3 steps:
1. create the plot object. We do this with the figure method, which creates a Figure, a subclass of Plot that comes with a default configuration (axes, grids, tools, etc.). The Plot object will then contain the glyphs
2. create the glyphs within the plot. This can be done with convenience methods on the Plot object for a number of different charts.
3. render the plot within a cell. This is achieved with the show method on the plot, which renders it in a cell if the output mode is notebook.

In [6]:
# Step 1: create the figure; the explicit FactorRange makes the x axis categorical.
p = figure(x_range=FactorRange(factors=cat_bar_label))
# Step 2: add one vertical bar per category, with height taken from cat_bar_values.
p.vbar(x=cat_bar_label, top=cat_bar_values, width=1)

# Step 3: render the plot.
show(p)

The `x_range=FactorRange(factors=cat_bar_label)` parameter in the figure call is used to tell the Plot object that the x axis is categorical. The FactorRange is a range of values for a categorical dimension.

Creating the FactorRange object explicitly is not required. We can directly pass a list to the figure object and the FactorRange will be created for us automatically.

Let's now customize the plot a little bit to make it better, for example by fixing the spacing, the y axis start, and by removing the vertical grid.

In [7]:
# Passing the label list directly as x_range lets Bokeh build the FactorRange.
# The toolbar is disabled (no location, empty tool list).
p = figure(x_range=cat_bar_label, plot_height=300, plot_width=700,toolbar_location=None, tools="")

# width=0.9 (instead of 1) leaves a gap between neighbouring bars.
p.vbar(x=cat_bar_label, top=cat_bar_values, width=0.9)

# Remove the vertical grid lines and anchor the y axis at zero.
p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

Zooming and panning in a bar plot are actually counterproductive, so we can completely remove the toolbar. We also rotate the labels so that they don't overlap.

In [8]:
# Same toolbar-less categorical figure as the previous cell.
p = figure(x_range=cat_bar_label, plot_height=300, 
           plot_width=700,toolbar_location=None, tools="")

p.vbar(x=cat_bar_label, top=cat_bar_values, width=0.9)

p.xgrid.grid_line_color = None
# Tilt the category labels so neighbouring labels do not overlap.
p.xaxis.major_label_orientation = 0.45
p.y_range.start = 0

show(p)

Now we also format the numbers in the y axis with a NumeralTickFormatter (there are several of these classes for different type of objects) and remove the minor ticks.

We also add the toolbar back, but with only the possibility to save the plot.

In [9]:
# The toolbar is back, restricted to the "save" tool.
p = figure(x_range=cat_bar_label, plot_height=300, 
           plot_width=700, tools="save")

p.vbar(x=cat_bar_label, top=cat_bar_values, width=0.9)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 0.45
p.y_range.start = 0
# Format y tick labels with the "0,0" numeral pattern and hide the minor ticks.
p.yaxis[0].formatter = NumeralTickFormatter(format="0,0")
p.yaxis.minor_tick_line_color=None

show(p)

## ColumnDataSource

So far, we have provided data to bokeh directly (via lists) without an intermediate model. To create more advanced graphs, possibly composed of several components, and to do this easily, bokeh provides the ColumnDataSource class, which makes it easy to share data across multiple plots and to share selections on such data.

The ColumnDataSource object can be created from lists or from pandas dataframes.

A ColumnDataSource is composed of a number of named columns, which can be created from a dictionary. The columns should always have the same length.

When creating glyphs, we can reference the columns by name in the ColumnDataSource.

In [10]:
# Two equally long named columns: category labels and their project counts.
data = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values}

source = ColumnDataSource(data=data)

p = figure(x_range=cat_bar_label, plot_height=400, plot_width=700,
           toolbar_location="right", tools="save", title="Project counts by categories",
           x_axis_label="Project category", y_axis_label="Project count")

# x and top are column names resolved against `source`, not lists.
p.vbar(x='category_name', top='project_count', width=0.9, source=source)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 0.45
p.y_range.start = 0
p.yaxis[0].formatter = NumeralTickFormatter(format="0,0")
p.yaxis.minor_tick_line_color=None

show(p)

## Tooltip

One important feature of interactive charts is the possibility to inspect the elements to get more information about them. One way to provide additional information is to use a tooltip. Tooltips integrate nicely with ColumnDataSource, and the data to be displayed can be accessed by referring to the column name or to plot properties (index, x/y positions, etc.).

In [11]:
data = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values}

source = ColumnDataSource(data=data)

# (label, value) pairs: "@name" reads a column of `source`, "{0,0}" is a
# number format, and "$index" is a plot property rather than a column.
TOOLTIPS = [
    ("Project count", "@project_count{0,0}"),
    ("Category", "@category_name"),
    ("index", "$index")
]

p = figure(x_range=cat_bar_label, plot_height=300, plot_width=700,
           toolbar_location=None, tools="", title="Project counts by categories",
           x_axis_label="Project category", y_axis_label="Project count",
           tooltips=TOOLTIPS)

p.vbar(x='category_name', top='project_count', width=0.9, source=source)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 0.45
p.y_range.start = 0
p.yaxis[0].formatter = NumeralTickFormatter(format="0,0")
p.yaxis.minor_tick_line_color=None

show(p)

## Stacked bar plot

With ColumnDataSource it's easy to create more advanced types of bar plots, such as stacked bar plots. You need to have multiple columns in the ColumnDataSource to represent the different bar sets, and then you can reference them by column name.

In the example below we also see how to customize the selection tool, to make it more useful for a bar plot, where selection on the y dimension does not make sense. Note also how we use an additional column (`All project count`) not for the bar plot itself, but only as an additional piece of information for the tooltip.

In [12]:
# Unsuccessful count per category = total count - successful count.
unsuccess_cat_bar_values = [x - y for (x,y) in zip(cat_bar_values, success_cat_bar_values)]

# 'All project count' is not drawn as a bar; it only feeds the tooltip.
data = {'Category name': cat_bar_label, 
        'Unsuccessful project count': unsuccess_cat_bar_values,
        'Successful project count': success_cat_bar_values,
        'All project count': cat_bar_values}

source = ColumnDataSource(data=data)

# Column names containing spaces are wrapped in braces: @{column name}.
TOOLTIPS = [
    ("Category", "@{Category name}"),
    ("Unsuccessful project count", "@{Unsuccessful project count}{0,0}"),
    ("Successful project count", "@{Successful project count}{0,0}"),
    ("All project count", "@{All project count}{0,0}"),
]

p = figure(x_range=cat_bar_label, plot_height=400, plot_width=700,
           title="Unsuccessful and successful project counts by category",
           x_axis_label="Project category", y_axis_label="Unsuccessful and successful project counts",
           tooltips=TOOLTIPS, tools="save,reset")

# Box selection restricted to the horizontal dimension.
box_select = BoxSelectTool(dimensions='width')
p.add_tools(box_select)

# One stacked layer per entry of count_types; colours and legend labels are
# given in the same order.
count_types = ["Successful project count", "Unsuccessful project count"]
p.vbar_stack(count_types, x='Category name', width=0.9, source=source, 
             color=["#e84d60", "#718dbf"], legend_label=count_types)

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = 0.45

show(p)

## Multiple plots

Depending on what you want to learn from the data, stacked bar plot is not always the best solution. Splitting the data into 2 different bar plots can show both trends.

This can be achieved by using layouts: by column, by row, or the most generic grid layout. The steps we follow are:
1. create the common ColumnDataSource
2. create the 2 plots (and the corresponding glyphs) individually
3. layout the plots in a grid layout. The grid can be composed of plots as well as custom divs.

Note that, by using the same ColumnDataSource the selection is linked.

In [13]:
# One data source shared by both plots below.
data = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values,
        'successful_project_count': success_cat_bar_values}

source = ColumnDataSource(data=data)

TOOLTIPS = [
    ("Category", "@category_name"),
    ("Project count", "@project_count{0,0}"),
    ("Successful project count", "@successful_project_count{0,0}")    
]

# Top plot: total project count per category.
p1 = figure(x_range=cat_bar_label, plot_height=350, plot_width=700,
           y_axis_label="Total project count", tooltips=TOOLTIPS,
           tools='reset,save')

box_select1 = BoxSelectTool(dimensions='width')
p1.add_tools(box_select1)

p1.vbar(x='category_name', top='project_count', width=0.9, source=source)

p1.xgrid.grid_line_color = None
# Hide the top plot's x tick labels and ticks (zero-size font, no tick line).
p1.xaxis.major_label_text_font_size = '0pt'
p1.xaxis.major_tick_line_color = None
p1.y_range.start = 0

# Bottom plot: successful project count per category, with its own tool instance.
p2 = figure(x_range=cat_bar_label, plot_height=350, plot_width=700,
            y_axis_label="Successful project count", x_axis_label="Project category",
            tooltips=TOOLTIPS, tools='reset,save')
box_select2 = BoxSelectTool(dimensions='width')
p2.add_tools(box_select2)
p2.vbar(x='category_name', top='successful_project_count', width=0.9, source=source)

p2.xgrid.grid_line_color = None
p2.y_range.start = 0
p2.xaxis.major_label_orientation = "vertical"

# make a grid
grid = gridplot([p1, p2], ncols=1)

# A Div supplies the common title shown above the grid.
title = Div(text="<b>Total and successful project counts by category</b>")
show(column(title, grid))



## Scatterplot

To create a scatterplot we use the circle convenience method on the figure (plot) object. We map the x and y positions on specific columns of the ColumnDataSource object.

In addition, we map the project main category on the circle color. To do this we use a particular type of mapper, the CategoricalColorMapper. A number of other mappers exist: https://docs.bokeh.org/en/latest/docs/reference/models/mappers.html.

Note also how we can create a legend (for the project category) by simply referring to the ColumnDataSource column.

In [14]:
# Parallel columns, one entry per project kept by the data-prep cell.
data = {'pledges': pledges,
        'backers': backers,
        'category': category,
        'names': names}

TOOLTIPS = [
    ("Name", "@names"),
    ("Category", "@category"),
    ("Pledges", "$ @pledges{0,0}"),
    ("Backers", "@backers{0,0}")
]

source = ColumnDataSource(data=data)

# Sort the distinct categories so the category -> colour assignment is the same
# on every run; the iteration order of a set of strings can differ between
# kernel sessions.
category_uniq = sorted(set(category))
# Category20c only provides palettes for 3 to 20 colours, so clamp the lookup
# key to that range instead of risking a KeyError.
palette_size = min(max(len(category_uniq), 3), 20)
color_mapper = CategoricalColorMapper(factors=category_uniq, palette=Category20c[palette_size])

p = figure(plot_width=700, plot_height=500, tooltips=TOOLTIPS,
           title="Pledges vs backers")
# Circle colour comes from the 'category' column through the colour mapper;
# legend_field builds one legend entry per distinct category value.
p.circle(x='pledges', y='backers', 
         color={'field': 'category', 'transform': color_mapper}, 
         source=source, alpha=0.9, legend_field='category')
p.xaxis.axis_label = "Pledges"
p.yaxis.axis_label = "Backers"
p.x_range.start = 0
p.y_range.start = 0
p.add_tools(BoxSelectTool())

p.toolbar.autohide = True

show(p)

## Callbacks

Now we want to create 2 plots, and update one based on a selection on the other one. For example, we can create the bar plot of the project category and the scatterplot of pledges vs backers, and in the scatterplot, only show the dots for the project belonging to the categories selected in the bar plot.

To do this, we need to update the scatterplot by attaching a callback to the barplot.

We start by creating the bar plot and by attaching a test callback on the selection event. The DataSource class (superclass of ColumnDataSource) has a `selected` property that provides the selected indices on the DataSource.
By using the `on_change` method (part of the base `Model` bokeh class) on the `selected` property, we can attach the callback to a change in the selection of the datasource.

In [15]:
data = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values}

source = ColumnDataSource(data=data)

TOOLTIPS = [
    ("Project count", "@project_count{0,0}"),
    ("Category", "@category_name"),
    ("index", "$index")
]

p = figure(x_range=cat_bar_label, plot_height=300, plot_width=700,
           toolbar_location=None, tools="", title="Project counts by categories",
           x_axis_label="Project category", y_axis_label="Project count",
           tooltips=TOOLTIPS)
box_select = BoxSelectTool(dimensions='width')
p.add_tools(box_select)

p.vbar(x='category_name', top='project_count', width=0.9, source=source)

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 0.45
p.y_range.start = 0
p.yaxis[0].formatter = NumeralTickFormatter(format="0,0")
p.yaxis.minor_tick_line_color=None

# Test callback: prints the new value of the watched property.
def callback(attr, old, new):
    print(new)

# Attach the Python callback to changes of the selection's "indices".
# This is deliberate: with standalone notebook output, show(p) then reports
# the error reproduced below, which is the point of this cell.
source.selected.on_change("indices", callback)

show(p)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



The error here is telling us that what we are trying to do is not possible, as python callback cannot be attached to standalone html output (because there is no place for them to run).

We thus have 2 possible options:
1. use javascript callback that can run in the html output
2. use python callback and a bokeh server to serve such callbacks

We show the javascript callback example here, and we'll use a bokeh server later in the notebook.

In the code below note that there are javascript correspondents to our python objects. To update the scatterplot we have to manipulate the javascript corresponding objects.

In [16]:
# Per-project columns for the scatterplot.
detailed_data = {'pledges': pledges,
                 'backers': backers,
                 'category': category,
                 'names': names}

# Sorted so the category -> colour assignment is reproducible across runs
# (the iteration order of a set of strings can differ between kernel sessions).
category_uniq = sorted(set(category))

TOOLTIPS1 = [
    ("Name", "@names"),
    ("Category", "@category"),
    ("Pledges", "$ @pledges{0,0}"),
    ("Backers", "@backers{0,0}")
]


# source1 feeds the scatterplot and is rewritten by the JS callback;
# immutable_source keeps the full, unfiltered data for the callback to read.
source1 = ColumnDataSource(data=detailed_data)
immutable_source = ColumnDataSource(data=detailed_data)
# Size the palette from the data (as the scatterplot cell does) instead of a
# hard-coded 15; Category20c only offers 3 to 20 colours, hence the clamp.
palette_size = min(max(len(category_uniq), 3), 20)
color_mapper = CategoricalColorMapper(factors=category_uniq, palette=Category20c[palette_size])

p1 = figure(plot_width=700, plot_height=500, tooltips=TOOLTIPS1, title="Pledges vs backers")
p1.circle(x='pledges', y='backers', 
         color={'field': 'category', 'transform': color_mapper}, 
         source=source1, alpha=0.7, legend_field='category', size=5)

p1.xaxis.axis_label = "Pledges"
p1.yaxis.axis_label = "Backers"
p1.x_range.start = 0
p1.y_range.start = 0

# Per-category aggregates for the bar plot that drives the selection.
agg_data = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values}

source2 = ColumnDataSource(data=agg_data)

TOOLTIPS2 = [
    ("Category", "@category_name"),
    ("Project count", "@project_count{0,0}")
]

p2 = figure(x_range=cat_bar_label, plot_height=250, plot_width=700,
           y_axis_label="Project count", 
           x_axis_label="Project category", tooltips=TOOLTIPS2,
           tools='save', title="Project count by category")
# Add the tool created here. The original added `box_select1`, a tool object
# left over from an earlier cell, so this cell only worked when that cell had
# already run in the same kernel.
box_select = BoxSelectTool(dimensions='width')
p2.add_tools(box_select)

p2.vbar(x='category_name', top='project_count', width=0.9, source=source2)

p2.xaxis.major_label_orientation = 0.45
p2.xgrid.grid_line_color = None
p2.y_range.start = 0
p2.yaxis.minor_tick_line_color=None

# JavaScript callback, run in the browser whenever the bar-plot selection
# changes: it maps the selected bar indices to category names, rebuilds the
# scatterplot columns from immutableSource keeping only those categories, and
# emits a change event on source1.
source2.selected.js_on_change('indices', CustomJS(args=dict(source1=source1, source2=source2, immutableSource=immutable_source),
                              code="""
        var indices = cb_obj.indices;
        var data1 = source1.data;
        var data2 = source2.data;
        var immutableData = immutableSource.data;
        var categories = indices.map(index => data2['category_name'][index])
        data1['pledges'] = [];
        data1['backers'] = [];
        data1['category'] = [];
        data1['names'] = [];
        for (var i = 0; i < immutableData['pledges'].length; i++) {
            if (categories.includes(immutableData['category'][i])) {
                data1['pledges'].push(immutableData['pledges'][i]);
                data1['backers'].push(immutableData['backers'][i]);
                data1['category'].push(immutableData['category'][i]);
                data1['names'].push(immutableData['names'][i]);
            }
        }
        source1.change.emit();
    """)
)


show(column(p1,p2))

## Datatables & Widgets

### Datatable

Datatable is a tabular representation of data, which supports showing and editing the data. It is composed of TableColumn objects and it is highly customizable.

In [17]:
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn

# Per-project columns shown in the table.
detailed_data = {'pledges': pledges,
                 'backers': backers,
                 'category': category,
                 'names': names}

source = ColumnDataSource(data=detailed_data)

# One TableColumn per displayed field: `field` names the source column,
# `title` is the header text.
columns=[TableColumn(field="names", title="Name"), 
         TableColumn(field="pledges", title="Pledges"), 
         TableColumn(field="backers", title="Backers"), 
         TableColumn(field="category", title="Category")]

# editable=True turns on in-table editing.
data_table = DataTable(source=source, columns=columns, editable=True)
show(data_table)

In [18]:
# Read the 'backers' column back from the table's data source.
# NOTE(review): this displays the entire list; slicing (e.g. [:10]) would keep
# the cell output short.
data_table.source.data['backers']

[224,
 855,
 632,
 2784,
 120,
 561,
 750,
 3784,
 3080,
 665,
 2789,
 622,
 1711,
 694,
 1822,
 942,
 443,
 2412,
 637,
 257,
 2108,
 600,
 2140,
 3527,
 2256,
 405,
 290,
 854,
 2485,
 8351,
 829,
 1068,
 1588,
 97,
 310,
 505,
 5564,
 621,
 1157,
 581,
 558,
 263,
 5620,
 1379,
 719,
 1198,
 695,
 7682,
 458,
 1366,
 1606,
 202,
 1794,
 878,
 81,
 412,
 1979,
 342,
 3297,
 901,
 425,
 561,
 539,
 658,
 450,
 368,
 1526,
 351,
 848,
 4726,
 1384,
 4091,
 2224,
 2282,
 1093,
 1558,
 846,
 810,
 26457,
 1768,
 873,
 1537,
 139,
 945,
 2477,
 1107,
 611,
 907,
 873,
 951,
 1483,
 1739,
 874,
 706,
 308,
 1945,
 430,
 814,
 610,
 36863,
 281,
 262,
 739,
 1858,
 563,
 883,
 399,
 1011,
 2654,
 539,
 4420,
 3317,
 591,
 4730,
 679,
 957,
 3792,
 9522,
 38,
 1203,
 3397,
 2148,
 479,
 1467,
 241,
 631,
 136,
 895,
 317,
 280,
 643,
 547,
 1620,
 1670,
 1667,
 2213,
 63416,
 1869,
 5051,
 1806,
 73,
 5194,
 585,
 1077,
 791,
 160,
 2395,
 2322,
 546,
 2011,
 911,
 427,
 484,
 1304,
 886,
 1

### Datacube

Datacube, introduced in bokeh 1.3.0 (https://docs.bokeh.org/en/latest/docs/releases.html?highlight=datacube#release-1-3-0), is a specialized datatable that provides collapsing groups and aggregation metrics for these groups (e.g., totals and sub-totals).

The grouping is provided by a GroupingInfo object, which in turn uses a getter as a "group by" criterion and an aggregator object (e.g., SumAggregator) to compute the aggregation metrics.

In [19]:
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn, GroupingInfo, SumAggregator, DataCube

# 'category' is not displayed as a column; it is the grouping key below.
detailed_data = {'backers': backers,
                 'category': category,
                 'names': names}

source = ColumnDataSource(data=detailed_data)
columns=[TableColumn(field="names", title="Category / Name"), 
         TableColumn(field="backers", title="Backers")]

# Group rows by the 'category' column and sum 'backers' within each group.
grouping = [
    GroupingInfo(getter='category', aggregators=[SumAggregator(field_='backers')])
]

# Second data source handed to the DataCube as its `target`, created empty.
target = ColumnDataSource(data=dict(row_indices=[], labels=[]))

data_cube = DataCube(source=source, columns=columns, grouping=grouping, target=target)

show(data_cube)


## Tabs

Selectable tabs that contain plots, widgets, or layouts.

Tabs are composed of a list of Panel objects, where a Panel has a child and a title.

In [20]:
from bokeh.models import Panel, Tabs


# --- First tab content: bar plot of project counts per category ---
data1 = {'category_name': cat_bar_label, 
        'project_count': cat_bar_values}

source1 = ColumnDataSource(data=data1)

TOOLTIPS1 = [
    ("Project count", "@project_count{0,0}"),
    ("Category", "@category_name")
]

p1 = figure(x_range=cat_bar_label, plot_height=300, plot_width=700,
           toolbar_location=None, tools="", title="Project counts by categories",
           x_axis_label="Project category", y_axis_label="Project count",
           tooltips=TOOLTIPS1)

p1.vbar(x='category_name', top='project_count', width=0.9, source=source1)

p1.xgrid.grid_line_color = None
p1.xaxis.major_label_orientation = "vertical"
p1.y_range.start = 0
p1.yaxis[0].formatter = NumeralTickFormatter(format="0,0")
p1.yaxis.minor_tick_line_color=None

# --- Second tab content: DataCube of backers grouped by category ---
data2 = {'backers': backers,
        'category': category,
        'names': names}
source2 = ColumnDataSource(data=data2)

columns=[TableColumn(field="names", title="Name"), 
         TableColumn(field="backers", title="Backers")]

# Group rows by 'category' and sum 'backers' within each group.
grouping = [
    GroupingInfo(getter='category', aggregators=[SumAggregator(field_='backers')])
]

target = ColumnDataSource(data=dict(row_indices=[], labels=[]))

data_cube = DataCube(source=source2, columns=columns, grouping=grouping, target=target)

# Each Panel pairs one child (plot or widget) with the title shown on its tab.
tab1 = Panel(child=p1, title="Categories (bar)")
tab2 = Panel(child=data_cube, title="Backers (table)")
tabs = Tabs(tabs=[ tab1, tab2 ])

show(tabs)


## Creating an interactive dashboard

To create this interactive dashboard we will use, in addition to what we have already seen, MultiSelect and Slider widgets. We will also use a bokeh server and python callbacks to update the dashboard based on selections in the widgets.

### Bokeh server

To be able to use python callback, we need a bokeh server to run our callback function code. A bokeh server can be run from the command line with:

`bokeh serve --show --port 5001 bokeh_dashboard`

However, if we want to have our dashboard within the notebook, we can use an embedded server, provided by the `Application` class, which is a factory to create bokeh documents. The `Application` takes a function handler which is used to process bokeh documents. 

The function handler will essentially create the dashboard and attach it to the bokeh document that the handler receives in input. Within the function handler we will also add the python callback and we will register them to widget selection events.

### Multiselect & Slider

A Multiselect shows multiple available options and supports multiple selections. The input is a list of possible options.

A Slider shows an interval (start - end), where only the end can be moved. To move also the start of the interval a RangeSlider can be used. A DateSlider (simple or range) is a particular type of slider for date objects.

In [21]:
from bokeh.models.widgets import MultiSelect, Slider, DateRangeSlider
from bokeh.layouts import row
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.models.ranges import Range1d

def modify_doc(doc):
    """Build the pledges-vs-backers dashboard and add it to ``doc``.

    Creates a scatterplot coloured by main category together with three
    widgets: a category multi-select, a minimum-backers slider and a
    launch-date range slider. Python ``on_change`` callbacks on the widgets
    re-filter the notebook-level ``pledges``, ``backers``, ``category``,
    ``names`` and ``launched`` lists and push the result into the plot's
    data source.

    Args:
        doc: The Bokeh document that receives the dashboard layout as a root.
    """
    columns = ('pledges', 'backers', 'category', 'names', 'launched')
    data = {'pledges': pledges,
            'backers': backers,
            'category': category,
            'names': names,
            'launched': launched}

    # Sorted so the widget option order and the category -> colour assignment
    # are the same on every run (set iteration order of strings can vary
    # between kernel sessions).
    categories = sorted(set(category))

    TOOLTIPS1 = [
        ("Name", "@names"),
        ("Category", "@category"),
        ("Pledges", "$ @pledges{0,0}"),
        ("Backers", "@backers{0,0}"),
        ("Launched", "@launched"),
    ]

    source = ColumnDataSource(data=data)

    # Category20c only offers palettes for 3 to 20 colours; clamp the key so an
    # unusually small or large number of categories does not raise KeyError.
    palette_size = min(max(len(categories), 3), 20)
    color_mapper = CategoricalColorMapper(factors=categories, palette=Category20c[palette_size])

    # Fixed axis ranges computed from the full data.
    max_backers = max(backers)
    backer_range = Range1d(min(backers), max_backers)
    pledges_range = Range1d(min(pledges), max(pledges))

    p = figure(plot_width=550, plot_height=500, tooltips=TOOLTIPS1,
               title="Pledges vs backers", sizing_mode="fixed",
               x_range=pledges_range, y_range=backer_range)
    p.circle(x='pledges', y='backers',
             color={'field': 'category', 'transform': color_mapper},
             source=source, alpha=1, legend_field='category', size=5)

    p.xaxis.axis_label = "Pledges"
    p.yaxis.axis_label = "Backers"

    # All categories are selected initially.
    select = MultiSelect(title="Project category:",
                         options=categories, width=200, height=300, value=categories)

    slider = Slider(start=0, end=max_backers, value=0,
                    step=1, title="Filter by number of backers",
                    width=200)

    mindate = min(launched)
    maxdate = max(launched)

    date_slider = DateRangeSlider(start=mindate, end=maxdate, value=(mindate, maxdate),
                                  title="Filter by project launched",
                                  width=550)

    def filter_data():
        """Return the columns of ``data`` restricted to the current widget values."""
        selected_categories = select.value
        min_backers = slider.value
        min_date = date_slider.value_as_date[0]
        max_date = date_slider.value_as_date[1]

        new_data = {name: [] for name in columns}

        for x in range(len(data['names'])):
            keep = (data['category'][x] in selected_categories
                    and data['backers'][x] >= min_backers
                    and min_date <= data['launched'][x] <= max_date)
            if keep:
                for name in columns:
                    new_data[name].append(data[name][x])

        return new_data

    def update_data(attr, old, new):
        """Widget callback: replace the plotted data with the filtered columns."""
        # Assign the whole dict at once so every column changes together; no
        # intermediate ColumnDataSource is needed just to carry the dict.
        source.data = filter_data()

    select.on_change("value", update_data)
    slider.on_change("value", update_data)
    date_slider.on_change("value", update_data)

    # Layout: widgets on the left, plot with the date slider underneath on the right.
    doc.add_root(row(column(select, slider), column(p, date_slider)))

# Wrap modify_doc in a FunctionHandler and build an Application from it.
handler = FunctionHandler(modify_doc)
app = Application(handler)

# Show the Application rather than a plot: per the "Bokeh server" section
# above, this is how the dashboard runs on an embedded server in the notebook.
show(app)