# A Bouquet of Bokeh

![models](https://bokeh.pydata.org/en/latest/_images/document.svg)

In [1]:
# whoa that's a big import
import pandas as pd

from bokeh.models import (ColumnDataSource, DataRange1d, 
                          FactorRange, CategoricalScale, CategoricalTicker, 
                          BasicTicker, FixedTicker)
from bokeh.models.tools import HoverTool, WheelZoomTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.plotting import figure, show
from bokeh.embed import components
from bokeh.io import output_notebook, push_notebook, curdoc
from bokeh.transform import jitter, factor_cmap, LinearColorMapper
from bokeh.palettes import RdYlGn3, Inferno256
from bokeh.models.widgets import Slider, Div
from bokeh.models.callbacks import CustomJS
from bokeh.layouts import column
from bokeh.models.glyphs import Triangle
from bokeh.client import push_session
from bokeh.document import Document

import numpy as np
from sklearn.linear_model import LinearRegression

Don't be scared.
> I must not fear.
> Fear is the mind-killer.

## Airbnb Dataset

In [2]:
df = pd.read_csv('airbnb_data.csv')
df = df[['neighborhood','bedrooms', 'price', 'reviews', 'accommodates']]
df.head()

Unnamed: 0,neighborhood,bedrooms,price,reviews,accommodates
0,Downtown,1.0,150.0,1,4
1,Long Beach,1.0,130.0,0,1
2,Long Beach,1.0,130.0,0,2
3,Glendale,1.0,125.0,0,1
4,Koreatown,1.0,120.0,0,1


In [3]:
# Dataframe is put into a ColumnDataSource -- Used by Bokeh internals
cds = ColumnDataSource(df)
# A plot is created
p = figure()
# Circle glyph is instantiated within the plot
circ = p.circle(x='bedrooms', y='price', source=cds)

In [4]:
# Loads plots directly into the notebook when show() is called
output_notebook()

In [5]:
# Sample down the data to a random 10,000 rows
pd.np.random.seed(5)
df_ds = df.sample(n=10000)
df_ds.shape

(10000, 5)

## Building a plot, object by object, model by model

In [6]:
# Internally, all Bokeh data is stored in a CDS
cds = ColumnDataSource(df_ds)
p = figure()
circ = p.circle(x='bedrooms', y='price', source=cds)

show(p)

In [7]:
# HoverTool object is instantiated with its default properties
p.add_tools(HoverTool())
# No variable assignment in previous step, so we must find the HoverTool instance with select()
ht = p.select(HoverTool)
# Edit property to display the desired values in the hover tip
ht.tooltips = [
    ("Index", '$index'),
    ("Room type", '@room_type'),
    ("City", "@city"),
    ("Neighborhood", "@neighborhood"),
    ("Review", "@reviews"),
    ("Price", "$@price")
]

# Access the circle glyph's properties and modify them
circ.glyph.size = 8.5
circ.glyph.line_color = (12,255,23,0.4)
circ.glyph.line_width = 1
# Replace the x axis 'bedrooms' with a jittered 'bedrooms' 
circ.glyph.x = jitter('bedrooms', width=0.5)
# Tick values on the x-axis changed to the discrete number of bedrooms
p.xaxis.ticker = FixedTicker(ticks=list(set(cds.data['bedrooms'])))
# Y ticks are formatted to display currency -- formats available in Bokeh documentation
p.yaxis.formatter = NumeralTickFormatter(format='$ 0,0[.]00')
# Annotate the plot
p.xaxis.axis_label = "# of Bedrooms"
p.yaxis.axis_label = "Price of room per day"
p.title.text = "Beds vs Price"
p.title.align = "center"
# Modifies the grid lines
p.ygrid.grid_line_color = 'gray'
p.xgrid.visible = False
p.ygrid.minor_grid_line_color = 'blue'
p.ygrid.minor_grid_line_alpha = 0.1

# Specify the initial values of the y range
p.y_range.start = -100
p.y_range.end = 5000
# Specify the bounds at which the graph is no longer accessible
p.x_range.bounds = (-2, 11)
p.y_range.bounds = (-5000, 30000)


show(p)

In [8]:
p.width = 900

p.min_border = 45

p.background_fill_color = p.border_fill_color = (64, 48, 117, 0.3)

p.border_fill_alpha = 0.2

p.axis.axis_label_text_font_size = p.title.text_font_size = '20px'

p.axis.major_label_text_font_size = '14px'

p.toolbar_location = 'above'

p.select_one(WheelZoomTool).dimensions = 'height'
show(p)

### Embedding

In [9]:
script, div = components(p)
print('Script: \n' + '*'*50 + script[0:1000])
print('\n\nDiv: \n' + '*'*50 + div)

Script: 
**************************************************
<script type="text/javascript">
    (function() {
  var fn = function() {
    Bokeh.safely(function() {
      (function(root) {
        function embed_document(root) {
          var docs_json = {"57c840ea-a2e4-484c-b092-4d38311f1530":{"roots":{"references":[{"attributes":{"axis_label":"Price of room per day","axis_label_text_font_size":{"value":"20px"},"formatter":{"id":"55149bfb-2ecd-42c7-8996-1b24b6a78468","type":"NumeralTickFormatter"},"major_label_text_font_size":{"value":"14px"},"plot":{"id":"7e86b9d2-0edd-4c76-b0a5-7a2876330df4","subtype":"Figure","type":"Plot"},"ticker":{"id":"d26334a5-dad2-4dee-9489-d764bdb8d8e2","type":"BasicTicker"}},"id":"f47fab22-61b3-4573-b207-1061b9cb6f90","type":"LinearAxis"},{"attributes":{},"id":"d26334a5-dad2-4dee-9489-d764bdb8d8e2","type":"BasicTicker"},{"attributes":{"range":null,"width":0.5},"id":"c86d010a-5a71-478d-9bf1-0a66f7af032a","type":"Jitter"},{"attributes":{"background_fill_color"

#### Resources

``` html
<link
    href="http://cdn.pydata.org/bokeh/release/bokeh-0.12.9.min.css"
    rel="stylesheet" type="text/css">
<link
    href="http://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.9.min.css"
    rel="stylesheet" type="text/css">
<link
    href="http://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.9.min.css"
    rel="stylesheet" type="text/css">

<script src="http://cdn.pydata.org/bokeh/release/bokeh-0.12.9.min.js"></script>
<script src="http://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.9.min.js"></script>
<script src="http://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.9.min.js"></script>
```

### Autoloading - for the smart and lazy

``` python
from bokeh.resources import CDN
from bokeh.embed import autoload_static

js, tag = autoload_static(p, CDN, "some/path")
```

* JS goes into *some/path*.

* Script goes into your code.

``` js
<script
    src="some/path"
    id="c5339dfd-a354-4e09-bba4-466f58a574f1"
    async="true"
    data-bokeh-data="static"
    data-bokeh-modelid="7b226555-8e16-4c29-ba2a-df2d308588dc"
    data-bokeh-modeltype="Plot"
    data-bokeh-loglevel="info"
></script>
```


## Bar graph with groups

In [10]:
hoods = df.groupby('neighborhood')['neighborhood'].count().sort_values(ascending=False)[0:5].index

df_h = df[df.neighborhood.isin(hoods)]

df_h.loc[:,'price_q'] = pd.qcut(df_h['price'], 3, labels=['Low', 'Medium', 'High'])

group = df_h.groupby(['neighborhood', 'price_q'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [11]:
source = ColumnDataSource(group)

colors =  factor_cmap('neighborhood_price_q', palette=RdYlGn3, factors=list(df_h['price_q'].unique().sort_values()), start=1)

f = figure(plot_width=950, title="Day price by Neighborhood", x_range=group, background_fill_color='grey', tools='')

f.vbar(x='neighborhood_price_q', top='price_count', width=1, fill_color=colors, line_color='white', source=source)

f.x_range.range_padding = .1

f.xgrid.visible = False

def sfunc(x):
    if x[1] == 'High':
        return 0
    elif x[1] == 'Medium':
        return 1
    elif x[1] == 'Low':
        return 2

f.x_range.factors.sort(key=sfunc)
f.x_range.factors.sort(key=lambda tup: tup[0])


show(f)

## Unlimited power!

In [12]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/PAGjm4BMKlk?start=1340" frameborder="0" allowfullscreen></iframe>

## Interactive Widgets. No server required.

In [13]:
slider = Slider(start=0, end=cds.data['reviews'].max(), value=0, step=1, title='At least this # of reviews:(Slide me pls)')
slider.bar_color = 'pink'
ccds = ColumnDataSource(cds.to_df())
slider.callback = CustomJS(args=dict(source=cds, ccds=ccds, p=p), code=
                          """
                          var data = source.data;                          
                          var cdata = ccds.data;                          
                          var slider_val = cb_obj.value;                          
                          var len = cdata['reviews'].length;                                           
                                                                           
                          var new_data = {
                            'bedrooms': [],
                            'price': [],
                            'reviews': []
                          };
                          
                          for(var i = 0; i < cdata['reviews'].length; i++) {
                            if(cdata['reviews'][i] >= slider_val) {
                                new_data['bedrooms'].push(cdata['bedrooms'][i]);
                                new_data['price'].push(cdata['price'][i]);
                                new_data['reviews'].push(cdata['reviews'][i]);
                            }
                          }
                          
                          source.data = new_data;
                          source.change.emit();
                          """
                          )
layout = column(slider, p)
show(layout)

#### Other Widgets
![widgets](widgets.png)

### Dynamic Linear Regression of a Selection

To run the following, bokeh server must be running. 
Start with *bokeh serve*

### More types of glyphs
![glyphs](glyphs.png)

### Network Graphs
![network](network.png)

### Geodata
![chloropleth](chloropleth.png)

#### With tile providers
![tile](tile.png)

## Big data
**USE DATASHADER**

Dask + Datashader + Bokeh allow you to explore massive datasets
![datashader](datashader.png)

## Holoviews
``` python
# Declare
from bokeh.sampledata.iris import flowers
from holoviews.operation import gridmatrix

ds = hv.Dataset(flowers)

grouped_by_species = ds.groupby('species', container_type=hv.NdOverlay)
grid = gridmatrix(grouped_by_species, diagonal_type=hv.Scatter)

# Plot
plot_opts = dict(tools=['hover', 'box_select'], bgcolor='#efe8e2')
style = dict(fill_alpha=0.2, size=4)

grid({'Scatter': {'plot': plot_opts, 'style': style}})
```

![holoviews](holoviews.png)