In [1]:
import pandas as pd
import numpy as np
from bokeh.io import output_file,show,output_notebook,push_notebook
from bokeh.plotting import *
from bokeh.models import ColumnDataSource,HoverTool,CategoricalColorMapper
from bokeh.layouts import row,column,gridplot,widgetbox
from bokeh.models.widgets import Tabs,Panel

Bokeh exports plots directly to HTML files so they can be used in a webpage. But since we are working in a Jupyter notebook here, we need Bokeh to render everything within the notebook environment. To do that, we use the following command.

In [2]:
output_notebook()

# Basic Plotting


In [3]:
import numpy as np 
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

## Scatter Plots
Similar to other plotting libraries bokeh is capable of creating multiple types of plots (called _glyphs_ in bokeh). <br>
First step is to create an empty figure where we can add the plots to later.

In [5]:
# Create an empty 600x400 figure; glyphs are added to it in the next cells.
# `width`/`height` are used instead of the deprecated `plot_width`/`plot_height`
# aliases (removed in Bokeh 3.0), matching the layout examples later in this notebook.
p = figure(width=600, height=400)

Then we specify the type of plot and add in the data. <br>
_We are going to generate some random data here._

In [6]:
# Fix the seed so the random scatter is identical on every run, then draw
# 100 standard-normal samples for each axis (x first, then y).
np.random.seed(1)
x, y = np.random.randn(100), np.random.randn(100)

# Add one circle marker per (x, y) pair to the figure created above.
p.circle(x, y)

Now the plot is generated and we only need to display it. To see the plot we can use `show`:

In [7]:
show(p)

__Note:__ this plot is interactive. You can zoom in and out and move the plots.

There are a number of parameters you can pass in to adjust the way the plot looks, including:
- `size`
- `fill_color`
- `fill_alpha`
- `line_color`
- `line_width`
- `radius`



In [8]:
# Scatter plot with explicit marker styling.
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400)
np.random.seed(1)  # same seed as the first scatter, so the points are the same
x = np.random.randn(100)
y = np.random.randn(100)
p.circle(
    x, y,
    size=15,
    fill_color='orange',
    fill_alpha=.5,
    line_color='chocolate',
    line_width=2,
)
show(p)

We used `.circle` to create a scatter plot with circles as the marker. There are multiple other options as well:
- `asterisk`
- `cross`
- `diamond`
- `hex`
- `square`
- `triangle` and `inverted_triangle`
- `x`

In [9]:
# Scatter plot drawn with inverted-triangle markers instead of circles.
# No seed is set in this cell, so the samples continue the current NumPy
# random stream. `width`/`height` replace the deprecated
# `plot_width`/`plot_height` aliases (removed in Bokeh 3.0).
p = figure(width=600, height=400)
x = np.random.randn(100)
y = np.random.randn(100)
p.inverted_triangle(
    x, y,
    size=10,
    fill_color='turquoise',
    fill_alpha=.5,
    line_color='navy',
)
show(p)

## Line plots
We can create a line plot using `.line()` and pass in the data as well as various styling parameters.

In [43]:
# Line plot of a seeded random walk (cumulative sum of standard-normal steps).
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400, title='Plot Title')
np.random.seed(2)
x = np.arange(100)
y = np.random.randn(100).cumsum()
p.line(x, y, line_color='seagreen', line_width=2)
show(p)

We can also access the components of the plot (e.g. axes, grids, etc.) and manipulate them.

In [40]:
# Axis labels and their font sizes
p.xaxis.axis_label = 'x-axis label'
p.yaxis.axis_label = 'y-axis label'
p.xaxis.axis_label_text_font_size = '16px'
p.yaxis.axis_label_text_font_size = '16px'

# Plot title text and title font size
p.title.text = 'Plot Title'
p.title.text_font_size = '24px'

# Horizontal grid lines: color and opacity
p.ygrid.grid_line_color = 'black'
p.ygrid.grid_line_alpha = .5

show(p)

You can also have multiple plots in a single figure.

In [56]:
# Two glyphs on one figure: a semi-transparent line with a circle marker on
# every data point. `width`/`height` replace the deprecated
# `plot_width`/`plot_height` aliases (removed in Bokeh 3.0).
p = figure(width=600, height=400, title='Plot Title')
np.random.seed(2)
x = np.arange(100)
y = np.random.randn(100).cumsum()
p.line(x, y, line_color='orange', line_width=2, line_alpha=.5)
p.circle(x, y, size=8, fill_color='orangered', line_color='black')
show(p)

In [246]:
# Here is a list of categorical values (or factors)
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']

# Set the x_range to the list of categories above.
# `height` replaces the deprecated `plot_height` alias (removed in Bokeh 3.0).
p = figure(x_range=fruits, height=250, title="Fruit Counts")

# Categorical values can also be used as coordinates
p.vbar(x=fruits, top=[5, 3, 4, 2, 4, 6], width=0.9)

# Set some properties to make the plot look better
p.xgrid.grid_line_color = None  # no vertical grid lines between the bars
p.y_range.start = 0             # bars start at the axis

show(p)

## Bar Plot


Let's import some data for plotting. The dataset we use here is natural gas production in US by states.

In [405]:
import pandas as pd

# Load the sheet at position 2 of the workbook, taking the column labels from
# row 2 and using the 'Date' column as a parsed date index.
ngdf = pd.read_excel(
    '../../data/processed/NG_PROD_SUM.xls',
    sheet_name=2,
    header=2,
    index_col='Date',
    parse_dates=True,
)
ngdf.head()

Unnamed: 0_level_0,Other States Natural Gas Gross Withdrawals (MMcf),Alabama Natural Gas Gross Withdrawals (MMcf),Arizona Natural Gas Gross Withdrawals (MMcf),Florida Natural Gas Gross Withdrawals (MMcf),Idaho Gross Withdrawals of Natural Gas (Million Cubic Feet),Illinois Natural Gas Gross Withdrawals (MMcf),Indiana Natural Gas Gross Withdrawals (MMcf),Kentucky Natural Gas Gross Withdrawals (MMcf),Maryland Natural Gas Gross Withdrawals (MMcf),Michigan Natural Gas Gross Withdrawals (MMcf),Mississippi Natural Gas Gross Withdrawals (MMcf),Missouri Natural Gas Gross Withdrawals (MMcf),Nebraska Natural Gas Gross Withdrawals (MMcf),Nevada Natural Gas Gross Withdrawals (MMcf),New York Natural Gas Gross Withdrawals (MMcf),Oregon Natural Gas Gross Withdrawals (MMcf),South Dakota Natural Gas Gross Withdrawals (MMcf),Tennessee Natural Gas Gross Withdrawals (MMcf),Virginia Natural Gas Gross Withdrawals (MMcf)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1991-01-15,75788,17691.0,,,,41.0,21.0,7021.0,0.0,15965.0,16689.0,0.0,57.0,3.0,2041.0,,525.0,165.0,1849.0
1991-02-15,66569,16173.0,,,,38.0,18.0,6303.0,0.0,14322.0,14603.0,0.0,58.0,3.0,1820.0,,421.0,148.0,1545.0
1991-03-15,71068,17747.0,,,,40.0,20.0,6870.0,5.0,17792.0,15913.0,1.0,65.0,5.0,1983.0,,458.0,161.0,1076.0
1991-04-15,64899,17335.0,,562.0,,39.0,19.0,6515.0,0.0,18491.0,14873.0,2.0,63.0,6.0,1881.0,,445.0,153.0,906.0
1991-05-15,64083,18260.0,,,,38.0,19.0,6458.0,0.0,19993.0,14762.0,1.0,69.0,6.0,1865.0,,421.0,152.0,698.0


In [406]:
# Shorten every column label to the text before the first space found at or
# after position 5 (the offset keeps "New York" in one piece).
# NOTE(review): the same rule also cuts "South Dakota ..." down to "South" and
# "Other States ..." down to "Other", as the output below shows; later cells
# rely on the 'Other' label.
# NOTE(review): this cell rewrites `ngdf` in place, so it is meant to run once
# right after the load cell.
short_names = []
for full_name in ngdf.columns:
    cut = full_name.find(' ', 5)
    short_names.append(full_name[:cut])
ngdf.columns = short_names

# Add the rows up per year and use the plain year number as the index.
ngdf = ngdf.resample('A').sum()
ngdf.index = ngdf.index.year
ngdf.head()

Unnamed: 0_level_0,Other,Alabama,Arizona,Florida,Idaho,Illinois,Indiana,Kentucky,Maryland,Michigan,Mississippi,Missouri,Nebraska,Nevada,New York,Oregon,South,Tennessee,Virginia
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1991,797930,223875.0,0.0,3703.0,0.0,466.0,232.0,78905.0,29.0,201413.0,180771.0,15.0,782.0,53.0,22778.0,0.0,5804.0,1856.0,14906.0
1992,818233,413616.0,0.0,7584.0,0.0,348.0,173.0,79690.0,31.0,200479.0,165537.0,27.0,1177.0,30.0,23522.0,0.0,6963.0,1770.0,24733.0
1993,807040,446320.0,0.0,8011.0,0.0,338.0,191.0,86965.0,28.0,210298.0,145025.0,14.0,2115.0,23.0,21198.0,0.0,7058.0,1659.0,37839.0
1994,793263,578862.0,0.0,8469.0,0.0,333.0,106.0,73082.0,27.0,228321.0,121802.0,8.0,2899.0,12.0,20475.0,0.0,7264.0,1991.0,50262.0
1995,774977,580125.0,0.0,7132.0,0.0,334.0,249.0,74756.0,24.0,243865.0,119454.0,17.0,2239.0,12.0,18398.0,0.0,8375.0,1820.0,49817.0


To create a vertical bar chart we can use `.vbar()`. But first we need to create a figure and pass in what is going to be displayed on x-axis.

In [316]:
# The categorical x-axis gets one slot per column (state) of `ngdf`.
# `height` replaces the deprecated `plot_height` alias (removed in Bokeh 3.0).
p = figure(x_range=ngdf.columns.to_list(), height=500,
           title='Natural Gas production by state in 2015')


Now we use `p.vbar()` and pass in the x values, which are the name of the states. We can pass in the location of top of the bars (bar heights) as top.

In [317]:
# One vertical bar per state: x is the state name, `top` is the bar height
# taken from the 2015 row of the yearly table.
p.vbar(
    x=ngdf.columns.to_list(),
    top=ngdf.loc[2015, :].values,
    width=.5,
)
show(p)

In [318]:
from bokeh.models import PrintfTickFormatter

# Tidy up the bar chart created in the previous cells.
p.yaxis.formatter = PrintfTickFormatter(format="%d")  # show tick values as full numbers
p.xaxis.major_label_orientation = np.pi/3  # angle of the x-axis tick labels, in radians (60 degrees)
p.xgrid.grid_line_color = None  # remove the vertical grid lines
show(p)

Similarly you can create a horizontal bar chart.

In [327]:
# Horizontal bar chart: the categories go on the y-axis and each bar extends
# to the value given as `right`.
# `height` replaces the deprecated `plot_height` alias (removed in Bokeh 3.0).
p = figure(y_range=ngdf.columns.to_list(), height=500,
           title='Natural Gas production by state in 2015')
p.hbar(y=ngdf.columns.to_list(),
       right=ngdf.loc[2015, :].values,
       height=.5)
p.ygrid.grid_line_color = None  # remove the horizontal grid lines (the y grid)
p.xaxis.formatter = PrintfTickFormatter(format="%d")  # show tick values as full numbers
show(p)

Note the difference between the code for the vertical and the horizontal bar chart. For the vertical chart we had to specify `top`, but for the horizontal chart we specified `right`. Also, `x` and `y` are swapped.

## Stacked Bar
A stacked bar chart is used when we want to have multiple bars stacked on top of each other. The way this plot is created is slightly different from normal bar charts. For this plot we need to have a list of data for each group of bars in a Python dictionary. Let's see it through an example.

We want to create a horizontal stacked bar chart of natural gas production from 2015 to 2018. So we put the years in a list so we can use them to get the data for each year from the data frame.

In [350]:
years = [2015,2016,2017,2018]

But when we want to pass in a list of years to the plot, bokeh needs them to be in string format. Therefore, we create a list of years in string format.

In [351]:
yrs_str = [str(y) for y in years]

Now we create the input data by first adding a list of states, and then adding to it a list of values for each year.

In [352]:
# Column-oriented input for the stacked bars: a 'States' entry holding the
# state names plus one entry of values per year, keyed by the year as a string.
data = {
    'States': ngdf.columns.to_list(),
    **{str(y): ngdf.loc[y, :].values for y in years},
}

__Note:__ The input data is a python dictionary. Also, years are stored in string format.

Now we can pass in the data to the plot.

In [353]:
from bokeh.palettes import BrBG4

# Stacked horizontal bars: one bar per state, one segment (and one palette
# colour and legend entry) per year in `yrs_str`.
# The title now names the whole year span instead of only 2015, and `height`
# replaces the deprecated `plot_height` alias (removed in Bokeh 3.0).
p = figure(y_range=ngdf.columns.to_list(), height=500,
           title=f'Natural Gas production by state, {yrs_str[0]}-{yrs_str[-1]}')
p.hbar_stack(yrs_str,
             y='States',
             source=data,
             color=BrBG4,
             height=.5,
             legend_label=yrs_str)

p.ygrid.grid_line_color = None  # remove the horizontal grid lines (the y grid)
p.xaxis.formatter = PrintfTickFormatter(format="%d")  # show tick values as full numbers
p.xaxis.axis_label = 'Production (MMcf)'  # set x-axis label
show(p)

# Styling
We can put each plot into a variable, then we will have access to properties of the plots and manipulate the way they look.

In [58]:
# Keep the object returned by each glyph call (`r1`, `r2`) so that the glyph
# properties can be changed after the plot has been built.
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400, title='Plot Title')
np.random.seed(2)
x = np.arange(100)
y = np.random.randn(100).cumsum()
r1 = p.line(x, y, line_color='orange', line_width=2, line_alpha=.5)
r2 = p.circle(x, y, size=8, fill_color='orangered', line_color='black')
show(p)

The properties can be accessed via `.glyph.<property>`

In [60]:
r1.glyph.line_color = 'green'
show(p)

# Bokeh Data Source
Bokeh can work with various formats of data including lists, numpy arrays, and pandas data frames. But, behind the scenes, all the data is converted into Bokeh's own data format, which is called `ColumnDataSource`. While in many cases we can keep using numpy and pandas data formats, in some cases it is useful to manually convert the data to a `ColumnDataSource`.

In [64]:
# Three equal-length columns from a fixed seed: an index, a random walk and
# uniform random values, bundled into one ColumnDataSource.
np.random.seed(12)
x = np.arange(100)
y = np.cumsum(np.random.randn(100))
z = np.random.rand(100)
source = ColumnDataSource(data=dict(x=x, y=y, z=z))

__Note:__ We can also pass in a pandas data frame into `ColumnDataSource`.

When data is stored in a data source, instead of passing in the variables we only pass in the name of the columns (as a string).

In [100]:
# With a data source, glyph coordinates are given as column names (strings).
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400)
p.circle('x', 'y', source=source)
show(p)

# Hover Tool
Hovertool allows us to see information about data points when we hover the mouse pointer over them. Using data sources allows us to easily define what information should be displayed. Now that we are using data sources we can display any information in the data source by using `@` followed by the name of the column.<br>
Before we created a data source with x, y, and z values. Now we are going to display the z value when we hover over a point.

In [101]:
# `tooltips` on the figure adds a hover tool; `@z` is replaced by the value of
# the `z` column for the point under the mouse pointer.
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400, tooltips='Z = @z')
p.circle('x', 'y', size=10, source=source)
show(p)

We can also add the hover separately.

In [99]:
# Same tooltip as before, but the hover tool is built explicitly and attached
# to the circle renderer `r` only.
# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400)
r = p.circle('x', 'y', size=10, source=source)
p.add_tools(HoverTool(tooltips='Z = @z', renderers=[r]))
show(p)

# Transformations
Transform objects apply a transformation to columns in the data source. This allows us to transform the data and then pass the result to a glyph.<br>
Let's see a few examples of transformation.


## cumsum
Cumulative sum of data. This is useful when creating a pie plot. Let's see it through an example:

Import Queensland air emission dataset, and create a table containing annual emission of _"Volatile Organic Compounds"_ for top 10 suburbs and put the emission of other suburbs under _"Others"_.

In [187]:
# Load the emission records and reshape them into one row per suburb and one
# column per substance. Cells hold the mean of `quantity_in_kg` for that
# suburb/substance pair; missing combinations are filled with 0.
df= pd.read_csv('../../data/processed/npi-2006-qld-air-total-emissions.csv')
df = df.pivot_table(columns='substance',values = 'quantity_in_kg', index='site_address_suburb',aggfunc='mean',fill_value=0)
# Keep the 10 suburbs with the largest value in the VOC column ...
data = df.sort_values(by='Total Volatile Organic Compounds',ascending = False)[:10]
# ... and only that column, which leaves a Series indexed by suburb.
data = data['Total Volatile Organic Compounds']
# Total over all suburbs, not just the top 10.
total_organic = df['Total Volatile Organic Compounds'].sum()
# Everything outside the top 10 goes into one 'Others' entry. `data.sum()` is
# evaluated before the new entry is added, so it covers the top 10 only.
data['Others'] = total_organic - data.sum()
# Back to a one-column DataFrame so more columns can be added in later cells.
data = pd.DataFrame(data)
data

Unnamed: 0_level_0,Total Volatile Organic Compounds
site_address_suburb,Unnamed: 1_level_1
Pinkenba,628097.1
Surat,364364.5
Moranbah,248639.6
Dysart,217321.5
Arundel,183000.0
Coomera,170280.0
North Stradbroke Island,153731.0
Karrabin,123704.0
Durham Downs,121500.0
Injune,116219.3


We want to assign a separate color to each suburb, so we use `Category20c`. `Category20c` is a predefined dictionary which contains lists of colors. Depending on how many colors we need, we pass in a number as a key and it returns a list of that many distinct colors. __Note that it only has up to 20 colors.__<br>
Let's add a column for the colors:

In [188]:
from bokeh.palettes import Category20c

data['color'] = Category20c[len(data)]

For simplicity, let's rename `Total Volatile Organic Compounds` column to `Organics`.

In [189]:

data.rename(columns={'Total Volatile Organic Compounds':'Organics'},inplace=True)

We want to create a pie chart, so we need the angle of each piece.

In [190]:
data['angle'] = data['Organics']/data['Organics'].sum()*2*np.pi

To create a pie chart, we use a glyph called `wedge` which creates a sector of a circle. We need to pass in the start and end angle. To get the start and end angle we apply `cumsum` to the angle column of the data. `cumsum` has the option to start from zero or start from the first value in the data set.<br>
And since `color` is a column in the data we can use it to style the plot.

In [None]:
from bokeh.transform import cumsum

In [203]:
# Pie chart: one wedge per row of `data`. A wedge starts at the running total
# of the angles before it (`include_zero=True` makes the first one start at 0)
# and ends at the running total that includes its own angle.
# `height`/`width` replace the deprecated `plot_height`/`plot_width` aliases
# (removed in Bokeh 3.0).
p = figure(height=500, width=800)
r = p.wedge(x=0, y=1, radius=0.5, source=data,
            start_angle=cumsum('angle', include_zero=True),
            end_angle=cumsum('angle'),
            fill_color='color')
show(p)

Now we can add a hover tooltip to the emission amount for each suburb.

In [204]:
p.add_tools(HoverTool(tooltips='@site_address_suburb: @Organics kg/year',renderers=[r]))
show(p)

And to make it look better, we add a bit of styling.

In [205]:
r.glyph.line_color = 'white'
r.glyph.line_width = 2

p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None
show(p)

## Linear colormap
To create colors based on values in the data, we can use `linear_cmap`. To use this function we need a palette, which is a list of colors (similar to `Category20c`). You can find a list of all palettes in `bokeh.palettes`. Then, by setting a low and a high value, any value in between will be mapped to a color in the palette.

In [217]:
from bokeh.transform import linear_cmap

# linear_cmap takes four required arguments: field_name, palette, low, high.
# `field_name` is the data-source column to colour by, `palette` is the list
# of colours, and values between `low` and `high` are mapped onto the palette.
# (The bare `linear_cmap()` call that used to be here only raised a TypeError.)

In [218]:
# Scatter plot whose fill colour follows the `z` column.
np.random.seed(12)
x = np.arange(100)
y = np.random.randn(100).cumsum()
z = np.random.rand(100)
source = ColumnDataSource(data={'x': x, 'y': y, 'z': z})

# `width`/`height` replace the deprecated `plot_width`/`plot_height` aliases
# (removed in Bokeh 3.0).
p = figure(width=600, height=400, tooltips='Z = @z')
p.circle('x', 'y',
         color=linear_cmap('z', 'Blues256', 0, 1),  # map z between 0 and 1 onto the Blues256 palette
         line_color='gray',
         size=10,
         source=source)
show(p)

We can also add a color bar to it.

In [225]:
from bokeh.models import LinearColorMapper, ColorBar
color_mapper = LinearColorMapper(palette="Blues256", low=0, high=1)
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0,0), title='Z')
p.add_layout(color_bar, 'right')

show(p)

# Plot Layout
Occasionally, we might need to have multiple plots together side-by-side or on top of each other. To do this, we can use `gridplot`. We need to create multiple figures and create a plot with each figure. At the end we use `gridplot` to combine all of them into a single figure.

Generate some data

In [234]:
from bokeh.layouts import gridplot
np.random.seed(24)
x = np.linspace(0,10,100)
y = x + np.random.randn(100)
z = x+np.random.randn(100).cumsum()




First plot:

In [235]:
p1 = figure(width = 300, height = 300)
p1.circle(x,y,color = 'maroon')
show(p1)

Second plot

In [240]:
p2 = figure(width = 300, height = 300)
p2.circle(y,z,color = 'seagreen')
show(p2)

Third plot

In [242]:
p3 = figure(width=300, height=300)
# Line thickness is set with `line_width`; `width` is not a property of the
# line glyph and is rejected as an unexpected attribute.
p3.line(x, z, color='hotpink', line_width=2)
show(p3)

Now, we can combine them:

In [244]:
p = gridplot([[p1,p2],[p3,None]])
show(p)

Now we have three plots at the same time, and we can use the toolbar to zoom in and out or move around. Since these plots share data, we may want all of them to move together. In other words, if we move to higher values of `y` in the first plot, the second plot should show the same range of `y`. We can do this by linking the plots.

In [245]:
# Plots are linked by sharing range objects: panning or zooming one plot
# changes the shared range, so every plot using it follows.
p1 = figure(width=300, height=300)
p1.circle(x, y, color='maroon')

# p2 shows `y` on its x-axis, so its x range is p1's y range.
p2 = figure(width=300, height=300, x_range=p1.y_range)
p2.circle(y, z, color='seagreen')

# p3 shares x with p1 and z (its y-axis) with p2.
p3 = figure(width=300, height=300, x_range=p1.x_range, y_range=p2.y_range)
# Line thickness is set with `line_width`; `width` is not a property of the
# line glyph and is rejected as an unexpected attribute.
p3.line(x, z, color='hotpink', line_width=2)

p = gridplot([[p1, p2], [p3, None]])
show(p)

# Widgets


# Geospatial plotting


In [354]:
from bokeh.models import WMTSTileSource

In [378]:
def cvt_lat(x):
    """Convert a latitude in degrees to a Mercator y coordinate.

    Computes ``k * ln(tan((90 + x) * pi / 360))`` with ``k = 6378137``.
    Relies on numpy functions, so ``x`` may be a scalar or a numpy array.
    """
    scale = 6378137
    half_angle = (90 + x) * np.pi / 360
    return scale * np.log(np.tan(half_angle))
    
def cvt_lon(x):
    """Convert a longitude in degrees to a Mercator x coordinate.

    Computes ``x * k * pi / 180`` with ``k = 6378137``.
    """
    scale = 6378137
    scaled = x * scale
    return scaled * np.pi / 180


In [379]:
cvt_lat(-20)

-2273030.92698769

In [381]:
# Map extent as (start, end) pairs, converted from longitude/latitude degrees
# with the helpers above. The coordinates (about 115.86 E, 31.96 S) are
# around Perth, Western Australia.
x_range = (cvt_lon(115.84), cvt_lon(115.88))
# Listed south -> north so that start < end; the other order flips the y-axis.
y_range = (cvt_lat(-31.98), cvt_lat(-31.94))
# NOTE(review): the name `USA` is kept only so existing references keep
# working; the extent it holds is not in the USA.
USA = x_range, y_range

p = figure(tools='pan, wheel_zoom', x_range=x_range, y_range=y_range,
           x_axis_type="mercator", y_axis_type="mercator")
url = 'http://a.basemaps.cartocdn.com/rastertiles/voyager/{Z}/{X}/{Y}.png'
attribution = "Tiles by Carto, under CC BY 3.0. Data by OSM, under ODbL"

# Draw the map tiles served from `url` underneath the (still empty) plot.
p.add_tile(WMTSTileSource(url=url, attribution=attribution))
show(p)

# Callbacks
Callbacks allow us to change properties of a plot after it is created.

In [429]:
# Data source holding the yearly production of a single state; the callback
# cells below replace its contents to switch states.
source = ColumnDataSource(data={
    'year': ngdf.index,
    'production': ngdf['Other']
})
hover = HoverTool(tooltips=[('Year', '@year'), ('Production', '@production MMcf')])

p = figure(width=600, height=400)
r = p.line('year', 'production', line_width=3, source=source)
# The hover tool was created but never attached, so no tooltips appeared.
p.add_tools(hover)
p.yaxis.formatter = PrintfTickFormatter(format="%d")  # show tick values as full numbers
# `notebook_handle=True` lets `push_notebook()` update this rendered plot later.
show(p, notebook_handle=True)

In [430]:
def update(state = 'Other'):
    """Show the yearly production of one state on the existing line plot.

    Replaces the contents of the module-level ``source`` with the ``state``
    column of ``ngdf``, sets the title of ``p`` to the state name, pushes the
    change to the notebook and then calls ``show(p)``.

    Parameters
    ----------
    state : str
        Column of ``ngdf`` to display; defaults to 'Other'.
    """
    source.data = dict(year=ngdf.index, production=ngdf[state])
    p.title.text = state
    push_notebook()
    show(p)


In [431]:
# NOTE(review): `IPython.html.widgets` looks like a legacy import path; newer
# setups expose `interact` from the `ipywidgets` package instead. Confirm
# which one the target environment provides.
from IPython.html.widgets import interact
# Passing the list of column names lets `interact` offer them as choices for
# `state` (a dropdown in the output below) and call `update` on every change.
# The trailing `;` keeps the return value out of the cell output.
interact(update,state = ngdf.columns.to_list());

interactive(children=(Dropdown(description='state', options=('Other', 'Alabama', 'Arizona', 'Florida', 'Idaho'…

In [443]:
@interact
def ChangeColor(color=['red', 'blue', 'green', 'black'], thickness = (1,5,1)):
    """Restyle the line renderer ``r`` from the widget values and redraw ``p``.

    The list and tuple defaults are handed to ``interact`` by the decorator;
    the output of this cell shows a dropdown for ``color``. The control for
    ``thickness`` is presumably a slider from 1 to 5 in steps of 1 (confirm
    in the notebook).
    """
    line_glyph = r.glyph
    line_glyph.line_width = thickness
    line_glyph.line_color = color
    push_notebook()
    show(p)

interactive(children=(Dropdown(description='color', options=('red', 'blue', 'green', 'black'), value='red'), I…