# Bokeh Basic Plotting Exercise

###  Imports

In [2]:
import numpy as np

from bokeh.io import output_notebook, show
from bokeh.plotting import figure

#### Load BokehJS into Jupyter

In [5]:
# Load BokehJS into the notebook session so that later show() calls render inline.
output_notebook()

### Scatter Plots

The basic steps for creating a plot are:
* Create a blank figure (`p = figure()`)
* Call a glyph method on it (e.g. `p.circle()`)
* Call `show()` to display the figure

In [34]:
# Blank 400x400 figure for the scatter example; the glyphs are added in the next cell.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
scatter_ = figure(width=400, height=400, title="My Scatter Plot")


In [101]:
# Sample coordinates; the line-plot cell further down reuses these two lists.
x_axis_ = [1, 2, 3, 4, 5]
y_axis_ = [6, 7, 2, 4, 6]

# Draw large, semi-transparent circle markers on the scatter figure.
scatter_.circle(x=x_axis_, y=y_axis_, size=20, alpha=0.6)

show(scatter_)

#### Line Plots

In [36]:
# Blank 400x400 figure for the line example.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
line_ = figure(width=400, height=400, title="My Line Plot")

# Connect the sample points defined in the scatter section with a line of width 2.
line_.line(x_axis_, y_axis_, line_width=2)

show(line_)

#### DateTime axes

##### Use the Glucose data set for this

In [41]:
# Fetch Bokeh's sample data sets into the local Bokeh data directory
# (the directory used is reported in the cell output).
# The submodule is imported explicitly rather than relying on `import bokeh`
# happening to expose `bokeh.sampledata` as an attribute.
# NOTE(review): the files are stored on disk, so this cell presumably only
# needs to succeed once per machine -- confirm before re-running it routinely.

import bokeh.sampledata

bokeh.sampledata.download()

Using data directory: C:\Users\Tasyp\.bokeh\data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Downloading

In [42]:
# Load the glucose sample data set (needs the sample-data download above)
# and preview its first rows.
from bokeh.sampledata.glucose import data
data.head()

Unnamed: 0_level_0,isig,glucose
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-03-24 09:51:00,22.59,258.0
2010-03-24 09:56:00,22.52,260.0
2010-03-24 10:01:00,22.23,258.0
2010-03-24 10:06:00,21.56,254.0
2010-03-24 10:11:00,20.79,246.0


In [43]:
# Summary statistics (count, mean, spread, quartiles) for each numeric column.
data.describe()

Unnamed: 0,isig,glucose
count,37674.0,37673.0
mean,18.137795,137.33167
std,8.147366,53.252266
min,3.88,40.0
25%,12.47,100.0
50%,16.59,126.0
75%,22.12,162.0
max,69.34,400.0


In [44]:
# Index range, column dtypes and non-null counts; a difference between the
# per-column counts reveals missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 37674 entries, 2010-03-24 09:51:00 to 2010-08-01 00:00:00
Data columns (total 2 columns):
isig       37674 non-null float64
glucose    37673 non-null float64
dtypes: float64(2)
memory usage: 883.0 KB


In [46]:
# Boolean mask: True where a glucose reading is present, False where it is missing.
not_null_mask = data["glucose"].notnull()

In [47]:
# Keep only the rows flagged by the mask, i.e. rows that have a glucose reading.
new_data = data.loc[not_null_mask]

In [48]:
# Preview the first rows of the filtered frame.
new_data.head()

Unnamed: 0_level_0,isig,glucose
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-03-24 09:51:00,22.59,258.0
2010-03-24 09:56:00,22.52,260.0
2010-03-24 10:01:00,22.23,258.0
2010-03-24 10:06:00,21.56,254.0
2010-03-24 10:11:00,20.79,246.0


In [50]:
# Confirm that no missing values remain: every column should report the same non-null count.
new_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 37673 entries, 2010-03-24 09:51:00 to 2010-08-17 13:56:00
Data columns (total 2 columns):
isig       37673 non-null float64
glucose    37673 non-null float64
dtypes: float64(2)
memory usage: 883.0 KB


### Plot Line Graph using the datetime

In [51]:
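# Reduce the datetime range before plotting (the cells below plot the single day 2010-03-24).
# NOTE: the info() output above shows readings only for March-August 2010, so the
# October week in the line below would not match any rows.
# week = data.loc['2010-10-01' : '2010-10-08']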

In [52]:
# Wide figure with a datetime x-axis for the glucose readings.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
fig = figure(x_axis_type='datetime', title="Glucose Range", height=350, width=800)

In [65]:
# Preview the readings recorded on 2010-03-24 (a date string selects that whole day from the DatetimeIndex).
data.loc['2010-03-24'].head()

Unnamed: 0_level_0,isig,glucose
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-03-24 09:51:00,22.59,258.0
2010-03-24 09:56:00,22.52,260.0
2010-03-24 10:01:00,22.23,258.0
2010-03-24 10:06:00,21.56,254.0
2010-03-24 10:11:00,20.79,246.0


In [66]:
# Readings for the single day being plotted, selected once and reused below.
glucose_day = data.loc['2010-03-24']

# Axis titles.
fig.xaxis.axis_label = 'Time'
fig.yaxis.axis_label = 'Value'

# Grid styling: hide the x-grid lines and fade the y-grid lines.
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_alpha = 0.5

# Glucose value against timestamp.
fig.line(glucose_day.index, glucose_day.glucose)

show(fig)

### Plot Line Graph Using Stock Market Data

In [74]:
from bokeh.sampledata.stocks import AAPL

In [68]:
# Convert the 'date' entries of the AAPL dict into a NumPy datetime64 array
# so they can be plotted on a datetime axis.
dates = np.array(AAPL["date"], dtype="datetime64")

In [69]:
# Display the array to check the parsed values and the resulting dtype.
dates

array(['2000-03-01', '2000-03-02', '2000-03-03', ..., '2013-02-27',
       '2013-02-28', '2013-03-01'], dtype='datetime64[D]')

In [90]:
# Convert the 'close' entries of the AAPL dict into a NumPy array
# (the element type is checked in the next cell).
close = np.array(AAPL['close'])

In [95]:
# Check the element type of the closing-price array.
type(close[0])

numpy.float64

In [98]:
# Plot the AAPL closing price against the trading date.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
fig = figure(x_axis_type='datetime', title="AAPL Stock", height=350, width=800)

# Grid styling: hide the x-grid lines and fade the y-grid lines.
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_alpha = 0.5

# Axis titles.
fig.xaxis.axis_label = 'Date'
fig.yaxis.axis_label = 'Close'

fig.line(dates, close)

show(fig)

In [110]:
# Learn more about hex-tile graphs:
# a density plot of n points drawn from a 2-D standard normal distribution.

from bokeh.palettes import Viridis256
from bokeh.util.hex import hexbin

n = 50000

# One hex size shared by the binning step and the drawing step. Previously the
# points were binned at 0.2 but drawn at 0.1, which placed every tile at half
# its true data coordinate.
hex_size = 0.2

# A dedicated, seeded generator makes the figure identical on every run
# without touching NumPy's global random state.
rng = np.random.RandomState(42)
x = rng.standard_normal(n)
y = rng.standard_normal(n)

# Count the points falling into each hexagonal tile (columns: q, r, counts).
bins = hexbin(x, y, hex_size)

# Scale each tile's count to a palette index in 0..255; the fullest tile gets 255.
color = [Viridis256[int(i)] for i in bins.counts / bins.counts.max() * 255]

# match_aspect ensures neither dimension is squished, regardless of the plot size
p = figure(tools="wheel_zoom,reset", match_aspect=True, background_fill_color='#440154')
p.grid.visible = False

p.hex_tile(bins.q, bins.r, size=hex_size, line_color=None, fill_color=color)

show(p)

### Plot with multiple glyphs

In [111]:
# set up some data
# Sample data shared by both glyphs.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
mutli_glyphs_ = figure(width=400, height=400)

# Add both a line and circles on the same plot; the white-filled circles
# are drawn after the line and mark each data point.
mutli_glyphs_.line(x, y, line_width=2)
mutli_glyphs_.circle(x, y, fill_color="white", size=8)

show(mutli_glyphs_)