In [1]:
import numpy as np
import bokeh
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.charts import Histogram
from bokeh.sampledata.iris import flowers

# Configure Bokeh so that later show() calls render their plots inline in the notebook.
output_notebook()

The bokeh.charts API has moved to a separate 'bkcharts' package.

This compatibility shim will remain until Bokeh 1.0 is released.
After that, if you want to use this API you will have to install
the bkcharts package explicitly.

  warn(message)


The flowers dataset is a dataframe with observational values for a number of samples.
In Python, methods can be called directly on an object using dot notation.
View just the top 5 rows by calling the "head" method on the flowers dataframe.

In [2]:
# Preview the first five observations of the iris dataframe.
flowers.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Show the final three observations of the dataframe.
display(flowers.tail(n=3))

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


Notice that the index starts at 0, so the row labels range from 0 to 149.  You can view any range of rows within the dataframe using slice notation, `[start:stop:step]`.  The following starts at index 3 (the fourth row), stops before index 10, and takes every second row, i.e. rows 3, 5, 7, and 9.

In [4]:
# Positional row slice: start at row 3, stop before row 10, take every second row.
flowers.iloc[3:10:2]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
3,4.6,3.1,1.5,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
7,5.0,3.4,1.5,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [5]:
# Order the rows by sepal length (smallest first) and show the first five.
flowers.sort_values("sepal_length", ascending=True).head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa


In [6]:
# Boolean mask: keep only the rows whose sepal length is below 5.
flowers.loc[flowers["sepal_length"] < 5]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
6,4.6,3.4,1.4,0.3,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa
11,4.8,3.4,1.6,0.2,setosa
12,4.8,3.0,1.4,0.1,setosa
13,4.3,3.0,1.1,0.1,setosa
22,4.6,3.6,1.0,0.2,setosa


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
flowers.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [8]:
# List the distinct species names present in the data ...
display(flowers["species"].unique())
# ... and count how many samples belong to each of them.
display(flowers["species"].value_counts())

array(['setosa', 'versicolor', 'virginica'], dtype=object)

versicolor    50
virginica     50
setosa        50
Name: species, dtype: int64

In [9]:
# Histogram of petal length over all samples, without splitting by species.
hist = Histogram(
    flowers,
    values="petal_length",
)
show(hist)

In [10]:
# Same petal-length histogram, now grouped and colored by the "species" column.
hist2 = Histogram(
    flowers,
    values="petal_length",
    label="species",
    color="species",
)
show(hist2)

In [11]:
# Give every species a fixed color, then expand that to one color per sample.
colormap = {"setosa": "red", "versicolor": "green", "virginica": "blue"}
colors = [colormap[species] for species in flowers["species"]]

# Scatter plot of petal width against petal length; the axis labels are set
# when the figure is created.
p = figure(
    title="Iris Morphology",
    x_axis_label="Petal Length",
    y_axis_label="Petal Width",
)
p.circle(
    x=flowers["petal_length"],
    y=flowers["petal_width"],
    color=colors,
    fill_alpha=0.2,
    size=10,
)

show(p)

Unfortunately, adding a legend is not straightforward with this method (as far as I know), since the colors are not directly attached to the data.  It's best not to alter the original data, so we will create a temporary copy of the data frame and add a column called "colors" as defined by the color map.  The "for" loop will then iterate over each of the species varieties, pull the rows that match that variety, and draw circles positioned by petal length (x-axis) and petal width (y-axis), with the corresponding color and legend label.

In [12]:
# Map each species to the color used for its circles and its legend entry.
colormap = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}

# Work on an explicit copy. A plain `temp = flowers` would only bind a second
# name to the same dataframe, so adding the "colors" column below would also
# modify the shared `flowers` sample data that the other cells rely on.
temp = flowers.copy()
temp['colors'] = [colormap[x] for x in temp['species']]

# Index by species so that temp.loc[variety, ...] selects every row of one variety.
temp = temp.set_index("species")
display(temp.index)

p = figure(title = "Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Petal Width'

# One circle glyph per species, so each species gets its own legend entry.
for variety in temp.index.unique():
    p.circle(temp.loc[variety, 'petal_length'], temp.loc[variety, "petal_width"],
             color=temp.loc[variety, "colors"], fill_alpha=0.2, size=10,
             legend=variety)

p.legend.location = "top_left"
show(p)


Index(['setosa', 'setosa', 'setosa', 'setosa', 'setosa', 'setosa', 'setosa',
       'setosa', 'setosa', 'setosa',
       ...
       'virginica', 'virginica', 'virginica', 'virginica', 'virginica',
       'virginica', 'virginica', 'virginica', 'virginica', 'virginica'],
      dtype='object', name='species', length=150)

Here is another example of how to add a legend, this time using a ColumnDataSource with different data.

In [13]:
from bokeh.models import ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import RdBu3

# Six points that alternate between the "hi" and "lo" categories.
source = ColumnDataSource({
    "x": [1, 2, 3, 4, 5, 6],
    "y": [2, 1, 2, 1, 2, 1],
    "label": ["hi", "lo", "hi", "lo", "hi", "lo"],
})

# Translate each category into one of the two end colors of the RdBu3 palette.
color_mapper = CategoricalColorMapper(
    factors=["hi", "lo"],
    palette=[RdBu3[2], RdBu3[0]],
)

p = figure(x_range=(0, 7), y_range=(0, 3), height=300, tools="save")

# Both the color and the legend are driven by the "label" column of the source.
p.circle(
    x="x",
    y="y",
    radius=0.5,
    source=source,
    color=dict(field="label", transform=color_mapper),
    legend="label",
)
show(p)

Here is another way to build up a plot: add the lines one at a time and give each glyph its own legend label and style.

In [14]:
# 100 evenly spaced sample points between 0.1 and 5.
x = np.linspace(0.1, 5, 100)

# A logarithmic y axis keeps the slow and the very fast growing curves on one plot.
p = figure(
    title="log axis example",
    y_axis_type="log",
    y_range=(0.001, 10**22),
)

# y = sqrt(x)
p.line(
    x, np.sqrt(x),
    legend="y=sqrt(x)",
    line_color="tomato",
    line_dash="dotdash",
)

# y = x, drawn as a line plus circle markers that share one legend label.
p.line(x, x, legend="y=x")
p.circle(x, x, legend="y=x")

# y = x**2, again a line plus (hollow) circle markers under one legend label.
p.line(x, x**2, legend="y=x**2")
p.circle(
    x, x**2,
    legend="y=x**2",
    fill_color=None,
    line_color="olivedrab",
)

# y = 10^x
p.line(
    x, 10**x,
    legend="y=10^x",
    line_color="gold",
    line_width=2,
)

# y = x^x
p.line(
    x, x**x,
    legend="y=x^x",
    line_dash="dotted",
    line_color="indigo",
    line_width=2,
)

# y = 10^(x^2)
p.line(
    x, 10**(x**2),
    legend="y=10^(x^2)",
    line_color="coral",
    line_dash="dashed",
    line_width=2,
)

p.legend.location = "top_left"

# Also write the plot to a standalone HTML file.
output_file("logplot.html", title="log plot example")

show(p)  # open a browser