In [1]:
import pandas as pd
from bokeh.charts import output_notebook, show

In [2]:
# Direct Bokeh output to the notebook so the show() calls in later cells
# display their charts inline.
output_notebook()

# Scatter

In [3]:
# df.columns

In [4]:
from bokeh.charts import Scatter
from bokeh.sampledata.autompg import autompg as df

# Horsepower plotted against fuel economy, with points coloured by the
# 'cyl' column and the legend anchored at the top right.
p = Scatter(
    df,
    x='mpg',
    y='hp',
    color='cyl',
    title="HP vs MPG (shaded by CYL)",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    legend="top_right",
)

show(p)

In [2]:
from bokeh.sampledata.autompg import autompg as df
from bokeh.charts import Scatter, output_notebook, show
output_notebook()

In [6]:
# Distinct values of the 'origin' column; the scatter plot in the next cell
# assigns one marker per value. (A notebook cell only echoes its final
# expression, so this is the single inspection kept in the cell.)
df.origin.unique()

array([1, 3, 2])

In [3]:
# Same HP-vs-MPG scatter, now encoding two columns at once:
# colour follows 'cyl' and marker shape follows 'origin'.
scatter = Scatter(
    df,
    x='mpg',
    y='hp',
    color='cyl',
    marker='origin',
    title="Auto MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)

# Optional: uncomment to target a standalone HTML file instead
# (output_file is not imported in the cells shown here, so import it first).
# output_file('scatter.html')
show(scatter)

# Bar


In [5]:
from bokeh.charts import Bar
from bokeh.charts.attributes import cat, color
from bokeh.charts.operations import blend
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data



In [15]:
# Sanity check: inspect the container type of the imported olympics2014
# sample `data` before converting it to a DataFrame.
type(data)


dict

In [16]:
# Bokeh's df_from_json helper turns the JSON-style dict into a DataFrame.
medal_table = df_from_json(data)

# Keep only countries that won at least one medal, then order them from the
# highest to the lowest medal total.
has_medal = medal_table['total'] > 0
df = medal_table[has_medal].sort_values(by="total", ascending=False)

In [17]:
# Preview the first rows of the filtered, descending-by-total medal table.
df.head()

Unnamed: 0,abbr,bronze,gold,silver,total,name
65,RUS,7,6,8,21,Russian Fed.
81,USA,10,6,4,20,United States
54,NLD,8,6,6,20,Netherlands
56,NOR,7,8,4,19,Norway
13,CAN,4,4,8,16,Canada


In [56]:
# Stacked bar chart of medals: one bar per country abbreviation, stacked and
# coloured by medal type. sort=False is passed to every attribute spec so the
# DataFrame's existing row order and the bronze/silver/gold order are used
# as given.
bar = Bar(
    df,
    # Blend the three medal columns into one value series named 'medals',
    # labelled by a 'medal' column.
    values=blend('bronze', 'silver', 'gold', name='medals', labels_name='medal'),
    label=cat(columns='abbr', sort=False),
    stack=cat(columns='medal', sort=False),
    color=color(
        columns='medal',
        palette=['SaddleBrown', 'Silver', 'Goldenrod'],
        sort=False,
    ),
    tooltips=[('medal', '@medal'), ('country', '@abbr')],
    title="Medals per Country, Sorted by Total Medals",
    legend='top_right',
)

show(bar)

# Histogram

In [21]:
from bokeh.charts import Histogram, show, output_notebook
from bokeh.sampledata.autompg import autompg as df
output_notebook()

In [29]:
# Inspect the schema and the first rows before reordering.
print(df.columns)
print(df.head())

# `df` is the DataFrame object imported from bokeh.sampledata.autompg.
# Sorting it with inplace=True would reorder that shared object, so re-running
# this cell, or importing `autompg` again in a later cell, would see already
# reordered rows. Rebind `df` to a sorted copy and leave the sample data
# untouched.
df = df.sort_values('cyl', ascending=False)
df.head()

Index([u'mpg', u'cyl', u'displ', u'hp', u'weight', u'accel', u'yr', u'origin',
       u'name'],
      dtype='object')
      mpg  cyl  displ   hp  weight  accel  yr  origin                name
241  21.5    3   80.0  110    2720   13.5  77       3          mazda rx-4
331  23.7    3   70.0  100    2420   12.5  80       3       mazda rx-7 gs
110  18.0    3   70.0   90    2124   13.5  73       3           maxda rx3
70   19.0    3   70.0   97    2330   13.5  72       3     mazda rx2 coupe
235  30.5    4   98.0   63    2051   17.0  77       1  chevrolet chevette


Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
163,20.0,8,262.0,110,3221,13.5,75,1,chevrolet monza 2+2
63,15.0,8,318.0,150,4135,13.5,72,1,plymouth fury iii
211,13.0,8,350.0,145,4055,12.0,76,1,chevy c10
212,13.0,8,302.0,130,3870,15.0,76,1,ford f108


In [25]:
# Horsepower histogram with one colour per value of the 'origin' column.
# The title names the column actually used for colouring, so the legend and
# the heading agree.
hist = Histogram(df, values='hp', color='origin',
                 title="HP Distribution by Origin", legend='top_right')

show(hist)

# Boxplot

In [4]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df
output_notebook()

In [6]:
# Box plot of MPG with one box per value of the 'origin' column; the same
# column drives the colour. The title names that column so it matches what
# is plotted.
p = BoxPlot(df, values='mpg', label='origin', color='origin',
            title="MPG Summary (grouped and shaded by ORIGIN)")

show(p)

# Heatmap

In [1]:
from bokeh.charts import HeatMap, bins, output_notebook, show
from bokeh.sampledata.autompg import autompg

output_notebook()

In [3]:
# Preview the first rows of the autompg sample data used for the heat map.
autompg.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [6]:
# Heat map over two binned columns: horsepower on the x axis and
# displacement on the y axis.
hm = HeatMap(
    autompg,
    x=bins('hp'),
    y=bins('displ'),
)

show(hm)

# Step

In [12]:
from bokeh.charts import Step, output_notebook, show
import pandas as pd

In [17]:
# Preview the years 1999 through 2015 (the stop value 2016 is excluded).
# The recorded output below is a list, i.e. this was run where range()
# returns a list (Python 2).
range(1999, 2016)

[1999,
 2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015]

In [18]:
# Build a dataset where multiple columns measure the same thing: the 'stamp'
# and 'postcard' rates, one value per year.
data = dict(
    stamp=[
        .33, .33, .34, .37, .37, .37, .37, .39, .41, .42,
        .44, .44, .44, .45, .46, .49, .49],
    postcard=[
        .20, .20, .21, .23, .23, .23, .23, .24, .26, .27,
        .28, .28, .29, .32, .33, .34, .35],
    # list(...) makes 'year' a concrete list on both Python 2 and Python 3
    # (a bare range() is a lazy object on Python 3), so all three columns
    # have the same type and length.
    year=list(range(1999, 2016)),
)

In [23]:
# Step chart of both rate columns against 'year'. The same column list is
# passed for y, dash and color, so each measure gets its own colour and dash
# style.
line = Step(
    data,
    x='year',
    y=['stamp', 'postcard'],
    color=['stamp', 'postcard'],
    dash=['stamp', 'postcard'],
    title="U.S. Postage Rates (1999-2015)",
    ylabel='Rate per ounce',
    legend=True,
)

show(line)