In [1]:
from bokeh.charts import Donut, show, output_notebook, vplot
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data
from bokeh.sampledata.autompg import autompg

# Direct bokeh output to the notebook so the show() calls in later cells render inline.
output_notebook()

In [3]:
import pandas as pd

## Generic Examples

### Values with implied index

In [4]:
# Only the values are supplied; no labels are given, so the chart
# falls back to the implied (positional) index.
wedge_values = [2, 4, 5, 2, 8]
d = Donut(wedge_values)
show(d)

### Values with Explicit Index

In [5]:
# The Series index carries an explicit label for each value.
labeled_values = pd.Series(
    data=[2, 4, 5, 2, 8],
    index=['a', 'b', 'c', 'd', 'e'],
)
d = Donut(labeled_values)
show(d)

## Autompg Data

### Take a look at the data

In [6]:
# Preview the first five rows of the sample data to see which columns are available.
autompg.head(n=5)

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


### Simple example implies count when object or categorical

In [7]:
# Cast the cylinder column to strings so it is passed as object data;
# per the heading for this cell, the chart then implies a count.
cyl_as_text = autompg['cyl'].astype(str)
d = Donut(cyl_as_text)
show(d)

### Equivalent with columns specified

In [8]:
# Same request spelled out with column names: label by 'cyl', aggregate with 'count'.
count_by_cyl = dict(label='cyl', agg='count')
d = Donut(autompg, **count_by_cyl)
show(d)

### Given an indexed series of data pre-aggregated

In [9]:
# Aggregate up front: mean of 'displ' per 'cyl' group, as a series indexed by 'cyl'.
mean_displ_by_cyl = autompg.groupby('cyl')['displ'].mean()
d = Donut(mean_displ_by_cyl)
show(d)

### Equivalent with columns specified

In [10]:
# Hand the raw frame to the chart and name the columns:
# label by 'cyl', aggregate 'displ' with 'mean'.
d = Donut(
    autompg,
    label='cyl',
    values='displ',
    agg='mean'
)
show(d)

### Given a multi-indexed series of data pre-aggregated
Since the aggregation type isn't specified, we must provide it to the chart for use in the tooltip, otherwise it will just say "value".

In [11]:
# Pre-aggregated mean of 'displ' for every ('cyl', 'origin') pair.
mean_displ_by_cyl_origin = autompg.groupby(['cyl', 'origin'])['displ'].mean()

# The series does not record how it was aggregated, so pass the
# aggregation name explicitly for use in the tooltip.
d = Donut(mean_displ_by_cyl_origin, hover_text='mean')
show(d)

### Column Labels Produce a Slightly Different Result
In the previous series-input example we do not have the original values, so we cannot size the wedges based on the mean of displacement for Cyl and then size the wedges proportionally inside of each Cyl wedge. This column-labeled example can perform the right sizing, so it is preferred for any aggregated values.

In [12]:
# Two label levels ('cyl', then 'origin') taken from the raw columns,
# with the chart computing the mean of 'displ' itself.
nested_labels = ['cyl', 'origin']
d = Donut(autompg, label=nested_labels, values='displ', agg='mean')
show(d)

### The spacing between each donut level can be altered
By default, this is applied to only the levels other than the first.

In [13]:
# A single number for level_spacing; per the accompanying text it is
# applied to every level except the first.
d = Donut(
    autompg,
    label=['cyl', 'origin'],
    values='displ',
    agg='mean',
    level_spacing=0.15
)
show(d)

### Can specify the spacing for each level
This is applied to each level individually, including the first.

In [14]:
# One spacing value per label level; per the accompanying text each
# level, including the first, gets its own entry.
per_level_spacing = [0.8, 0.3]
d = Donut(
    autompg,
    label=['cyl', 'origin'],
    values='displ',
    agg='mean',
    level_spacing=per_level_spacing
)
show(d)

## Olympics Example

### Take a look at source data

In [15]:
# List the top-level keys of the raw sample data ...
print(data.keys())

# ... then display the first entry under 'data' to see the shape of one record.
country_records = data['data']
country_records[0]

[u'count', u'object', u'data']


{u'abbr': u'ALB',
 u'medals': {u'bronze': 0, u'gold': 0, u'silver': 0, u'total': 0},
 u'name': u'Albania'}

### Look at table formatted data

In [16]:
# utilize utility to make it easy to get json/dict data converted to a dataframe
df = df_from_json(data)
df.head()

Unnamed: 0,abbr,bronze,gold,silver,total,name
0,ALB,0,0,0,0,Albania
1,AND,0,0,0,0,Andorra
2,ARG,0,0,0,0,Argentina
3,ARM,0,0,0,0,Armenia
4,AUS,1,0,2,3,Australia


### Prepare the data
This data is in a "pivoted" format, and since the charts interface is built around referencing columns, it is more convenient to de-pivot the data.

- We will keep only the countries with the most total medals (more than 8) and sort them by total medals, highest first.
- Use pandas.melt to de-pivot the data.

In [18]:
# Keep only the countries with more than 8 total medals, ordered from the
# most to the fewest total medals. (The previous comment said "at least one
# medal", which did not match the `> 8` filter.)
df = df[df['total'] > 8].sort_values("total", ascending=False)

# De-pivot the per-medal columns: each row becomes one (abbr, medal) pair
# with its count in 'medal_count', so the chart can reference plain columns.
olympics = pd.melt(df, id_vars=['abbr'],
                   value_vars=['bronze', 'silver', 'gold'],
                   value_name='medal_count', var_name='medal')
olympics.head()

Unnamed: 0,abbr,medal,medal_count
0,RUS,bronze,7
1,NLD,bronze,8
2,USA,bronze,10
3,NOR,bronze,7
4,CAN,bronze,4


In [19]:
# Original example: label by 'abbr' then 'medal', using 'medal_count' as the
# values, with a smaller label font and 'medal_count' as the hover text.
d0 = Donut(
    olympics,
    label=['abbr', 'medal'],
    values='medal_count',
    text_font_size='8pt',
    hover_text='medal_count'
)
show(d0)