In [1]:
from pathlib import Path
import pandas as pd
from bokeh.plotting import figure, ColumnDataSource, show
from bokeh.models.mappers import LinearColorMapper
from bokeh.palettes import brewer
from bokeh.io import output_notebook
from math import sqrt, log

# Configure bokeh so that show() renders plots inline in the notebook output cells.
output_notebook()

In [2]:
def comet_chart(df, **kwargs):
    """
    Generates a comet chart, as originally described by Zan Armstrong, with bokeh's figure.patches().
    See https://www.zanarmstrong.com/#/infovisresearch/ for details.

    A comet chart compares two scenarios: (weight_start, value_start) vs (weight_end, value_end).
    weight_start and weight_end denote the size of the population; value_start and value_end
    denote the metric. Each record is drawn as a triangle ("comet") whose tail sits at the start
    point and whose head is a short edge centred on the end point, perpendicular to the direction
    of travel. Weight is plotted on the x axis and value on the y axis.

    df:     Pandas dataframe where the last four columns, i.e. df.iloc[:,-4:], contain
            weight_start, weight_end, value_start, value_end datapoints for each record.
            Preceding columns df.iloc[:,:-4] may contain hierarchy of subpopulations or segments
            for which weighted averages and sums can be calculated.
    kwargs: accepted but currently ignored.

    Returns a tuple (cdf, plot): cdf is a dataframe with one row per comet and the columns
    '_ids' (index label of the source row), '_delta_weight' (weight_end - weight_start),
    '_xs' and '_ys' (the three triangle vertices); plot is the bokeh figure holding the patches,
    coloured by '_delta_weight'.
    """

    def values_to_points(_id, weight_start, weight_end, value_start, value_end):
        """Returns dict with xs, ys, delta_weight for single comet"""
        a = weight_end - weight_start
        b = value_end - value_start
        # The head's half-width is 1/16 of the comet length, measured along the
        # perpendicular (b, -a) / length. The length cancels out, which also means a
        # record with identical start and end points no longer divides by zero; it
        # collapses to a single point instead.
        offset_x = b / 16
        offset_y = a / 16
        return {
            '_ids': _id,
            '_delta_weight': a,
            '_xs': [weight_start, weight_end + offset_x, weight_end - offset_x],
            '_ys': [value_start, value_end - offset_y, value_end + offset_y],
        }

    # parse data into dataframe of comets; row[0] is the index label, the last four
    # tuple fields are weight_start, weight_end, value_start, value_end
    comets = [
        values_to_points(row[0], row[-4], row[-3], row[-2], row[-1])
        for row in df.itertuples()
    ]
    cdf = pd.DataFrame(comets)
    source = ColumnDataSource(cdf)

    # generate plot
    plot = figure()
    color_mapper = LinearColorMapper(palette=brewer['RdBu'][11],
                                     high=cdf['_delta_weight'].max(),
                                     low=cdf['_delta_weight'].min(),
                                     )
    # patches() takes the x coordinates first, then the y coordinates.
    plot.patches('_xs', '_ys', source=source,
                 fill_color={'field': '_delta_weight', 'transform': color_mapper},
                 fill_alpha=0.7,
                 line_color={'field': '_delta_weight', 'transform': color_mapper},
                 )
    return (cdf, plot)
        

In [3]:
# test using CDC wonder dataset, read from data.csv in the current working directory
data = Path.cwd() / 'data.csv'
# DataFrame.from_csv was removed in pandas 1.0; read_csv with these arguments
# reproduces its defaults (first column becomes the index, dates are parsed).
df = pd.read_csv(data, index_col=0, parse_dates=True)

# calculate log values: append a log_<col> copy of each of the last four columns
for col in df.columns[-4:]:
    df['log_' + col] = df[col].map(log)

# after the loop the last four columns are the log_* ones, so those are what is charted
comet = comet_chart(df.reset_index().iloc[:, -4:])
show(comet[1])

In [8]:
# Rebuild the colour mapper with the same arguments comet_chart uses, so it can be
# inspected on its own. The dangling `cmap.` (a SyntaxError) is replaced by a bare
# `cmap`, which displays the mapper as the cell output.
cmap = LinearColorMapper(palette=brewer['RdBu'][11],
                         high=comet[0]['_delta_weight'].max(),
                         low=comet[0]['_delta_weight'].min(),
                         )
cmap

In [4]:
#TODO: add hover tool with population/segment characteristics, _weight_value
#TO DO: colors and axis are off
#TO DO: add brushed sortable bar chart