# Waterfall Chart

See:

- [Wikipedia article](https://en.wikipedia.org/wiki/Waterfall_chart)
- [ggplot2 + waterfalls](https://r-charts.com/flow/waterfall-chart/)

In [1]:
from lets_plot import *

In [2]:
# Set up lets-plot for HTML output (presumably needed once per notebook before plots render; confirm in the lets-plot docs)
LetsPlot.setup_html()

In [3]:
# Sample input for the examples below: 'x' holds the category labels (they must be
# unique; _get_stat_data asserts this) and 'y' holds the signed change of each
# category. The values are accumulated into a running total in the order given here.
data = dict(
    x = ["A", "C", "B", "D", "E"],
    y = [100, 300, -200, 100, -200],
)

In [4]:
# --- Column names of the computed (stat) data; tooltips refer to them as "@<name>" ---
_DY_NAME = "dy"
_CUMSUM_NAME = "cumsum"
_INITIAL_NAME = "initial"

# --- Human-readable titles shown in the default tooltip ---
_DY_TITLE = "Difference"
_CUMSUM_TITLE = "Cumulative sum"
_INITIAL_TITLE = "Initial"

# Key of the category column in the stat data; it is also the name that the
# fill/color aesthetics are mapped to in waterfall_plot().
_DIFF_TITLE = "Difference"

# Colors per category key (the keys match those of _DIFF_CATEGORIES_DEF).
_CATEGORY_COLORS = dict(
    increase="#4daf4a",
    decrease="#e41a1c",
    total="#377eb8",
)

# Accepted values of the `map_categories_to` parameter.
_MAP_CATEGORIES_TO_VALUES = ['box', 'label', 'none']

# --- Defaults of the waterfall_plot() parameters ---
_FILL_DEF = "lightgray"  # box fill used when categories are not mapped to the boxes
_SHOW_LEGEND_DEF = False
_MAP_CATEGORIES_TO_DEF = 'box'
_TOOLTIPS_DEF = (
    layer_tooltips()
    .title("^x")
    .line("{0}|@{1}".format(_CUMSUM_TITLE, _CUMSUM_NAME))
    .line("{0}|@{1}".format(_DY_TITLE, _DY_NAME))
    .line("{0}|@{1}".format(_INITIAL_TITLE, _INITIAL_NAME))
    .disable_splitting()
)
# Category key -> display name; callers may override individual entries.
_DIFF_CATEGORIES_DEF = dict(
    increase="Increase",
    decrease="Decrease",
    total="Total",
)
_CALC_TOTAL_DEF = True  # append a "total" bar after the last category
_HLINE_DEF = False  # draw a horizontal line at y = 0
_CONNECTOR_LINES_DEF = True  # draw segments connecting consecutive bars
_LABELS_DEF = True  # draw a text label on every bar
_LABEL_COLOR_DEF = "white"  # label color used when categories are not mapped to labels

def _get_stat_data(data, x, y, calc_total, diff_categories):
    """Compute the per-bar statistics that the waterfall layers are drawn from.

    Args:
        data: Mapping-like object; ``data[x]`` and ``data[y]`` must be iterable.
        x: Key of the column holding the (unique) category labels.
        y: Key of the column holding the signed change of each category.
        calc_total: If true, one extra "total" bar is appended after the last category.
        diff_categories: Dict with the display names for the "increase",
            "decrease" and "total" categories.

    Returns:
        A dict of equally long lists: the category labels ('x'), the running total
        before each bar (``_INITIAL_NAME``) and after it (``_CUMSUM_NAME``), the
        change itself (``_DY_NAME``), the vertical extent of the bar
        ('ymin'/'ymax') and the category name of the bar (``_DIFF_TITLE``).

    Raises:
        AssertionError: If the x column contains duplicate values.
    """
    # Materialize both columns once. Positional access such as ys[0] would otherwise
    # depend on the container type (e.g. it is a label lookup on a pandas Series
    # whose index does not start at 0), and one-shot iterables could not be reused.
    xs, ys = list(data[x]), list(data[y])
    assert len(xs) == len(set(xs)), "x values shouldn't contains duplicates"
    cum_sum = 0
    yprev = []
    ynext = []
    ymin = []
    ymax = []
    diff_type = []
    for y_val in ys:
        start, end = cum_sum, cum_sum + y_val
        yprev.append(start)
        ynext.append(end)
        ymin.append(min(start, end))
        ymax.append(max(start, end))
        # A zero change is reported as an increase.
        diff_type.append(diff_categories["increase"] if y_val >= 0 else diff_categories["decrease"])
        cum_sum = end
    if calc_total:
        # The first value is treated as the starting level of the total bar, so its
        # reported difference is "final total minus first value". With no input rows
        # there is no first value; fall back to 0 instead of raising IndexError.
        first_val = ys[0] if ys else 0
        xs.append(diff_categories["total"])
        ys.append(cum_sum - first_val)
        yprev.append(first_val)
        ynext.append(cum_sum)
        # The total bar itself is always drawn from the zero baseline.
        ymin.append(min(cum_sum, 0))
        ymax.append(max(cum_sum, 0))
        diff_type.append(diff_categories["total"])
    return {
        'x': xs,
        _INITIAL_NAME: yprev,
        _CUMSUM_NAME: ynext,
        _DY_NAME: ys,
        'ymin': ymin,
        'ymax': ymax,
        _DIFF_TITLE: diff_type,
    }

def _get_annotations_data(stat_data):
    return {**stat_data,
            **{'y': [(stat_data["ymin"][i] + stat_data["ymax"][i]) / 2 \
                     for i in range(len(stat_data["ymin"]))]}}

def _get_intermediate_lines(stat_data):
    """Build the data for the connector segments between consecutive bars.

    For every pair of neighbouring bars (i, i + 1) one horizontal segment is
    produced: it starts at bar i and ends at bar i + 1, both ends being at the
    cumulative value reached by bar i. Fewer than two bars yield empty lists.
    """
    from itertools import pairwise

    categories = stat_data['x']
    levels = stat_data[_CUMSUM_NAME]
    neighbours = list(pairwise(range(len(categories))))
    return {
        'x': [categories[left] for left, _ in neighbours],
        'y': [levels[left] for left, _ in neighbours],
        'xend': [categories[right] for _, right in neighbours],
        # Same height as the start point: the connector is horizontal.
        'yend': [levels[left] for left, _ in neighbours],
    }

def waterfall_plot(data, x, y, *,
                   color=None, fill=None, size=None, alpha=None, linetype=None, width=None,
                   show_legend=_SHOW_LEGEND_DEF, tooltips=_TOOLTIPS_DEF,
                   calc_total=_CALC_TOTAL_DEF, map_categories_to=_MAP_CATEGORIES_TO_DEF, diff_categories=None,
                   hline=_HLINE_DEF, hline_color=None, hline_size=None, hline_linetype=None,
                   connector_lines=_CONNECTOR_LINES_DEF, connector_linetype=None,
                   labels=_LABELS_DEF, label_color=None):
    """Build a waterfall chart from a column of categories and a column of changes.

    Layers are added in this order: optional zero line, optional connector
    segments, the boxes (crossbars), an optional manual fill/color scale and
    optional text labels showing the change of each bar.

    Args:
        data: Input data; ``data[x]`` are the categories, ``data[y]`` the changes.
        x, y: Keys of the category and value columns in ``data``.
        color, size: Passed to both the box layer and the connector segments.
        fill, alpha, linetype, width: Passed to the box layer only. When the
            categories are not mapped to the boxes and ``fill`` is None,
            ``_FILL_DEF`` is used.
        show_legend, tooltips: Passed to the box layer.
        calc_total: Whether to append a "total" bar.
        map_categories_to: One of 'box', 'label' or 'none' - which aesthetic
            (box fill, label color, or nothing) the categories are mapped to.
        diff_categories: Optional dict overriding entries of ``_DIFF_CATEGORIES_DEF``.
        hline: Whether to draw a horizontal line at y = 0; ``hline_color``,
            ``hline_size`` and ``hline_linetype`` are passed to that line.
        connector_lines: Whether to draw the connector segments;
            ``connector_linetype`` is their line type.
        labels: Whether to draw the text labels. ``label_color`` is their
            constant color; when the categories are not mapped to the labels
            and it is None, ``_LABEL_COLOR_DEF`` is used.

    Returns:
        The assembled lets-plot plot object.

    Raises:
        AssertionError: If ``map_categories_to`` is not an accepted value, or
            (from ``_get_stat_data``) if the x values contain duplicates.
    """
    assert map_categories_to in _MAP_CATEGORIES_TO_VALUES, \
        "map_categories_to should be in {0}".format(_MAP_CATEGORIES_TO_VALUES)

    # User-supplied names override the defaults entry by entry.
    if diff_categories is None:
        diff_categories = _DIFF_CATEGORIES_DEF
    else:
        diff_categories = {**_DIFF_CATEGORIES_DEF, **diff_categories}

    stat_data = _get_stat_data(data, x, y, calc_total, diff_categories)
    categories_on_box = map_categories_to == 'box'
    categories_on_label = map_categories_to == 'label'

    box_aes = dict(x='x', y=_CUMSUM_NAME, ymin='ymin', ymax='ymax')
    if categories_on_box:
        box_aes['fill'] = _DIFF_TITLE
    elif fill is None:
        fill = _FILL_DEF

    plot = ggplot()
    if hline:
        plot += geom_hline(yintercept=0,
                           color=hline_color, size=hline_size, linetype=hline_linetype,
                           tooltips='none')
    if connector_lines:
        plot += geom_segment(aes('x', 'y', xend='xend', yend='yend'),
                             data=_get_intermediate_lines(stat_data),
                             linetype=connector_linetype,
                             color=color, size=size, tooltips='none')
    # fatten=0 is passed so that only the box is visible, not its middle bar
    # (presumably that is what a zero fatten does; confirm in the lets-plot docs).
    plot += geom_crossbar(aes(**box_aes),
                          data=stat_data,
                          fatten=0,
                          color=color, fill=fill, size=size, alpha=alpha, linetype=linetype,
                          width=width,
                          show_legend=show_legend, tooltips=tooltips)

    if categories_on_box or categories_on_label:
        # Category display name -> color, looked up through the category key.
        palette = {title: _CATEGORY_COLORS[key] for key, title in diff_categories.items()}
        if categories_on_box:
            plot += scale_fill_manual(values=palette)
        else:
            plot += scale_color_manual(values=palette)

    if not labels:
        return plot

    text_aes = dict(x='x', y='y', label=_DY_NAME)
    if categories_on_label:
        text_aes['color'] = _DIFF_TITLE
    elif label_color is None:
        label_color = _LABEL_COLOR_DEF
    plot += geom_text(aes(**text_aes),
                      data=_get_annotations_data(stat_data),
                      color=label_color, show_legend=False)
    return plot

## Default

In [5]:
# All defaults: categories mapped to the box fill, a total bar, connector lines and
# white labels are drawn; the legend and the zero line are off.
waterfall_plot(data, 'x', 'y')

## Parameters

### Aesthetics

In [6]:
# color: passed to both the boxes and the connector lines
waterfall_plot(data, 'x', 'y', color="magenta")

In [7]:
# fill: constant fill passed to the boxes; the category-to-fill mapping is still set
# (presumably the constant takes precedence; confirm in the lets-plot docs)
waterfall_plot(data, 'x', 'y', fill="blue")

In [8]:
# size: passed to both the boxes and the connector lines
waterfall_plot(data, 'x', 'y', size=2)

In [9]:
# alpha: passed to the boxes only
waterfall_plot(data, 'x', 'y', alpha=.5)

In [10]:
# linetype: passed to the boxes only (connector lines have their own connector_linetype)
waterfall_plot(data, 'x', 'y', linetype='dashed')

In [11]:
# width: passed to the boxes only
waterfall_plot(data, 'x', 'y', width=.4)

### Standard parameters

In [12]:
# show_legend: passed to the box layer (off by default); the label layer never shows a legend
waterfall_plot(data, 'x', 'y', show_legend=True)

In [13]:
# tooltips: passed to the box layer as-is. The first plot passes 'none'; the second
# passes a custom layer_tooltips() line built from the computed "dy", "initial" and
# "cumsum" columns.
gggrid([
    waterfall_plot(data, 'x', 'y', tooltips='none'),
    waterfall_plot(data, 'x', 'y', tooltips=layer_tooltips().line("@dy: from @initial to @cumsum"))
])

### Waterfall-specific parameters

In [14]:
# calc_total: False skips appending the extra "total" bar after the last category
waterfall_plot(data, 'x', 'y', calc_total=False)

In [15]:
# map_categories_to: 'box' (default) maps the categories to the box fill, 'label' maps
# them to the label color, 'none' maps them to nothing. Whenever the boxes are not
# category-colored they get the default "lightgray" fill; with 'none' the labels also
# keep the default "white" color.
gggrid([
    waterfall_plot(data, 'x', 'y'),
    waterfall_plot(data, 'x', 'y', map_categories_to='label'),
    waterfall_plot(data, 'x', 'y', map_categories_to='none'),
], ncol=2)

In [16]:
# diff_categories: overrides are merged into the default category names; the "total"
# name is also used as the x value of the total bar
waterfall_plot(data, 'x', 'y', diff_categories={"total": "Result"}, show_legend=True)

### Control additional geometries

In [17]:
# hline: adds a horizontal line at y = 0 (added before the other layers)
waterfall_plot(data, 'x', 'y', hline=True)

In [18]:
# hline_color: color of the zero line; only used when hline=True
waterfall_plot(data, 'x', 'y', hline=True, hline_color="magenta")

In [19]:
# hline_size: size of the zero line; only used when hline=True
waterfall_plot(data, 'x', 'y', hline=True, hline_size=2)

In [20]:
# hline_linetype: line type of the zero line; only used when hline=True
waterfall_plot(data, 'x', 'y', hline=True, hline_linetype='dashed')

In [21]:
# connector_lines: False skips the segments between consecutive bars (narrower boxes
# are used here, via width, so the gaps between bars are visible)
waterfall_plot(data, 'x', 'y', width=.5, connector_lines=False)

In [22]:
# connector_linetype: line type of the connector segments only (the boxes use linetype)
waterfall_plot(data, 'x', 'y', width=.5, connector_linetype='dotted')

In [23]:
# labels: False skips the text layer that shows the change of each bar
waterfall_plot(data, 'x', 'y', labels=False)

In [24]:
# label_color: constant color of the text labels (default "white" unless the
# categories are mapped to the labels)
waterfall_plot(data, 'x', 'y', label_color="yellow")

## Other Customizations

In [25]:
# custom fill: a further scale_fill_manual is added on top of the returned plot; its
# keys are the default category display names (presumably the later scale replaces
# the built-in one; confirm in the lets-plot docs)
waterfall_plot(data, 'x', 'y', label_color="#777777") + \
    scale_fill_manual({"Increase": "white", "Decrease": "black", "Total": "yellow"})

In [26]:
# flip coordinates: the function returns a plot built on ggplot(), so further
# lets-plot features such as coord_flip() can be added to it with "+"
waterfall_plot(data, 'x', 'y') + coord_flip()

In [27]:
# custom theme: a theme is added to the returned plot with "+", like any other feature
waterfall_plot(data, 'x', 'y') + theme_bw()