# Waterfall Chart

See:

- [Wikipedia article](https://en.wikipedia.org/wiki/Waterfall_chart)
- [ggplot2 + waterfalls](https://r-charts.com/flow/waterfall-chart/)

In [1]:
from lets_plot import *
from lets_plot.bistro import *

In [2]:
# One-time Lets-Plot HTML setup; this cell runs before any plot cell below.
LetsPlot.setup_html()

In [3]:
# Sample dataset shared by most cells: two groups ('a' and 'b'),
# each holding one signed value for every category "A".."D".
data = {
    "x": ["A", "B", "C", "D", "A", "B", "C", "D"],
    "y": [100, 100, -300, 500, -200, 300, 100, -300],
    "g": ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
}

## Default

In [4]:
# Baseline: only the data plus the x, y and group column names are passed.
waterfall_plot(data, 'x', 'y', group='g')

## Parameters

### Aesthetics

In [5]:
# color: a fixed color name first, then 'flow_type' on a light grey fill
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', size=1, **color_options)
    for color_options in (
        {"color": "magenta"},
        {"color": 'flow_type', "fill": "lightgrey"},
    )
])

In [6]:
# fill: pass one fixed color name as the `fill` aesthetic
waterfall_plot(data, 'x', 'y', group='g', fill="magenta")

In [7]:
# size: pass a numeric `size` aesthetic (2 here)
waterfall_plot(data, 'x', 'y', group='g', size=2)

In [8]:
# alpha: pass a fractional `alpha` aesthetic (.5 here)
waterfall_plot(data, 'x', 'y', group='g', alpha=.5)

In [9]:
# linetype: 'dashed' is passed together with size=1
# (presumably so the outline is thick enough for the dashes to show - confirm in the output)
waterfall_plot(data, 'x', 'y', group='g', size=1, linetype='dashed')

In [10]:
# width: pass a fractional `width` (.4 here)
waterfall_plot(data, 'x', 'y', group='g', width=.4)

### Standard parameters

In [11]:
# show_legend: legend enabled, once with calc_total left unset and once with calc_total=False
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', show_legend=True, **extra_options) + ggtitle("Show legend", subtitle)
    for extra_options, subtitle in (
        ({}, "Default calc_total"),
        ({"calc_total": False}, "calc_total=False"),
    )
])

In [12]:
# tooltips: the 'none' value next to a custom single-line layer_tooltips() spec
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', tooltips=tooltip_spec)
    for tooltip_spec in (
        'none',
        layer_tooltips().line("@dy: from @initial to @cumsum").disable_splitting(),
    )
])

### Waterfall-specific parameters

In [13]:
# sorted_value: switch the `sorted_value` flag on
waterfall_plot(data, 'x', 'y', group='g', sorted_value=True)

In [14]:
# threshold: 300 lies inside the range of |y| in `data` (100..500)
waterfall_plot(data, 'x', 'y', group='g', threshold=300)

In [15]:
# max_values: pass 2, while each group in `data` holds four values
waterfall_plot(data, 'x', 'y', group='g', max_values=2)

In [16]:
# Use threshold to skip zeros
# Two groups ('t' and 's'); each one contains a zero value.
data_with_zeros = {
    "x": ['a', 'b', 'c', 'd', 'a', 'b'],
    "y": [1, -2, 3, 0, 0, 2],
    "g": ['t', 't', 't', 't', 's', 's'],
}

# Same data twice: first without a threshold, then with threshold=1.
gggrid([
    waterfall_plot(data_with_zeros, 'x', 'y', group='g', **threshold_options)
    for threshold_options in ({}, {"threshold": 1})
])

In [17]:
# calc_total: pass calc_total=False explicitly
waterfall_plot(data, 'x', 'y', group='g', calc_total=False)

In [18]:
# total_title: pass a custom title ("Result"); show_legend=True is set as well
waterfall_plot(data, 'x', 'y', group='g', total_title="Result", show_legend=True)

### Control additional geometries

In [19]:
# hline: the default element_line() next to three ways of passing a blank element
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', hline=hline_element)
    for hline_element in (
        element_line(),
        element_line(blank=True),
        element_blank(),
        'blank',
    )
], ncol=2)

In [20]:
# hline_ontop: pass hline_ontop=False together with an explicit default element_line() hline
waterfall_plot(data, 'x', 'y', group='g', hline=element_line(), hline_ontop=False)

In [21]:
# hline color: set through element_line(color=...)
waterfall_plot(data, 'x', 'y', group='g', hline=element_line(color="magenta"))

In [22]:
# hline size: set through element_line(size=...)
waterfall_plot(data, 'x', 'y', group='g', hline=element_line(size=2))

In [23]:
# hline linetype: set through element_line(linetype=...), 'solid' here
waterfall_plot(data, 'x', 'y', group='g', hline=element_line(linetype='solid'))

In [24]:
# connector: the default element_line() next to three ways of passing a blank element
# (every plot uses width=.5)
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', width=.5, connector=connector_element)
    for connector_element in (
        element_line(),
        element_line(blank=True),
        element_blank(),
        'blank',
    )
], ncol=2)

In [25]:
# connector color: set through element_line(color=...); width=.5 as in the connector grid
waterfall_plot(data, 'x', 'y', group='g', width=.5, connector=element_line(color="magenta"))

In [26]:
# connector size: set through element_line(size=...); width=.5 as in the connector grid
waterfall_plot(data, 'x', 'y', group='g', width=.5, connector=element_line(size=2))

In [27]:
# connector linetype: set through element_line(linetype=...), 'dotted' here
waterfall_plot(data, 'x', 'y', group='g', width=.5, connector=element_line(linetype='dotted'))

In [28]:
# label: the default element_text() next to three ways of passing a blank element
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', label=label_element)
    for label_element in (
        element_text(),
        element_text(blank=True),
        element_blank(),
        'blank',
    )
], ncol=2)

In [29]:
# label color: a fixed color name first, then 'flow_type' on a light gray fill
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', **label_options)
    for label_options in (
        {"label": element_text(color="yellow")},
        {"fill": "lightgray", "label": element_text(color='flow_type')},
    )
])

In [30]:
# label family: set through element_text(family=...)
waterfall_plot(data, 'x', 'y', group='g', label=element_text(family="Courier"))

In [31]:
# label face: set through element_text(face=...), 'bold_italic' here
waterfall_plot(data, 'x', 'y', group='g', label=element_text(face='bold_italic'))

In [32]:
# label size: set through element_text(size=...)
waterfall_plot(data, 'x', 'y', group='g', label=element_text(size=10))

In [33]:
# label angle: set through element_text(angle=...), 45 here
waterfall_plot(data, 'x', 'y', group='g', label=element_text(angle=45))

In [34]:
# label hjust/vjust
def get_waterfall_with_justified_labels(hjust, vjust):
    """Build the grouped waterfall plot of `data` with labels justified by hjust/vjust.

    The hjust/vjust pair is also written into the plot subtitle.
    """
    justified_label = element_text(hjust=hjust, vjust=vjust)
    plot = waterfall_plot(data, 'x', 'y', group='g', label=justified_label)
    subtitle = "hjust={0}, vjust={1}".format(hjust, vjust)
    return plot + ggtitle("Justified labels", subtitle)

# All four combinations of hjust and vjust in {0, 1}; vjust varies fastest.
gggrid([
    get_waterfall_with_justified_labels(hjust, vjust)
    for hjust in (0, 1)
    for vjust in (0, 1)
], ncol=2)

In [35]:
# label_format: the "{.1f}" placeholder is wrapped in literal parentheses
# (presumably one decimal place per label - confirm in the rendered output)
waterfall_plot(data, 'x', 'y', group='g', label_format="({.1f})")

## Other Customizations

In [36]:
# fill and color: three plots, each combined with a manual scale and a title

gggrid([
    (
        waterfall_plot(data, 'x', 'y', group='g', show_legend=True, size=1, color="#777777", label=element_text(color="#777777"))
        + scale_fill_manual({"Increase": "white", "Decrease": "black", "Total": "yellow"})
        + ggtitle("Custom scale_fill_manual()")
    ),
    (
        waterfall_plot(data, 'x', 'y', group='g', show_legend=True, fill="black", label=element_text(color='flow_type'))
        + scale_color_manual({"Increase": "green", "Decrease": "yellow", "Total": "#bbbbff"})
        + ggtitle("Custom scale_color_manual()")
    ),
    (
        waterfall_plot(data, 'x', 'y', group='g', show_legend=True, color="#777777", label=element_text(color="#777777"))
        + scale_fill_manual({"Increase": "green", "Decrease": "red", "Total": "yellow"}, labels=["Up", "Down", "Result"])
        + ggtitle("Custom flow type names")
    ),
], ncol=3)

In [37]:
# flip coordinates: add coord_flip() to the default plot
waterfall_plot(data, 'x', 'y', group='g') + coord_flip()

In [38]:
# custom theme: add theme_bw() and flavor_darcula() to the default plot
waterfall_plot(data, 'x', 'y', group='g') + theme_bw() + flavor_darcula()

## Tests

### Boundary Value Analysis

In [39]:
import numpy as np

class BVATest:
    """One boundary-value case: a dataset, a title and a `show` flag."""

    def __init__(self, data, title, show=True):
        """Store the dataset, the plot title and the `show` flag unchanged."""
        self.data, self.title, self.show = data, title, show

    def to_plot(self):
        """Return a grid holding the case plotted with calc_total=True, then calc_total=False."""
        panels = []
        for calc_total, subtitle in ((True, "calc_total=True"), (False, "calc_total=False")):
            panel = waterfall_plot(self.data, 'x', 'y', group='g', calc_total=calc_total)
            panels.append(panel + ggtitle(self.title, subtitle))
        return gggrid(panels)

# When True, every case is rendered regardless of its own `show` flag.
show_all = True

# One row per case: (title, x values, y values, group values).
bva_tests = [
    BVATest(title=case_title, data={"x": xs, "y": ys, "g": gs})
    for case_title, xs, ys, gs in (
        ("Empty dataset", [], [], []),
        ("One value dataset", ["A"], [1], ['a']),
        ("Repeated categories", ["A", "A"], [1, 2], ['a', 'a']),
        ("Zero values", ["A"], [0], ['a']),
        ("Negative values", ["A"], [-1], ['a']),
        ("Numeric x", [1], [1], ['a']),
        ("Inf values", ["A", "B", "C"], [1, float('inf'), float('-inf')], ['a', 'a', 'a']),
        ("None values", ["A", "B", "C", None], [1, 1, None, 1], ['a', None, 'a', 'a']),
        ("np.nan values", ["A", "B", "C", np.nan], [1, 1, np.nan, 1], ['a', np.nan, 'a', 'a']),
        ("Total is zero", ["A", "B"], [1, -1], ['a', 'a']),
    )
]

# One grid row per case; a case with show=False is skipped only when show_all is off.
gggrid(
    [case.to_plot() for case in bva_tests if show_all or case.show],
    ncol=1,
)

### Regression Testing

In [40]:
# Regression check: y sums to 1 + 2 = 3, so the total should read 3 in both labels and tooltips.
waterfall_plot(dict(x=["A", "B"], y=[1, 2], g=['a', 'a']), 'x', 'y', group='g')

In [41]:
# Regression check: changing hline properties must not affect subsequent plots.
# The same holds for connector and label. Each customized plot is immediately
# followed by one built from a default element, so the pair can be compared.
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', hline=element_line(color="magenta")),
    waterfall_plot(data, 'x', 'y', group='g', hline=element_line()),
    waterfall_plot(data, 'x', 'y', group='g', connector=element_line(color="magenta")),
    waterfall_plot(data, 'x', 'y', group='g', connector=element_line()),
    waterfall_plot(data, 'x', 'y', group='g', label=element_text(color="black")),
    waterfall_plot(data, 'x', 'y', group='g', label=element_text()),
], ncol=2)

In [42]:
# Regression check: a custom total_title must not affect subsequent plots.
# The second plot is built without total_title for comparison.
gggrid([
    waterfall_plot(data, 'x', 'y', group='g', total_title="Result"),
    waterfall_plot(data, 'x', 'y', group='g'),
])

In [43]:
# Regression check: tooltips='none' should disable tooltips.
waterfall_plot(data, 'x', 'y', group='g', tooltips='none')