Skip to content

Commit

Permalink
Merge pull request #2996 from solvents/master
Browse files Browse the repository at this point in the history
Violin Plots
  • Loading branch information
tacaswell committed May 26, 2014
2 parents 4b1bd63 + 01c3176 commit 687286a
Show file tree
Hide file tree
Showing 25 changed files with 974 additions and 27 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Expand Up @@ -30,6 +30,10 @@
interpolation = 'none' and interpolation = 'nearest' in
`imshow()` when saving vector graphics files.

2014-04-22 Added violin plotting functions. See `Axes.violinplot`,
`Axes.violin`, `cbook.violin_stats` and `mlab.GaussianKDE` for
details.

2014-04-10 Fixed the triangular marker rendering error. The "Up" triangle was
rendered instead of "Right" triangle and vice-versa.

Expand Down
1 change: 1 addition & 0 deletions boilerplate.py
Expand Up @@ -146,6 +146,7 @@ def boilerplate_gen():
'tricontourf',
'tripcolor',
'triplot',
'violinplot',
'vlines',
'xcorr',
'barbs',
Expand Down
21 changes: 21 additions & 0 deletions doc/users/whats_new.rst
Expand Up @@ -172,6 +172,27 @@ Added the Axes method :meth:`~matplotlib.axes.Axes.add_image` to put image
handling on a par with artists, collections, containers, lines, patches,
and tables.

Violin Plots
````````````
Per Parker, Gregory Kelsie, Adam Ortiz, Kevin Chan, Geoffrey Lee, Deokjae
Donald Seo, and Taesu Terry Lim added a basic implementation for violin
plots. Violin plots can be used to represent the distribution of sample data.
They are similar to box plots, but use a kernel density estimation function to
present a smooth approximation of the data sample used. The added features are:

:func:`~matplotlib.axes.Axes.violin` - Renders a violin plot from a collection of
statistics.
:func:`~matplotlib.cbook.violin_stats` - Produces a collection of statistics
suitable for rendering a violin plot.
:func:`~matplotlib.pyplot.violinplot` - Creates a violin plot from a set of
sample data. This method makes use of :func:`~matplotlib.cbook.violin_stats`
to process the input data, and :func:`~matplotlib.axes.Axes.violin` to
do the actual rendering. Users are also free to modify or replace the output of
:func:`~matplotlib.cbook.violin_stats` in order to customize the violin plots
to their liking.

This feature was implemented for a software engineering course at the
University of Toronto, Scarborough, run in Winter 2014 by Anya Tafliovich.

More `markevery` options to show only a subset of markers
`````````````````````````````````````````````````````````
Expand Down
48 changes: 48 additions & 0 deletions examples/statistics/violinplot_demo.py
@@ -0,0 +1,48 @@
"""
Demo of the new violinplot functionality
"""

import random
import numpy as np
import matplotlib.pyplot as plt

# fake data
fs = 10  # fontsize
pos = [1, 2, 4, 5, 7, 8]
data = [np.random.normal(size=100) for _ in pos]

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))

# Per-panel keyword arguments, listed in row-major order of the 2x3 grid.
# The top row draws vertical violins and the bottom row horizontal ones;
# each column uses a different bandwidth setting.
panel_options = [
    dict(points=20, widths=0.1),
    dict(points=40, widths=0.3, bw_method='silverman'),
    dict(points=60, widths=0.5, bw_method=0.5),
    dict(points=80, vert=False, widths=0.7),
    dict(points=100, vert=False, widths=0.9, bw_method='silverman'),
    dict(points=200, vert=False, widths=1.1, bw_method=0.5),
]

for number, (ax, options) in enumerate(zip(axes.flat, panel_options),
                                       start=1):
    # Every panel shows the means, the extrema and the medians.
    ax.violinplot(data, pos, showmeans=True, showextrema=True,
                  showmedians=True, **options)
    ax.set_title('Custom violinplot %d' % number, fontsize=fs)

# Hide the y tick labels on every panel.
for ax in axes.flatten():
    ax.set_yticklabels([])

fig.suptitle("Violin Plotting Examples")
fig.subplots_adjust(hspace=0.4)
plt.show()
242 changes: 242 additions & 0 deletions lib/matplotlib/axes/_axes.py
Expand Up @@ -6725,6 +6725,248 @@ def matshow(self, Z, **kwargs):
integer=True))
return im

def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
               showmeans=False, showextrema=True, showmedians=False,
               points=100, bw_method=None):
    """
    Make a violin plot.

    Call signature::

      violinplot(dataset, positions=None, vert=True, widths=0.5,
                 showmeans=False, showextrema=True, showmedians=False,
                 points=100, bw_method=None)

    Make a violin plot for each column of *dataset* or each vector in
    sequence *dataset*.  Each filled area extends to represent the
    entire data range, with optional lines at the mean, the median,
    the minimum, and the maximum.

    The statistics are computed with :func:`cbook.violin_stats`, using a
    :class:`mlab.GaussianKDE` as the density estimator, and are then
    drawn by :meth:`violin`.

    Parameters
    ----------
    dataset : Array or a sequence of vectors.
        The input data.

    positions : array-like, default = [1, 2, ..., n]
        Sets the positions of the violins.

    vert : bool, default = True.
        If true, creates a vertical violin plot.
        Otherwise, creates a horizontal violin plot.

    widths : array-like, default = 0.5
        Either a scalar or a vector that sets the maximal width of
        each violin.

    showmeans : bool, default = False
        If true, a line is drawn at the mean of each violin.

    showextrema : bool, default = True
        If true, lines are drawn at the minimum and the maximum of each
        violin, together with a bar joining them.

    showmedians : bool, default = False
        If true, a line is drawn at the median of each violin.

    points : scalar, default = 100
        Defines the number of points to evaluate each of the gaussian
        kernel density estimations at.

    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth.  This can be
        'scott', 'silverman', a scalar constant or a callable.  If a
        scalar, this will be used directly as `kde.factor`.  If a
        callable, it should take a `GaussianKDE` instance as its only
        parameter and return a scalar.  If None (default), 'scott' is used.

    Returns
    -------
    A dictionary mapping each component of the violinplot to the
    collection instance(s) created for it.  The key ``bodies`` is
    always present; the other keys are present only when the matching
    ``show*`` flag is true:

        - ``bodies``: A list of the
          :class:`matplotlib.collections.PolyCollection` instances
          containing the filled area of each violin.

        - ``cmeans``: A :class:`matplotlib.collections.LineCollection`
          instance marking the mean of each violin (*showmeans*).

        - ``cmins``: A :class:`matplotlib.collections.LineCollection`
          instance marking the minimum of each violin (*showextrema*).

        - ``cmaxes``: A :class:`matplotlib.collections.LineCollection`
          instance marking the maximum of each violin (*showextrema*).

        - ``cbars``: A :class:`matplotlib.collections.LineCollection`
          instance joining the minimum and the maximum of each violin
          (*showextrema*).

        - ``cmedians``: A :class:`matplotlib.collections.LineCollection`
          instance marking the median of each violin (*showmedians*).
    """

    def _kde_method(X, coords):
        # Closes over *bw_method* so that violin_stats only has to supply
        # the sample and the evaluation coordinates.
        kde = mlab.GaussianKDE(X, bw_method)
        return kde.evaluate(coords)

    vpstats = cbook.violin_stats(dataset, _kde_method, points=points)
    return self.violin(vpstats, positions=positions, vert=vert,
                       widths=widths, showmeans=showmeans,
                       showextrema=showextrema, showmedians=showmedians)

def violin(self, vpstats, positions=None, vert=True, widths=0.5,
           showmeans=False, showextrema=True, showmedians=False):
    """
    Drawing function for violin plots.

    Call signature::

      violin(vpstats, positions=None, vert=True, widths=0.5,
             showmeans=False, showextrema=True, showmedians=False)

    Draw a violin plot for each entry of `vpstats`.  Each filled area
    extends to represent the entire data range, with optional lines at the
    mean, the median, the minimum, and the maximum.

    Parameters
    ----------
    vpstats : list of dicts
        A list of dictionaries containing stats for each violin plot.
        Required keys are:

        - coords: A list of scalars containing the coordinates that
          the violin's kernel density estimate was evaluated at.

        - vals: A list of scalars containing the values of the kernel
          density estimate at each of the coordinates given in `coords`.

        - mean: The mean value for this violin's dataset.

        - median: The median value for this violin's dataset.

        - min: The minimum value for this violin's dataset.

        - max: The maximum value for this violin's dataset.

    positions : array-like, default = [1, 2, ..., n]
        Sets the positions of the violins.

    vert : bool, default = True.
        If true, plots the violins vertically.
        Otherwise, plots the violins horizontally.

    widths : array-like, default = 0.5
        Either a scalar or a vector that sets the maximal width of
        each violin.

    showmeans : bool, default = False
        If true, a line is drawn at the mean of each violin.

    showextrema : bool, default = True
        If true, lines are drawn at the minimum and the maximum of each
        violin, together with a bar joining them.

    showmedians : bool, default = False
        If true, a line is drawn at the median of each violin.

    Returns
    -------
    A dictionary mapping each component of the violinplot to the
    collection instance(s) created for it.  The key ``bodies`` is
    always present; the other keys are present only when the matching
    ``show*`` flag is true:

        - ``bodies``: A list of the
          :class:`matplotlib.collections.PolyCollection` instances
          containing the filled area of each violin.

        - ``cmeans``: A :class:`matplotlib.collections.LineCollection`
          instance marking the mean of each violin (*showmeans*).

        - ``cmins``: A :class:`matplotlib.collections.LineCollection`
          instance marking the minimum of each violin (*showextrema*).

        - ``cmaxes``: A :class:`matplotlib.collections.LineCollection`
          instance marking the maximum of each violin (*showextrema*).

        - ``cbars``: A :class:`matplotlib.collections.LineCollection`
          instance joining the minimum and the maximum of each violin
          (*showextrema*).

        - ``cmedians``: A :class:`matplotlib.collections.LineCollection`
          instance marking the median of each violin (*showmedians*).

    Raises
    ------
    ValueError
        If *positions*, or a non-scalar *widths*, does not have the same
        length as *vpstats*.
    """

    # Collections to be returned
    artists = {}

    N = len(vpstats)
    datashape_message = ("List of violinplot statistics and `{0}` "
                         "values must have the same length")

    # Validate positions
    if positions is None:
        positions = range(1, N + 1)
    elif len(positions) != N:
        raise ValueError(datashape_message.format("positions"))

    # Validate widths
    if np.isscalar(widths):
        widths = [widths] * N
    elif len(widths) != N:
        raise ValueError(datashape_message.format("widths"))

    # Calculate ranges for statistics lines: each line spans a quarter of
    # the violin's width on either side of its position.
    pmins = -0.25 * np.array(widths) + positions
    pmaxes = 0.25 * np.array(widths) + positions

    # Check whether we are rendering vertically or horizontally
    if vert:
        fill = self.fill_betweenx
        perp_lines = self.hlines
        par_lines = self.vlines
    else:
        fill = self.fill_between
        perp_lines = self.vlines
        par_lines = self.hlines

    # Render violins
    bodies = []
    for stats, pos, width in zip(vpstats, positions, widths):
        # The density is normalised so that its peak equals half of
        # *width*; the 0.5 factor reflects the fact that we plot from
        # pos - vals to pos + vals.
        vals = np.array(stats['vals'])
        vals = 0.5 * width * vals / vals.max()
        bodies.append(fill(stats['coords'],
                           -vals + pos,
                           vals + pos,
                           facecolor='y',
                           alpha=0.3))
    artists['bodies'] = bodies

    # Statistical quantities to be plotted on the violins
    means = [stats['mean'] for stats in vpstats]
    mins = [stats['min'] for stats in vpstats]
    maxes = [stats['max'] for stats in vpstats]
    medians = [stats['median'] for stats in vpstats]

    # Render means
    if showmeans:
        artists['cmeans'] = perp_lines(means, pmins, pmaxes, colors='r')

    # Render extrema
    if showextrema:
        artists['cmaxes'] = perp_lines(maxes, pmins, pmaxes, colors='r')
        artists['cmins'] = perp_lines(mins, pmins, pmaxes, colors='r')
        artists['cbars'] = par_lines(positions, mins, maxes, colors='r')

    # Render medians
    if showmedians:
        artists['cmedians'] = perp_lines(medians,
                                         pmins,
                                         pmaxes,
                                         colors='r')

    return artists

def tricontour(self, *args, **kwargs):
    # Thin wrapper: passes this Axes plus all positional and keyword
    # arguments straight through to mtri.tricontour and returns its result.
    return mtri.tricontour(self, *args, **kwargs)
# The docstring is taken from mtri.TriContourSet.tricontour_doc instead of
# being written inline.
tricontour.__doc__ = mtri.TriContourSet.tricontour_doc
Expand Down

0 comments on commit 687286a

Please sign in to comment.