diff --git a/CHANGELOG b/CHANGELOG index 1f9d4a7cc33c..1b24d88f661d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -30,6 +30,10 @@ interpolation = 'none' and interpolation = 'nearest' in `imshow()` when saving vector graphics files. +2014-04-22 Added violin plotting functions. See `Axes.violinplot`, + `Axes.violin`, `cbook.violin_stats` and `mlab.GaussianKDE` for + details. + 2014-04-10 Fixed the triangular marker rendering error. The "Up" triangle was rendered instead of "Right" triangle and vice-versa. diff --git a/boilerplate.py b/boilerplate.py index 5bcbf03f741a..1791c808192d 100644 --- a/boilerplate.py +++ b/boilerplate.py @@ -146,6 +146,7 @@ def boilerplate_gen(): 'tricontourf', 'tripcolor', 'triplot', + 'violinplot', 'vlines', 'xcorr', 'barbs', diff --git a/doc/users/whats_new.rst b/doc/users/whats_new.rst index f6706fcdd083..95e64afedb8d 100644 --- a/doc/users/whats_new.rst +++ b/doc/users/whats_new.rst @@ -172,6 +172,27 @@ Added the Axes method :meth:`~matplotlib.axes.Axes.add_image` to put image handling on a par with artists, collections, containers, lines, patches, and tables. +Violin Plots +```````````` +Per Parker, Gregory Kelsie, Adam Ortiz, Kevin Chan, Geoffrey Lee, Deokjae +Donald Seo, and Taesu Terry Lim added a basic implementation for violin +plots. Violin plots can be used to represent the distribution of sample data. +They are similar to box plots, but use a kernel density estimation function to +present a smooth approximation of the data sample used. The added features are: + +:meth:`~matplotlib.axes.Axes.violin` - Renders a violin plot from a collection of +statistics. +:func:`~matplotlib.cbook.violin_stats` - Produces a collection of statistics +suitable for rendering a violin plot. +:func:`~matplotlib.pyplot.violinplot` - Creates a violin plot from a set of +sample data. This method makes use of :func:`~matplotlib.cbook.violin_stats` +to process the input data, and :meth:`~matplotlib.axes.Axes.violin` to +do the actual rendering. 
Users are also free to modify or replace the output of +:func:`~matplotlib.cbook.violin_stats` in order to customize the violin plots +to their liking. + +This feature was implemented for a software engineering course at the +University of Toronto, Scarborough, run in Winter 2014 by Anya Tafliovich. More `markevery` options to show only a subset of markers ````````````````````````````````````````````````````````` diff --git a/examples/statistics/violinplot_demo.py b/examples/statistics/violinplot_demo.py new file mode 100644 index 000000000000..448abc976081 --- /dev/null +++ b/examples/statistics/violinplot_demo.py @@ -0,0 +1,48 @@ +""" +Demo of the new violinplot functionality +""" + +import random +import numpy as np +import matplotlib.pyplot as plt + +# fake data +fs = 10 # fontsize +pos = [1,2,4,5,7,8] +data = [np.random.normal(size=100) for i in pos] + +fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6,6)) + +axes[0, 0].violinplot(data, pos, points=20, widths=0.1, + showmeans=True, showextrema=True, showmedians=True) +axes[0, 0].set_title('Custom violinplot 1', fontsize=fs) + +axes[0, 1].violinplot(data, pos, points=40, widths=0.3, + showmeans=True, showextrema=True, showmedians=True, + bw_method='silverman') +axes[0, 1].set_title('Custom violinplot 2', fontsize=fs) + +axes[0, 2].violinplot(data, pos, points=60, widths=0.5, showmeans=True, + showextrema=True, showmedians=True, bw_method=0.5) +axes[0, 2].set_title('Custom violinplot 3', fontsize=fs) + +axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7, + showmeans=True, showextrema=True, showmedians=True) +axes[1, 0].set_title('Custom violinplot 4', fontsize=fs) + +axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9, + showmeans=True, showextrema=True, showmedians=True, + bw_method='silverman') +axes[1, 1].set_title('Custom violinplot 5', fontsize=fs) + +axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1, + showmeans=True, showextrema=True, showmedians=True, 
+ bw_method=0.5) +axes[1, 2].set_title('Custom violinplot 6', fontsize=fs) + +for ax in axes.flatten(): + ax.set_yticklabels([]) + +fig.suptitle("Violin Plotting Examples") +fig.subplots_adjust(hspace=0.4) +plt.show() diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index e0949cc9e8b9..40f230d9a650 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -6725,6 +6725,248 @@ def matshow(self, Z, **kwargs): integer=True)) return im + def violinplot(self, dataset, positions=None, vert=True, widths=0.5, + showmeans=False, showextrema=True, showmedians=False, + points=100, bw_method=None): + """ + Make a violin plot. + + Call signature:: + + violinplot(dataset, positions=None, vert=True, widths=0.5, + showmeans=False, showextrema=True, showmedians=False, + points=100, bw_method=None): + + Make a violin plot for each column of *dataset* or each vector in + sequence *dataset*. Each filled area extends to represent the + entire data range, with optional lines at the mean, the median, + the minimum, and the maximum. + + Parameters + ---------- + + dataset : Array or a sequence of vectors. + The input data. + + positions : array-like, default = [1, 2, ..., n] + Sets the positions of the violins. The ticks and limits are + automatically set to match the positions. + + vert : bool, default = True. + If true, creates a vertical violin plot. + Otherwise, creates a horizontal violin plot. + + widths : array-like, default = 0.5 + Either a scalar or a vector that sets the maximal width of + each violin. The default is 0.5, which uses about half of the + available horizontal space. + + showmeans : bool, default = False + If true, will toggle rendering of the means. + + showextrema : bool, default = True + If true, will toggle rendering of the extrema. + + showmedians : bool, default = False + If true, will toggle rendering of the medians. 
+ + points : scalar, default = 100 + Defines the number of points to evaluate each of the gaussian + kernel density estimations at. + + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. If a + scalar, this will be used directly as `kde.factor`. If a + callable, it should take a `GaussianKDE` instance as its only + parameter and return a scalar. If None (default), 'scott' is used. + + Returns + ------- + + A dictionary mapping each component of the violinplot to a list of the + corresponding collection instances created. The dictionary has + the following keys: + + - bodies: A list of the + :class:`matplotlib.collections.PolyCollection` instances + containing the filled area of each violin. + - cmeans: A :class:`matplotlib.collections.LineCollection` instance + created to identify the mean values of each of the violin's + distribution. + - cmins: A :class:`matplotlib.collections.LineCollection` instance + created to identify the bottom of each violin's distribution. + - cmaxes: A :class:`matplotlib.collections.LineCollection` instance + created to identify the top of each violin's distribution. + - cbars: A :class:`matplotlib.collections.LineCollection` instance + created to identify the centers of each violin's distribution. + - cmedians: A :class:`matplotlib.collections.LineCollection` + instance created to identify the median values of each of the + violin's distribution. 
+ + """ + + def _kde_method(X, coords): + kde = mlab.GaussianKDE(X, bw_method) + return kde.evaluate(coords) + + vpstats = cbook.violin_stats(dataset, _kde_method, points=points) + return self.violin(vpstats, positions=positions, vert=vert, + widths=widths, showmeans=showmeans, + showextrema=showextrema, showmedians=showmedians) + + def violin(self, vpstats, positions=None, vert=True, widths=0.5, + showmeans=False, showextrema=True, showmedians=False): + """ + Drawing function for violin plots. + + Call signature:: + + violin(vpstats, positions=None, vert=True, widths=0.5, + showmeans=False, showextrema=True, showmedians=False): + + Draw a violin plot for each column of `vpstats`. Each filled area + extends to represent the entire data range, with optional lines at the + mean, the median, the minimum, and the maximum. + + Parameters + ---------- + + vpstats : list of dicts + A list of dictionaries containing stats for each violin plot. + Required keys are: + - coords: A list of scalars containing the coordinates that + the violin's kernel density estimate was evaluated at. + - vals: A list of scalars containing the values of the kernel + density estimate at each of the coordinates given in `coords`. + - mean: The mean value for this violin's dataset. + - median: The median value for this violin's dataset. + - min: The minimum value for this violin's dataset. + - max: The maximum value for this violin's dataset. + + positions : array-like, default = [1, 2, ..., n] + Sets the positions of the violins. The ticks and limits are + automatically set to match the positions. + + vert : bool, default = True. + If true, plots the violins vertically. + Otherwise, plots the violins horizontally. + + widths : array-like, default = 0.5 + Either a scalar or a vector that sets the maximal width of + each violin. The default is 0.5, which uses about half of the + available horizontal space. + + showmeans : bool, default = False + If true, will toggle rendering of the means. 
+ + showextrema : bool, default = True + If true, will toggle rendering of the extrema. + + showmedians : bool, default = False + If true, will toggle rendering of the medians. + + Returns + ------- + + A dictionary mapping each component of the violinplot to a list of the + corresponding collection instances created. The dictionary has + the following keys: + + - bodies: A list of the + :class:`matplotlib.collections.PolyCollection` instances + containing the filled area of each violin. + - cmeans: A :class:`matplotlib.collections.LineCollection` instance + created to identify the mean values of each of the violin's + distribution. + - cmins: A :class:`matplotlib.collections.LineCollection` instance + created to identify the bottom of each violin's distribution. + - cmaxes: A :class:`matplotlib.collections.LineCollection` instance + created to identify the top of each violin's distribution. + - cbars: A :class:`matplotlib.collections.LineCollection` instance + created to identify the centers of each violin's distribution. + - cmedians: A :class:`matplotlib.collections.LineCollection` + instance created to identify the median values of each of the + violin's distribution. 
+ + """ + + # Statistical quantities to be plotted on the violins + means = [] + mins = [] + maxes = [] + medians = [] + + # Collections to be returned + artists = {} + + N = len(vpstats) + datashape_message = ("List of violinplot statistics and `{0}` " + "values must have the same length") + + # Validate positions + if positions is None: + positions = range(1, N + 1) + elif len(positions) != N: + raise ValueError(datashape_message.format("positions")) + + # Validate widths + if np.isscalar(widths): + widths = [widths] * N + elif len(widths) != N: + raise ValueError(datashape_message.format("widths")) + + # Calculate ranges for statistics lines + pmins = -0.25 * np.array(widths) + positions + pmaxes = 0.25 * np.array(widths) + positions + + # Check whether we are rendering vertically or horizontally + if vert: + fill = self.fill_betweenx + perp_lines = self.hlines + par_lines = self.vlines + else: + fill = self.fill_between + perp_lines = self.vlines + par_lines = self.hlines + + # Render violins + bodies = [] + for stats, pos, width in zip(vpstats, positions, widths): + # The 0.5 factor reflects the fact that we plot from v-p to + # v+p + vals = np.array(stats['vals']) + vals = 0.5 * width * vals / vals.max() + bodies += [fill(stats['coords'], + -vals + pos, + vals + pos, + facecolor='y', + alpha=0.3)] + means.append(stats['mean']) + mins.append(stats['min']) + maxes.append(stats['max']) + medians.append(stats['median']) + artists['bodies'] = bodies + + # Render means + if showmeans: + artists['cmeans'] = perp_lines(means, pmins, pmaxes, colors='r') + + # Render extrema + if showextrema: + artists['cmaxes'] = perp_lines(maxes, pmins, pmaxes, colors='r') + artists['cmins'] = perp_lines(mins, pmins, pmaxes, colors='r') + artists['cbars'] = par_lines(positions, mins, maxes, colors='r') + + # Render medians + if showmedians: + artists['cmedians'] = perp_lines(medians, + pmins, + pmaxes, + colors='r') + + return artists + def tricontour(self, *args, **kwargs): return 
mtri.tricontour(self, *args, **kwargs) tricontour.__doc__ = mtri.TriContourSet.tricontour_doc diff --git a/lib/matplotlib/cbook.py b/lib/matplotlib/cbook.py index a22a60ebf559..49bdf3fb2534 100644 --- a/lib/matplotlib/cbook.py +++ b/lib/matplotlib/cbook.py @@ -1944,28 +1944,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap): bxpstats = [] # convert X to a list of lists - if hasattr(X, 'shape'): - # one item - if len(X.shape) == 1: - if hasattr(X[0], 'shape'): - X = list(X) - else: - X = [X, ] - - # several items - elif len(X.shape) == 2: - nrows, ncols = X.shape - if nrows == 1: - X = [X] - elif ncols == 1: - X = [X.ravel()] - else: - X = [X[:, i] for i in xrange(ncols)] - else: - raise ValueError("input `X` must have 2 or fewer dimensions") - - if not hasattr(X[0], '__len__'): - X = [X] + X = _reshape_2D(X) ncols = len(X) if labels is None: @@ -1982,7 +1961,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap): stats['mean'] = np.mean(x) # medians and quartiles - q1, med, q3 = np.percentile(x, [25, 50, 75]) + q1, med, q3 = np.percentile(x, [25, 50, 75]) # interquartile range stats['iqr'] = q3 - q1 @@ -2004,7 +1983,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap): hival = np.max(x) else: whismsg = ('whis must be a float, valid string, or ' - 'list of percentiles') + 'list of percentiles') raise ValueError(whismsg) else: loval = np.percentile(x, whis[0]) @@ -2157,6 +2136,112 @@ def is_math_text(s): return even_dollars +def _reshape_2D(X): + """ + Converts a non-empty list or an ndarray of two or fewer dimensions + into a list of iterable objects so that in + + for v in _reshape_2D(X): + + v is iterable and can be used to instantiate a 1D array. 
+ """ + if hasattr(X, 'shape'): + # one item + if len(X.shape) == 1: + if hasattr(X[0], 'shape'): + X = list(X) + else: + X = [X, ] + + # several items + elif len(X.shape) == 2: + nrows, ncols = X.shape + if nrows == 1: + X = [X] + elif ncols == 1: + X = [X.ravel()] + else: + X = [X[:, i] for i in xrange(ncols)] + else: + raise ValueError("input `X` must have 2 or fewer dimensions") + + if not hasattr(X[0], '__len__'): + X = [X] + + return X + + +def violin_stats(X, method, points=100): + ''' + Returns a list of dictionaries of data which can be used to draw a series + of violin plots. See the `Returns` section below to view the required keys + of the dictionary. Users can skip this function and pass a user-defined set + of dictionaries to the `Axes.violin` method instead of using MPL to do the + calculations. + + Parameters + ---------- + X : array-like + Sample data that will be used to produce the gaussian kernel density + estimates. Must have 2 or fewer dimensions. + + method : callable + The method used to calculate the kernel density estimate for each + column of data. When called via `method(v, coords)`, it should + return a vector of the values of the KDE evaluated at the values + specified in coords. + + points : scalar, default = 100 + Defines the number of points to evaluate each of the gaussian kernel + density estimates at. + + Returns + ------- + + A list of dictionaries containing the results for each column of data. + The dictionaries contain at least the following: + + - coords: A list of scalars containing the coordinates this particular + kernel density estimate was evaluated at. + - vals: A list of scalars containing the values of the kernel density + estimate at each of the coordinates given in `coords`. + - mean: The mean value for this column of data. + - median: The median value for this column of data. + - min: The minimum value for this column of data. + - max: The maximum value for this column of data. 
+ ''' + + # List of dictionaries describing each of the violins. + vpstats = [] + + # Want X to be a list of data sequences + X = _reshape_2D(X) + + for x in X: + # Dictionary of results for this distribution + stats = {} + + # Calculate basic stats for the distribution + min_val = np.min(x) + max_val = np.max(x) + + # Evaluate the kernel density estimate + coords = np.linspace(min_val, max_val, points) + stats['vals'] = method(x, coords) + stats['coords'] = coords + + # Store additional statistics for this distribution + stats['mean'] = np.mean(x) + stats['median'] = np.median(x) + stats['min'] = min_val + stats['max'] = max_val + + # Append to output + vpstats.append(stats) + + return vpstats + + +class _NestedClassGetter(object): # recipe from http://stackoverflow.com/a/11493777/741316 """ diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py index 468058d3229e..bcf79d84d82b 100644 --- a/lib/matplotlib/mlab.py +++ b/lib/matplotlib/mlab.py @@ -3656,6 +3656,168 @@ def stineman_interp(xi,x,y,yp=None): 1/(dy1+dy2),)) return yi + +class GaussianKDE(object): + """ + Representation of a kernel-density estimate using Gaussian kernels. + + Call signature:: + kde = GaussianKDE(dataset, bw_method='silverman') + + Parameters + ---------- + dataset : array_like + Datapoints to estimate from. In case of univariate data this is a 1-D + array, otherwise a 2-D array with shape (# of dims, # of data). + + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. If a + scalar, this will be used directly as `kde.factor`. If a + callable, it should take a `GaussianKDE` instance as only + parameter and return a scalar. If None (default), 'scott' is used. + + Attributes + ---------- + dataset : ndarray + The dataset with which `GaussianKDE` was initialized. + + dim : int + Number of dimensions. + + num_dp : int + Number of datapoints. 
+ + factor : float + The bandwidth factor, obtained from `kde.covariance_factor`, with which + the covariance matrix is multiplied. + + covariance : ndarray + The covariance matrix of `dataset`, scaled by the calculated bandwidth + (`kde.factor`). + + inv_cov : ndarray + The inverse of `covariance`. + + Methods + ------- + kde.evaluate(points) : ndarray + Evaluate the estimated pdf on a provided set of points. + + kde(points) : ndarray + Same as kde.evaluate(points) + + """ + + # This implementation with minor modification was too good to pass up. + # from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py + + def __init__(self, dataset, bw_method=None): + self.dataset = np.atleast_2d(dataset) + if not np.array(self.dataset).size > 1: + raise ValueError("`dataset` input should have multiple elements.") + + self.dim, self.num_dp = np.array(self.dataset).shape + + if bw_method is None: + pass + elif bw_method == 'scott': + self.covariance_factor = self.scotts_factor + elif bw_method == 'silverman': + self.covariance_factor = self.silverman_factor + elif (np.isscalar(bw_method) and not + isinstance(bw_method, six.string_types)): + self._bw_method = 'use constant' + self.covariance_factor = lambda: bw_method + elif callable(bw_method): + self._bw_method = bw_method + self.covariance_factor = lambda: self._bw_method(self) + else: + msg = "`bw_method` should be 'scott', 'silverman', a scalar " \ + "or a callable." + raise ValueError(msg) + + # Computes the covariance matrix for each Gaussian kernel using + # covariance_factor(). 
+ + self.factor = self.covariance_factor() + # Cache covariance and inverse covariance of the data + if not hasattr(self, '_data_inv_cov'): + self.data_covariance = np.atleast_2d( + np.cov( + self.dataset, + rowvar=1, + bias=False)) + self.data_inv_cov = np.linalg.inv(self.data_covariance) + + self.covariance = self.data_covariance * self.factor ** 2 + self.inv_cov = self.data_inv_cov / self.factor ** 2 + self.norm_factor = np.sqrt( + np.linalg.det( + 2 * np.pi * self.covariance)) * self.num_dp + + def scotts_factor(self): + return np.power(self.num_dp, -1. / (self.dim + 4)) + + def silverman_factor(self): + return np.power( + self.num_dp * (self.dim + 2.0) / 4.0, -1. / (self.dim + 4)) + + # Default method to calculate bandwidth, can be overwritten by subclass + covariance_factor = scotts_factor + + def evaluate(self, points): + """Evaluate the estimated pdf on a set of points. + + Parameters + ---------- + points : (# of dimensions, # of points)-array + Alternatively, a (# of dimensions,) vector can be passed in and + treated as a single point. + + Returns + ------- + values : (# of points,)-array + The values at each point. + + Raises + ------ + ValueError : if the dimensionality of the input points is different + than the dimensionality of the KDE. 
+ + """ + points = np.atleast_2d(points) + + dim, num_m = np.array(points).shape + if dim != self.dim: + msg = "points have dimension %s, dataset has dimension %s" % ( + dim, self.dim) + raise ValueError(msg) + + result = np.zeros((num_m,), dtype=np.float) + + if num_m >= self.num_dp: + # there are more points than data, so loop over data + for i in range(self.num_dp): + diff = self.dataset[:, i, np.newaxis] - points + tdiff = np.dot(self.inv_cov, diff) + energy = np.sum(diff * tdiff, axis=0) / 2.0 + result = result + np.exp(-energy) + else: + # loop over points + for i in range(num_m): + diff = self.dataset - points[:, i, np.newaxis] + tdiff = np.dot(self.inv_cov, diff) + energy = np.sum(diff * tdiff, axis=0) / 2.0 + result[i] = np.sum(np.exp(-energy), axis=0) + + result = result / self.norm_factor + + return result + + __call__ = evaluate + + ################################################## # Code related to things in and around polygons ################################################## diff --git a/lib/matplotlib/pylab.py b/lib/matplotlib/pylab.py index dd0a52f12276..86fe482ad65f 100644 --- a/lib/matplotlib/pylab.py +++ b/lib/matplotlib/pylab.py @@ -23,6 +23,7 @@ broken_barh - a set of horizontal bars with gaps box - set the axes frame on/off state boxplot - make a box and whisker plot + violinplot - make a violin plot cla - clear current axes clabel - label a contour plot clf - clear a figure window @@ -162,8 +163,8 @@ _Statistics - amax - the maximum along dimension m - amin - the minimum along dimension m + amax - the maximum along dimension m + amin - the minimum along dimension m corrcoef - correlation coefficient cov - covariance matrix mean - the mean along dimension m @@ -172,7 +173,8 @@ prod - the product along dimension m ptp - the max-min along dimension m std - the standard deviation along dimension m - asum - the sum along dimension m + asum - the sum along dimension m + ksdensity - the kernel density estimate _Time series analysis diff --git 
a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index da553fe212c6..a6f0a82dd8d1 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -3396,6 +3396,29 @@ def triplot(*args, **kwargs): return ret +# This function was autogenerated by boilerplate.py. Do not edit as +# changes will be lost +@_autogen_docstring(Axes.violinplot) +def violinplot(dataset, positions=None, vert=True, widths=0.5, showmeans=False, + showextrema=True, showmedians=False, points=100, bw_method=None, + hold=None): + ax = gca() + # allow callers to override the hold state by passing hold=True|False + washold = ax.ishold() + + if hold is not None: + ax.hold(hold) + try: + ret = ax.violinplot(dataset, positions=positions, vert=vert, + widths=widths, showmeans=showmeans, + showextrema=showextrema, showmedians=showmedians, + points=points, bw_method=bw_method) + draw_if_interactive() + finally: + ax.hold(washold) + + return ret + # This function was autogenerated by boilerplate.py. Do not edit as # changes will be lost @_autogen_docstring(Axes.vlines) diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_baseline.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_baseline.png new file mode 100644 index 000000000000..f84484368988 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_baseline.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_10.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_10.png new file mode 100644 index 000000000000..c19b7f4b6f87 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_10.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_200.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_200.png new file mode 100644 index 000000000000..0f617ee10ee5 Binary 
files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_custompoints_200.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png new file mode 100644 index 000000000000..e19f48c12273 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showextrema.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showextrema.png new file mode 100644 index 000000000000..00949738ede4 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showextrema.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmeans.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmeans.png new file mode 100644 index 000000000000..2e5bad4d7d83 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmeans.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmedians.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmedians.png new file mode 100644 index 000000000000..918dc62c7ff6 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showmedians.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_baseline.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_baseline.png new file mode 100644 index 000000000000..64f291eb9028 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_baseline.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_10.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_10.png new file 
mode 100644 index 000000000000..b1ed10d72892 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_10.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_200.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_200.png new file mode 100644 index 000000000000..481c7be45fb4 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_custompoints_200.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png new file mode 100644 index 000000000000..80db9e88648a Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showextrema.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showextrema.png new file mode 100644 index 000000000000..449d379a1d68 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showextrema.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmeans.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmeans.png new file mode 100644 index 000000000000..acd31065bace Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmeans.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmedians.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmedians.png new file mode 100644 index 000000000000..bfd5e64f6ce6 Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showmedians.png differ diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 2cbb7054f832..7acc2566e33c 100644 --- 
a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -1566,6 +1566,179 @@ def test_boxplot_bad_ci_2(): conf_intervals=[[1, 2], [1]]) +@image_comparison(baseline_images=['violinplot_vert_baseline'], + extensions=['png']) +def test_vert_violinplot_baseline(): + # First 9 digits of frac(sqrt(2)) + np.random.seed(414213562) + data = [np.random.normal(size=100) for i in range(4)] + ax = plt.axes() + ax.violinplot(data, positions=range(4), showmeans=0, showextrema=0, + showmedians=0) + + +@image_comparison(baseline_images=['violinplot_vert_showmeans'], + extensions=['png']) +def test_vert_violinplot_showmeans(): + ax = plt.axes() + # First 9 digits of frac(sqrt(3)) + np.random.seed(732050807) + data = [np.random.normal(size=100) for i in range(4)] + ax.violinplot(data, positions=range(4), showmeans=1, showextrema=0, + showmedians=0) + + +@image_comparison(baseline_images=['violinplot_vert_showextrema'], + extensions=['png']) +def test_vert_violinplot_showextrema(): + ax = plt.axes() + # First 9 digits of frac(sqrt(5)) + np.random.seed(236067977) + data = [np.random.normal(size=100) for i in range(4)] + ax.violinplot(data, positions=range(4), showmeans=0, showextrema=1, + showmedians=0) + + +@image_comparison(baseline_images=['violinplot_vert_showmedians'], + extensions=['png']) +def test_vert_violinplot_showmedians(): + ax = plt.axes() + # First 9 digits of frac(sqrt(7)) + np.random.seed(645751311) + data = [np.random.normal(size=100) for i in range(4)] + ax.violinplot(data, positions=range(4), showmeans=0, showextrema=0, + showmedians=1) + + +@image_comparison(baseline_images=['violinplot_vert_showall'], + extensions=['png']) +def test_vert_violinplot_showall(): + ax = plt.axes() + # First 9 digits of frac(sqrt(11)) + np.random.seed(316624790) + data = [np.random.normal(size=100) for i in range(4)] + ax.violinplot(data, positions=range(4), showmeans=1, showextrema=1, + showmedians=1) + + 
+@image_comparison(baseline_images=['violinplot_vert_custompoints_10'],
+                  extensions=['png'])
+def test_vert_violinplot_custompoints_10():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(13))) keeps the data stable
+    np.random.seed(605551275)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), showmeans=False,
+                  showextrema=False, showmedians=False, points=10)
+
+
+@image_comparison(baseline_images=['violinplot_vert_custompoints_200'],
+                  extensions=['png'])
+def test_vert_violinplot_custompoints_200():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(17))) keeps the data stable
+    np.random.seed(123105625)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), showmeans=False,
+                  showextrema=False, showmedians=False, points=200)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_baseline'],
+                  extensions=['png'])
+def test_horiz_violinplot_baseline():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(19))) keeps the data stable
+    np.random.seed(358898943)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=False,
+                  showextrema=False, showmedians=False)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_showmedians'],
+                  extensions=['png'])
+def test_horiz_violinplot_showmedians():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(23))) keeps the data stable
+    np.random.seed(795831523)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=False,
+                  showextrema=False, showmedians=True)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_showmeans'],
+                  extensions=['png'])
+def test_horiz_violinplot_showmeans():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(29))) keeps the data stable
+    np.random.seed(385164807)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=True,
+                  showextrema=False, showmedians=False)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_showextrema'],
+                  extensions=['png'])
+def test_horiz_violinplot_showextrema():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(31))) keeps the data stable
+    np.random.seed(567764362)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=False,
+                  showextrema=True, showmedians=False)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_showall'],
+                  extensions=['png'])
+def test_horiz_violinplot_showall():
+    ax = plt.axes()
+    # Fixed seed: frac(sqrt(37)) starts 082762530; the leading 0 is dropped
+    np.random.seed(82762530)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=True,
+                  showextrema=True, showmedians=True)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_custompoints_10'],
+                  extensions=['png'])
+def test_horiz_violinplot_custompoints_10():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(41))) keeps the data stable
+    np.random.seed(403124237)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=False,
+                  showextrema=False, showmedians=False, points=10)
+
+
+@image_comparison(baseline_images=['violinplot_horiz_custompoints_200'],
+                  extensions=['png'])
+def test_horiz_violinplot_custompoints_200():
+    ax = plt.axes()
+    # Fixed seed (first 9 digits of frac(sqrt(43))) keeps the data stable
+    np.random.seed(557438524)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    ax.violinplot(data, positions=range(4), vert=False, showmeans=False,
+                  showextrema=False, showmedians=False, points=200)
+
+
+@cleanup
+def test_violinplot_bad_positions():
+    ax = plt.axes()
+    # Seed: digits of frac(sqrt(47)). 4 datasets vs. 5 positions must fail.
+    np.random.seed(855654600)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    assert_raises(ValueError, ax.violinplot, data, positions=range(5))
+
+
+@cleanup
+def test_violinplot_bad_widths():
+    ax = plt.axes()
+    # Seed: digits of frac(sqrt(53)). 4 datasets vs. 3 widths must fail.
+    np.random.seed(280109889)
+    data = [np.random.normal(size=100) for _ in range(4)]
+    assert_raises(ValueError, ax.violinplot, data, positions=range(4),
+                  widths=[1, 2, 3])
+
+
 @cleanup
 def 
test_manage_xticks():
     _, ax = plt.subplots()
diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py
index 3a863edb0a21..0cc991f05ebf 100644
--- a/lib/matplotlib/tests/test_mlab.py
+++ b/lib/matplotlib/tests/test_mlab.py
@@ -2758,6 +2758,192 @@ def get_z(x, y):
                                   np.ma.getmask(correct_zi_masked))
 
 
+#*****************************************************************
+# These tests were taken from SciPy with some minor modifications;
+# the originals can be retrieved from:
+# https://github.com/scipy/scipy/blob/master/scipy/stats/tests/test_kdeoth.py
+#*****************************************************************
+
+class gaussian_kde_tests(object):
+
+    def test_kde_integer_input(self):
+        """Regression test for #1181."""
+        x1 = np.arange(5)
+        kde = mlab.GaussianKDE(x1)
+        y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869,
+                      0.13480721]
+        np.testing.assert_array_almost_equal(kde(x1), y_expected, decimal=6)
+
+    def test_gaussian_kde_covariance_caching(self):
+        x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
+        xs = np.linspace(-10, 10, num=5)
+        # These expected values are from scipy 0.10, before some changes to
+        # gaussian_kde. They were not compared with any external reference.
+        y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754,
+                      0.01664475]
+
+        # Set it to the default bandwidth.
+        kde2 = mlab.GaussianKDE(x1, 'scott')
+        y2 = kde2(xs)
+
+        np.testing.assert_array_almost_equal(y_expected, y2, decimal=7)
+
+    def test_kde_bandwidth_method(self):
+
+        np.random.seed(8765678)
+        n_basesample = 50
+        xn = np.random.randn(n_basesample)
+
+        # Default
+        gkde = mlab.GaussianKDE(xn)
+        # Supply the name of a bandwidth rule
+        gkde2 = mlab.GaussianKDE(xn, 'scott')
+        # Supply a scalar
+        gkde3 = mlab.GaussianKDE(xn, bw_method=gkde.factor)
+
+        xs = np.linspace(-7, 7, 51)
+        kdepdf = gkde.evaluate(xs)
+        kdepdf2 = gkde2.evaluate(xs)
+        np.testing.assert_array_almost_equal(kdepdf, kdepdf2)
+        kdepdf3 = gkde3.evaluate(xs)
+        np.testing.assert_array_almost_equal(kdepdf, kdepdf3)
+
+
+class gaussian_kde_custom_tests(object):
+    def test_no_data(self):
+        """Pass no data into the GaussianKDE class."""
+        assert_raises(ValueError, mlab.GaussianKDE, [])
+
+    def test_single_dataset_element(self):
+        """Pass a single dataset element into the GaussianKDE class."""
+        assert_raises(ValueError, mlab.GaussianKDE, [42])
+
+    def test_silverman_multidim_dataset(self):
+        """Use a multi-dimensional array as the dataset with 'silverman';
+        a LinAlgError is expected."""
+        x1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        assert_raises(np.linalg.LinAlgError, mlab.GaussianKDE, x1, "silverman")
+
+    def test_silverman_singledim_dataset(self):
+        """Use a single-dimensional array as the dataset and test silverman's
+        covariance factor."""
+        x1 = np.array([-7, -5, 1, 4, 5])
+        mygauss = mlab.GaussianKDE(x1, "silverman")
+        y_expected = 0.76770389927475502
+        assert_almost_equal(mygauss.covariance_factor(), y_expected, 7)
+
+    def test_scott_multidim_dataset(self):
+        """Use a multi-dimensional array as the dataset with 'scott';
+        a LinAlgError is expected."""
+        x1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        assert_raises(np.linalg.LinAlgError, mlab.GaussianKDE, x1, "scott")
+
+    def test_scott_singledim_dataset(self):
+        """Use a single-dimensional array as the dataset and test scott's
+        covariance factor."""
+        x1 = np.array([-7, -5, 1, 4, 5])
+        mygauss = mlab.GaussianKDE(x1, "scott")
+        y_expected = 0.72477966367769553
+        assert_almost_equal(mygauss.covariance_factor(), y_expected, 7)
+
+    def test_scalar_empty_dataset(self):
+        """Use an empty dataset with a scalar bw_method; a ValueError is
+        expected."""
+        assert_raises(ValueError, mlab.GaussianKDE, [], bw_method=5)
+
+    def test_scalar_covariance_dataset(self):
+        """Use a multi-dimensional dataset and test that a scalar bw_method
+        is returned unchanged as the covariance factor."""
+        np.random.seed(8765678)
+        n_basesample = 50
+        multidim_data = [np.random.randn(n_basesample) for _ in range(5)]
+
+        kde = mlab.GaussianKDE(multidim_data, bw_method=0.5)
+        assert_equal(kde.covariance_factor(), 0.5)
+
+    def test_callable_covariance_dataset(self):
+        """Use a multi-dimensional array as the dataset and test the callable's
+        cov factor"""
+        np.random.seed(8765678)
+        n_basesample = 50
+        multidim_data = [np.random.randn(n_basesample) for _ in range(5)]
+        callable_fun = lambda x: 0.55
+        kde = mlab.GaussianKDE(multidim_data, bw_method=callable_fun)
+        assert_equal(kde.covariance_factor(), 0.55)
+
+    def test_callable_singledim_dataset(self):
+        """Use a single-dimensional random sample as the dataset and test
+        the 'silverman' covariance factor."""
+        np.random.seed(8765678)
+        n_basesample = 50
+        data = np.random.randn(n_basesample)
+
+        kde = mlab.GaussianKDE(data, bw_method='silverman')
+        y_expected = 0.48438841363348911
+        assert_almost_equal(kde.covariance_factor(), y_expected, 7)
+
+    def test_wrong_bw_method(self):
+        """An unrecognised bw_method string should raise a ValueError."""
+        np.random.seed(8765678)
+        n_basesample = 50
+        data = np.random.randn(n_basesample)
+        assert_raises(ValueError, mlab.GaussianKDE, data, bw_method="invalid")
+
+
+class gaussian_kde_evaluate_tests(object):
+
+    def test_evaluate_diff_dim(self):
+        """Test the evaluate method when the number of points (5) differs
+        from the number of dataset elements (4)."""
+        x1 = np.arange(3, 10, 2)
+        kde = mlab.GaussianKDE(x1)
+        x2 = np.arange(3, 12, 2)
+        y_expected = [
+            0.08797252, 0.11774109, 0.11774109, 0.08797252, 0.0370153
+        ]
+        y = kde.evaluate(x2)
+        np.testing.assert_array_almost_equal(y, y_expected, 7)
+
+    def test_evaluate_inv_dim(self):
+        """Invert the dimensions, i.e. give the dataset a dimension of
+        1 [3,2,4], and the points a dimension of 3 [[3],[2],[4]].
+        A ValueError should be raised."""
+        np.random.seed(8765678)
+        n_basesample = 50
+        data = np.random.randn(n_basesample)
+        kde = mlab.GaussianKDE(data)
+        x2 = [[1], [2], [3]]
+        assert_raises(ValueError, kde.evaluate, x2)
+
+    def test_evaluate_dim_and_num(self):
+        """Test evaluation against a single-point (one by one) array."""
+        x1 = np.arange(3, 10, 2)
+        x2 = np.array([3])
+        kde = mlab.GaussianKDE(x1)
+        y_expected = [0.08797252]
+        y = kde.evaluate(x2)
+        np.testing.assert_array_almost_equal(y, y_expected, 7)
+
+    def test_evaluate_point_dim_not_one(self):
+        """Two rows of points against a 1-D dataset raise a ValueError."""
+        x1 = np.arange(3, 10, 2)
+        x2 = [np.arange(3, 10, 2), np.arange(3, 10, 2)]
+        kde = mlab.GaussianKDE(x1)
+        assert_raises(ValueError, kde.evaluate, x2)
+
+    def test_evaluate_equal_dim_and_num_lt(self):
+        """Evaluate at fewer points (3) than there are dataset elements (4)."""
+        x1 = np.arange(3, 10, 2)
+        x2 = np.arange(3, 8, 2)
+        kde = mlab.GaussianKDE(x1)
+        y_expected = [0.08797252, 0.11774109, 0.11774109]
+        y = kde.evaluate(x2)
+        np.testing.assert_array_almost_equal(y, y_expected, 7)
+
+
+#*****************************************************************
+#*****************************************************************
+
 if __name__ == '__main__':
     import nose
     import sys