Skip to content

Commit

Permalink
Merge pull request #6689 from story645/category
Browse files Browse the repository at this point in the history
ENH: Str Categorical Axis Support
  • Loading branch information
tacaswell committed Jul 8, 2016
2 parents 94f2fb8 + 8a96281 commit 5c1e64d
Show file tree
Hide file tree
Showing 10 changed files with 428 additions and 566 deletions.
1 change: 1 addition & 0 deletions lib/matplotlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,7 @@ def _jupyter_nbextension_paths():
'matplotlib.tests.test_backend_svg',
'matplotlib.tests.test_basic',
'matplotlib.tests.test_bbox_tight',
'matplotlib.tests.test_category',
'matplotlib.tests.test_cbook',
'matplotlib.tests.test_coding_standards',
'matplotlib.tests.test_collections',
Expand Down
1 change: 1 addition & 0 deletions lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import matplotlib.collections as mcoll
import matplotlib.colors as mcolors
import matplotlib.contour as mcontour
import matplotlib.category as _ # <-registers a category unit converter
import matplotlib.dates as _ # <-registers a date unit converter
from matplotlib import docstring
import matplotlib.image as mimage
Expand Down
12 changes: 12 additions & 0 deletions lib/matplotlib/axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ def __init__(self, axes, pickradius=15):
self.offsetText = self._get_offset_text()
self.majorTicks = []
self.minorTicks = []
self.unit_data = []
self.pickradius = pickradius

# Initialize here for testing; later add API
Expand Down Expand Up @@ -712,6 +713,17 @@ def _set_scale(self, value, **kwargs):
def limit_range_for_scale(self, vmin, vmax):
    """Return the range ``(vmin, vmax)`` as limited by this axis' scale.

    The work is delegated to the scale object's own
    ``limit_range_for_scale``, which additionally receives the value of
    ``self.get_minpos()``.
    """
    scale = self._scale
    minpos = self.get_minpos()
    return scale.limit_range_for_scale(vmin, vmax, minpos)

@property
def unit_data(self):
    """Holds data that a ConversionInterface subclass relies on
    to convert between labels and indexes
    """
    return self._unit_data

@unit_data.setter
def unit_data(self, data):
    # Stored as given: no copy and no validation happens here.
    self._unit_data = data

def get_children(self):
children = [self.label, self.offsetText]
majorticks = self.get_major_ticks()
Expand Down
151 changes: 151 additions & 0 deletions lib/matplotlib/category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
"""
catch all for categorical functions
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)

import six

import numpy as np

import matplotlib.units as units
import matplotlib.ticker as ticker


# pure hack for numpy 1.6 support
# NOTE(review): ``distutils`` is deprecated (PEP 632) and no longer ships
# with Python 3.12+; this import needs a replacement before targeting those
# versions -- confirm the supported Python range.
from distutils.version import LooseVersion

# True when the installed numpy is version 1.7 or newer.
NP_NEW = (LooseVersion(np.version.version) >= LooseVersion('1.7'))


def to_array(data, maxlen=100):
    """Convert *data* to a numpy array of strings.

    Parameters
    ----------
    data : iterable
        values to convert
    maxlen : int, optional
        item width of the byte-string dtype used on the numpy < 1.7
        fallback path; unused when ``NP_NEW`` is true

    Returns
    -------
    numpy.ndarray
        array of text strings when ``NP_NEW`` is true, otherwise the result
        of the numpy 1.6 fallback conversion
    """
    if NP_NEW:
        # six.text_type is the builtin text type (unicode on Python 2, str
        # on Python 3), which is what the ``np.unicode`` alias pointed to.
        # The alias itself was deprecated in numpy 1.20 and removed in 1.24,
        # so the builtin is used directly.
        return np.array(data, dtype=six.text_type)
    try:
        vals = np.array(data, dtype=('|S', maxlen))
    except UnicodeEncodeError:
        # pure hack: stringify element by element when the direct
        # byte-string conversion cannot encode the data
        vals = np.array([convert_to_string(d) for d in data])
    return vals


class StrCategoryConverter(units.ConversionInterface):
    """Unit converter that encodes string categories as floats.

    The label/location pairs are read from (and, in `default_units`,
    written to) ``axis.unit_data``, a list of ``(label, location)`` tuples.
    """

    @staticmethod
    def convert(value, unit, axis):
        """Uses axis.unit_data map to encode
        data as floats

        Parameters
        ----------
        value : str or iterable
            a single label, or a collection of labels
        unit : object
            unused
        axis : object
            object whose ``unit_data`` holds the (label, location) pairs

        Returns
        -------
        float or numpy.ndarray
            the location of a single label, or a float array with the same
            shape as the string array built from *value*

        Raises
        ------
        KeyError
            if *value* is a single string that is not in the map
        ValueError
            if an element of *value* is neither in the map nor parseable
            as a float
        """
        vmap = dict(axis.unit_data)

        if isinstance(value, six.string_types):
            return vmap[value]

        vals = to_array(value)
        # Look each element up and collect the floats separately.  Writing
        # the codes back into the string array (as was done before) is
        # lossy: the code is stringified and cut to the array's item width
        # (10.0 becomes '1' in a one-character array), and a code written
        # for one label can afterwards be mistaken for a later label such
        # as '0.0'.
        # Elements missing from the map fall back to float(), mirroring the
        # former ``astype('float')`` on untouched entries.
        locs = [vmap[lab] if lab in vmap else float(lab)
                for lab in vals.ravel()]
        return np.array(locs, dtype='float').reshape(vals.shape)

    @staticmethod
    def axisinfo(unit, axis):
        """Return an AxisInfo with a locator and formatter built from
        the locations and labels stored in ``axis.unit_data``.
        """
        seq, locs = zip(*axis.unit_data)
        majloc = StrCategoryLocator(locs)
        majfmt = StrCategoryFormatter(seq)
        return units.AxisInfo(majloc=majloc, majfmt=majfmt)

    @staticmethod
    def default_units(data, axis):
        """Merge the categories found in *data* into ``axis.unit_data``.

        Always returns None; the mapping update is the side effect that
        matters.
        """
        # the conversion call stack is:
        # default_units->axis_info->convert
        axis.unit_data = map_categories(data, axis.unit_data)
        return None


class StrCategoryLocator(ticker.FixedLocator):
    """Tick locator for string categories, built on ticker.FixedLocator."""

    def __init__(self, locs):
        # Forward the locations unchanged; the second positional argument
        # of the base initializer is passed as None.
        ticker.FixedLocator.__init__(self, locs, None)


class StrCategoryFormatter(ticker.FixedFormatter):
    """Tick formatter for string categories, built on
    ticker.FixedFormatter."""

    def __init__(self, seq):
        # Hand the label sequence straight to the base initializer.
        ticker.FixedFormatter.__init__(self, seq)


def convert_to_string(value):
    """Return *value* as a string.

    Helper for numpy 1.6; on later numpy versions
    np.array(..., dtype=unicode) does the same job.

    Strings are returned untouched, finite numbers are stringified through
    numpy, and nan / +inf / -inf become 'nan', 'inf' and '-inf'.

    Raises
    ------
    ValueError
        if none of the checks above accepts *value*
    """
    if isinstance(value, six.string_types):
        return value

    if np.isfinite(value):
        as_text = np.asarray(value, dtype=str)
        return as_text[np.newaxis][0]

    # Non-finite values, tested in the order nan, +inf, -inf.
    non_finite = (
        (np.isnan, 'nan'),
        (np.isposinf, 'inf'),
        (np.isneginf, '-inf'),
    )
    for matches, text in non_finite:
        if matches(value):
            return text

    raise ValueError("Unconvertable {}".format(value))


def map_categories(data, old_map=None):
    """Create mapping between unique categorical
    values and numerical identifier.

    Parameters
    ----------
    data : iterable
        sequence of values; a single string is treated as one value
    old_map : list of tuple, optional
        if given and non-empty, the returned mapping starts with these
        pairs unchanged and only labels not already present are appended.
        *old_map* itself is not modified.

    Returns
    -------
    list of tuple
        [(label, ticklocation),...]
    """

    # code typical missing data in the negative range because
    # everything else will always have positive encoding
    # questionable if it even makes sense
    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}

    if isinstance(data, six.string_types):
        data = [data]

    # will update this post cbook/dict support
    strdata = to_array(data)
    uniq = np.unique(strdata)

    if old_map:
        # copy so the caller's mapping is left untouched
        category_map = list(old_map)
        known = {lab for lab, _ in category_map}
        # Continue numbering after the largest existing location, but never
        # below zero: if the old map only holds the negative special codes,
        # ``max + 1`` would itself land in the reserved negative range.
        svalue = max(max(loc for _, loc in category_map) + 1, 0)
    else:
        category_map = []
        known = set()
        svalue = 0

    new_labs = [u for u in uniq if u not in known]

    # special labels get their fixed negative codes ...
    category_map.extend([(nl, spdict[nl]) for nl in new_labs
                         if nl in spdict])

    # ... and everything else is numbered consecutively from svalue
    new_labs = [nl for nl in new_labs if nl not in spdict]
    new_locs = np.arange(svalue, svalue + len(new_labs), dtype='float')
    category_map.extend(list(zip(new_labs, new_locs)))
    return category_map


# Connects the converter to matplotlib
# One converter instance is registered under each string type.
# NOTE(review): presumably the registry is looked up by the type of the
# plotted data -- confirm against matplotlib.units.
units.registry[str] = StrCategoryConverter()
units.registry[bytes] = StrCategoryConverter()
units.registry[six.text_type] = StrCategoryConverter()
Binary file not shown.
Binary file not shown.

0 comments on commit 5c1e64d

Please sign in to comment.