Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rel-5.2.222'
Browse files Browse the repository at this point in the history
  • Loading branch information
Crunch.io Jenkins Account committed Aug 4, 2018
2 parents 36bf047 + bd1668b commit e207398
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 51 deletions.
81 changes: 53 additions & 28 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from .subtotal import Subtotal
from .utils import lazyproperty
from .utils import lazyproperty, memoize


class Dimension(object):
Expand Down Expand Up @@ -48,15 +48,15 @@ def _get_type(cls, dim, selections=None):
'''
type_ = dim['type'].get('class')

if type_ and type_ == 'enum' and 'subreferences' in dim['references']:
return ('multiple_response'
if cls._is_multiple_response(selections)
else 'categorical_array')

if type_ and type_ == 'enum' and 'subtype' in dim['type']:
return dim['type']['subtype']['class']

if type_:
if type_ == 'enum':
if 'subreferences' in dim['references']:
return ('multiple_response'
if cls._is_multiple_response(selections)
else 'categorical_array')
if 'subtype' in dim['type']:
return dim['type']['subtype']['class']

return type_

return dim['type']['subtype']['class']
Expand Down Expand Up @@ -115,24 +115,35 @@ def _elements(self):
@property
def inserted_hs_indices(self):
    '''Return list of positional indices of inserted H&S subtotals.

    Each subtotal declares an *anchor*: the string 'top', the string
    'bottom', or an element id it should follow.  The returned indices are
    positions in the dimension's element sequence *after* all insertions
    have been applied.
    '''
    if (self.type == 'categorical_array' or not self.subtotals):
        return []  # For CA subvariables, we don't do H&S insertions

    elements = self.elements()
    element_ids = [element['id'] for element in elements]

    # Partition subtotals by anchor position, preserving definition order.
    tops = [st for st in self.subtotals if st.anchor == 'top']
    bottoms = [st for st in self.subtotals if st.anchor == 'bottom']
    middles = [st for st in self.subtotals if st.anchor not in ['top', 'bottom']]

    # Top-anchored insertions occupy the leading slots.
    top_indexes = list(range(len(tops)))
    # A middle insertion lands just after the element its anchor names,
    # shifted right by the top insertions and by earlier middle insertions.
    middle_indexes = [
        index + element_ids.index(insertion.anchor) + len(tops) + 1
        for index, insertion in enumerate(middles)
    ]
    # Bottom-anchored insertions trail everything else.
    bottom_indexes = [
        index + len(tops) + len(middles) + len(elements)
        for index, insertion in enumerate(bottoms)
    ]
    return top_indexes + middle_indexes + bottom_indexes

def _transform_anchor(self, subtotal):
Expand Down Expand Up @@ -211,7 +222,7 @@ def labels(self, include_missing=False, include_transforms=False,
(self._get_name(el), el.get('id', -1))
)
for (i, el) in enumerate(self._elements)
if i in valid_indices
if include_missing or i not in self.invalid_indices
]

# Create subtotals names and insert them in labels after
Expand Down Expand Up @@ -265,19 +276,23 @@ def _include_in_labels(label_with_ind, valid_indices):

return label_with_ind['ind'] in valid_indices

@memoize
def elements(self, include_missing=False):
    '''Get elements of the crunch Dimension.

    For categorical variables, the elements are represented by categories
    internally. For other variable types, actual 'elements' of the
    Crunch Cube JSON response are returned.

    :param include_missing: when True, return all elements, including
        those flagged as missing; otherwise filter missing elements out.
    '''
    if include_missing:
        return self._elements

    return [
        el for (i, el) in enumerate(self._elements)
        if i not in self.invalid_indices
    ]

@memoize
def valid_indices(self, include_missing):
'''Gets valid indices of Crunch Cube Dimension's elements.
Expand All @@ -289,8 +304,18 @@ def valid_indices(self, include_missing):
if include_missing:
return [i for (i, el) in enumerate(self._elements)]
else:
return [i for (i, el) in enumerate(self._elements)
if not el.get('missing')]
return [
i for (i, el) in enumerate(self._elements)
if not el.get('missing')
]

@lazyproperty
def invalid_indices(self):
    '''Set of indices of elements flagged as missing.

    Cached on first access; used to filter missing elements out of
    ``elements()``.
    '''
    # Set comprehension instead of set([...]) — same result, no throwaway list.
    return {
        i for (i, el) in enumerate(self._elements)
        if el.get('missing')
    }


@lazyproperty
def shape(self):
Expand Down
136 changes: 113 additions & 23 deletions src/cr/cube/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,124 @@
'''Utility functions for crunch cube, as well as other modules.'''
import os
import collections
import functools

try:
from itertools import ifilterfalse
except ImportError:
from itertools import filterfalse as ifilterfalse

import json
import os


class Counter(dict):
    """dict subclass whose absent keys read as zero.

    Looking up a missing key evaluates to 0 without inserting it, so
    callers may write ``counter[key] += 1`` without pre-seeding keys.
    """

    def __missing__(self, _key):
        # Every absent key has the same implicit count of zero.
        return 0


def load_fixture(fixtures_directory, filename):
    """Load a JSON fixture for CrunchCube integration tests.

    :param fixtures_directory: directory containing fixture files.
    :param filename: name of the JSON file to load.
    :returns: the parsed JSON content (typically a dict).
    """
    # NOTE: a stray duplicate docstring line (merge artifact) was removed;
    # behavior is unchanged.
    with open(os.path.join(fixtures_directory, filename)) as ctx_file:
        fixture = json.load(ctx_file)
    return fixture

class lazyproperty(property):
    """Cached-on-first-access property descriptor.

    The decorated method is called once per instance; the result is stored
    in the instance ``__dict__`` under the property's name and returned on
    subsequent accesses.  Assignment overrides the cached value.

    borrowed from: https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator
    """

    # Sentinel distinguishes "not cached yet" from a legitimately cached
    # None value; testing `value is None` would recompute on every access
    # for properties that return None.
    _sentinel = object()

    def __init__(self, func, name=None, doc=None):
        self.__name__ = name or func.__name__
        self.__module__ = func.__module__
        self.__doc__ = doc or func.__doc__
        self.func = func

    def __set__(self, obj, value):
        # Explicit assignment simply replaces the cached value.
        obj.__dict__[self.__name__] = value

    def __get__(self, obj, type=None):
        if obj is None:
            # Class-level access returns the descriptor itself.
            return self
        value = obj.__dict__.get(self.__name__, self._sentinel)
        if value is self._sentinel:
            value = self.func(obj)
            obj.__dict__[self.__name__] = value
        return value

def lru_cache(maxsize=100):
    '''Least-recently-used cache decorator.
    Arguments to the cached function must be hashable.
    Cache performance statistics stored in f.hits and f.misses.
    Clear the cache with f.clear().
    http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
    '''
    # The usage queue may contain duplicate keys (one entry per call); let it
    # grow to 10x the cache size before compacting it to unique keys below.
    maxqueue = maxsize * 10

    def decorating_function(user_function,
            len=len, iter=iter, tuple=tuple, sorted=sorted, KeyError=KeyError):
        # Builtins are re-bound as keyword defaults so lookups inside the
        # wrapper are fast locals instead of global/builtin lookups.
        cache = {}                   # mapping of args to results
        queue = collections.deque()  # order that keys have been used
        refcount = Counter()         # times each key is in the queue
        sentinel = object()          # marker for looping around the queue
        kwd_mark = object()          # separate positional and keyword args

        # lookup optimizations (ugly but fast)
        queue_append, queue_popleft = queue.append, queue.popleft
        queue_appendleft, queue_pop = queue.appendleft, queue.pop

        @functools.wraps(user_function)
        def wrapper(*args, **kwds):
            # cache key records both positional and keyword args.
            # NOTE(review): for bound methods the key includes `self`, so the
            # cache keeps a strong reference to every instance it has seen.
            key = args
            if kwds:
                # kwd_mark separates positional from keyword parts; sorting
                # makes keyword order irrelevant to the key.
                key += (kwd_mark,) + tuple(sorted(kwds.items()))

            # record recent use of this key
            queue_append(key)
            refcount[key] += 1

            # get cache entry or compute if not found
            try:
                result = cache[key]
                wrapper.hits += 1
            except KeyError:
                result = user_function(*args, **kwds)
                cache[key] = result
                wrapper.misses += 1

                # purge least recently used cache entry: pop queue entries
                # until one is found whose refcount drops to zero, i.e. a key
                # with no more recent use recorded later in the queue.
                if len(cache) > maxsize:
                    key = queue_popleft()
                    refcount[key] -= 1
                    while refcount[key]:
                        key = queue_popleft()
                        refcount[key] -= 1
                    del cache[key], refcount[key]

            # periodically compact the queue by eliminating duplicate keys
            # while preserving order of most recent access: drain from the
            # right back around to the sentinel, keeping the first (most
            # recent) occurrence of each key.
            if len(queue) > maxqueue:
                refcount.clear()
                queue_appendleft(sentinel)
                for key in ifilterfalse(refcount.__contains__,
                                        iter(queue_pop, sentinel)):
                    queue_appendleft(key)
                    refcount[key] = 1

            return result

        def clear():
            # Reset cache contents and statistics.
            cache.clear()
            queue.clear()
            refcount.clear()
            wrapper.hits = wrapper.misses = 0

        wrapper.hits = wrapper.misses = 0
        wrapper.clear = clear
        return wrapper
    return decorating_function


# Shared memoizing decorator (bounded LRU cache of 100 entries per function).
memoize = lru_cache(100)

0 comments on commit e207398

Please sign in to comment.