Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rel-5.2.222'
Browse files Browse the repository at this point in the history
  • Loading branch information
Crunch.io Jenkins Account committed Aug 4, 2018
2 parents 36bf047 + bd1668b commit e207398
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 51 deletions.
81 changes: 53 additions & 28 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from .subtotal import Subtotal
from .utils import lazyproperty
from .utils import lazyproperty, memoize


class Dimension(object):
Expand Down Expand Up @@ -48,15 +48,15 @@ def _get_type(cls, dim, selections=None):
'''
type_ = dim['type'].get('class')

if type_ and type_ == 'enum' and 'subreferences' in dim['references']:
return ('multiple_response'
if cls._is_multiple_response(selections)
else 'categorical_array')

if type_ and type_ == 'enum' and 'subtype' in dim['type']:
return dim['type']['subtype']['class']

if type_:
if type_ == 'enum':
if 'subreferences' in dim['references']:
return ('multiple_response'
if cls._is_multiple_response(selections)
else 'categorical_array')
if 'subtype' in dim['type']:
return dim['type']['subtype']['class']

return type_

return dim['type']['subtype']['class']
Expand Down Expand Up @@ -115,24 +115,35 @@ def _elements(self):
@property
def inserted_hs_indices(self):
    '''Return list of positional indices of inserted H&S subtotals.

    Each subtotal declares an *anchor*: the string 'top', the string
    'bottom', or an element id it should follow.  The returned indices are
    positions in the dimension's element sequence *after* all insertions
    have been applied.
    '''
    if (self.type == 'categorical_array' or not self.subtotals):
        return []  # For CA subvariables, we don't do H&S insertions

    elements = self.elements()
    element_ids = [element['id'] for element in elements]

    # Partition subtotals by anchor position, preserving definition order.
    tops = [st for st in self.subtotals if st.anchor == 'top']
    bottoms = [st for st in self.subtotals if st.anchor == 'bottom']
    middles = [st for st in self.subtotals if st.anchor not in ['top', 'bottom']]

    # Top-anchored insertions occupy the leading slots.
    top_indexes = list(range(len(tops)))
    # A middle insertion lands just after the element its anchor names,
    # shifted right by the top insertions and by earlier middle insertions.
    middle_indexes = [
        index + element_ids.index(insertion.anchor) + len(tops) + 1
        for index, insertion in enumerate(middles)
    ]
    # Bottom-anchored insertions trail everything else.
    bottom_indexes = [
        index + len(tops) + len(middles) + len(elements)
        for index, insertion in enumerate(bottoms)
    ]
    return top_indexes + middle_indexes + bottom_indexes

def _transform_anchor(self, subtotal):
Expand Down Expand Up @@ -211,7 +222,7 @@ def labels(self, include_missing=False, include_transforms=False,
(self._get_name(el), el.get('id', -1))
)
for (i, el) in enumerate(self._elements)
if i in valid_indices
if include_missing or i not in self.invalid_indices
]

# Create subtotals names and insert them in labels after
Expand Down Expand Up @@ -265,19 +276,23 @@ def _include_in_labels(label_with_ind, valid_indices):

return label_with_ind['ind'] in valid_indices

@memoize
def elements(self, include_missing=False):
    '''Get elements of the crunch Dimension.

    For categorical variables, the elements are represented by categories
    internally. For other variable types, actual 'elements' of the
    Crunch Cube JSON response are returned.

    :param include_missing: when True, return all elements, including
        those flagged as missing; otherwise filter missing elements out.
    '''
    if include_missing:
        return self._elements

    return [
        el for (i, el) in enumerate(self._elements)
        if i not in self.invalid_indices
    ]

@memoize
def valid_indices(self, include_missing):
'''Gets valid indices of Crunch Cube Dimension's elements.
Expand All @@ -289,8 +304,18 @@ def valid_indices(self, include_missing):
if include_missing:
return [i for (i, el) in enumerate(self._elements)]
else:
return [i for (i, el) in enumerate(self._elements)
if not el.get('missing')]
return [
i for (i, el) in enumerate(self._elements)
if not el.get('missing')
]

@lazyproperty
def invalid_indices(self):
    '''Set of indices of elements flagged as missing.

    Cached on first access; used to filter missing elements out of
    ``elements()``.
    '''
    # Set comprehension instead of set([...]) — same result, no throwaway list.
    return {
        i for (i, el) in enumerate(self._elements)
        if el.get('missing')
    }


@lazyproperty
def shape(self):
Expand Down
136 changes: 113 additions & 23 deletions src/cr/cube/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,124 @@
'''Utility functions for crunch cube, as well as other modules.'''
import os
import collections
import functools

try:
from itertools import ifilterfalse
except ImportError:
from itertools import filterfalse as ifilterfalse

import json
import os


class Counter(dict):
    """dict subclass whose absent keys read as zero.

    Looking up a missing key evaluates to 0 without inserting it, so
    callers may write ``counter[key] += 1`` without pre-seeding keys.
    """

    def __missing__(self, _key):
        # Every absent key has the same implicit count of zero.
        return 0


def load_fixture(fixtures_directory, filename):
    """Load a JSON fixture for CrunchCube integration tests.

    :param fixtures_directory: directory containing fixture files.
    :param filename: name of the JSON file to load.
    :returns: the parsed JSON content (typically a dict).
    """
    # NOTE: a stray duplicate docstring line (merge artifact) was removed;
    # behavior is unchanged.
    with open(os.path.join(fixtures_directory, filename)) as ctx_file:
        fixture = json.load(ctx_file)
    return fixture

class lazyproperty(property):
    """Cached-on-first-access property descriptor.

    The decorated method is called once per instance; the result is stored
    in the instance ``__dict__`` under the property's name and returned on
    subsequent accesses.  Assignment overrides the cached value.

    borrowed from: https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator
    """

    # Sentinel distinguishes "not cached yet" from a legitimately cached
    # None value; testing `value is None` would recompute on every access
    # for properties that return None.
    _sentinel = object()

    def __init__(self, func, name=None, doc=None):
        self.__name__ = name or func.__name__
        self.__module__ = func.__module__
        self.__doc__ = doc or func.__doc__
        self.func = func

    def __set__(self, obj, value):
        # Explicit assignment simply replaces the cached value.
        obj.__dict__[self.__name__] = value

    def __get__(self, obj, type=None):
        if obj is None:
            # Class-level access returns the descriptor itself.
            return self
        value = obj.__dict__.get(self.__name__, self._sentinel)
        if value is self._sentinel:
            value = self.func(obj)
            obj.__dict__[self.__name__] = value
        return value

def lru_cache(maxsize=100):
    '''Least-recently-used cache decorator.
    Arguments to the cached function must be hashable.
    Cache performance statistics stored in f.hits and f.misses.
    Clear the cache with f.clear().
    http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
    '''
    # The usage queue may contain duplicate keys (one entry per call); let it
    # grow to 10x the cache size before compacting it to unique keys below.
    maxqueue = maxsize * 10

    def decorating_function(user_function,
            len=len, iter=iter, tuple=tuple, sorted=sorted, KeyError=KeyError):
        # Builtins are re-bound as keyword defaults so lookups inside the
        # wrapper are fast locals instead of global/builtin lookups.
        cache = {}                   # mapping of args to results
        queue = collections.deque()  # order that keys have been used
        refcount = Counter()         # times each key is in the queue
        sentinel = object()          # marker for looping around the queue
        kwd_mark = object()          # separate positional and keyword args

        # lookup optimizations (ugly but fast)
        queue_append, queue_popleft = queue.append, queue.popleft
        queue_appendleft, queue_pop = queue.appendleft, queue.pop

        @functools.wraps(user_function)
        def wrapper(*args, **kwds):
            # cache key records both positional and keyword args.
            # NOTE(review): for bound methods the key includes `self`, so the
            # cache keeps a strong reference to every instance it has seen.
            key = args
            if kwds:
                # kwd_mark separates positional from keyword parts; sorting
                # makes keyword order irrelevant to the key.
                key += (kwd_mark,) + tuple(sorted(kwds.items()))

            # record recent use of this key
            queue_append(key)
            refcount[key] += 1

            # get cache entry or compute if not found
            try:
                result = cache[key]
                wrapper.hits += 1
            except KeyError:
                result = user_function(*args, **kwds)
                cache[key] = result
                wrapper.misses += 1

                # purge least recently used cache entry: pop queue entries
                # until one is found whose refcount drops to zero, i.e. a key
                # with no more recent use recorded later in the queue.
                if len(cache) > maxsize:
                    key = queue_popleft()
                    refcount[key] -= 1
                    while refcount[key]:
                        key = queue_popleft()
                        refcount[key] -= 1
                    del cache[key], refcount[key]

            # periodically compact the queue by eliminating duplicate keys
            # while preserving order of most recent access: drain from the
            # right back around to the sentinel, keeping the first (most
            # recent) occurrence of each key.
            if len(queue) > maxqueue:
                refcount.clear()
                queue_appendleft(sentinel)
                for key in ifilterfalse(refcount.__contains__,
                                        iter(queue_pop, sentinel)):
                    queue_appendleft(key)
                    refcount[key] = 1

            return result

        def clear():
            # Reset cache contents and statistics.
            cache.clear()
            queue.clear()
            refcount.clear()
            wrapper.hits = wrapper.misses = 0

        wrapper.hits = wrapper.misses = 0
        wrapper.clear = clear
        return wrapper
    return decorating_function


# Shared memoizing decorator (bounded LRU cache of 100 entries per function).
memoize = lru_cache(100)

0 comments on commit e207398

Please sign in to comment.