Commit

Merge 5c29191 into 36bf047
percious committed Aug 3, 2018
2 parents 36bf047 + 5c29191 commit 268a9c0
Showing 2 changed files with 114 additions and 11 deletions.
33 changes: 24 additions & 9 deletions src/cr/cube/dimension.py
@@ -3,7 +3,7 @@
import numpy as np

from .subtotal import Subtotal
from .utils import lazyproperty
from .utils import lazyproperty, memoize


class Dimension(object):
@@ -115,10 +115,11 @@ def _elements(self):
@property
def inserted_hs_indices(self):
'''Returns inserted H&S indices for the dimension.'''
if self.type == 'categorical_array':
if (self.type == 'categorical_array' or not self.subtotals):
return [] # For CA subvariables, we don't do H&S insertions

element_ids = [element['id'] for element in self.elements()]
elements = self.elements()
element_ids = [element['id'] for element in elements]

tops = [st for st in self.subtotals if st.anchor == 'top']
bottoms = [st for st in self.subtotals if st.anchor == 'bottom']
@@ -130,7 +131,7 @@ def inserted_hs_indices(self):
for index, insertion in enumerate(middles)
]
bottom_indexes = [
index + len(tops) + len(middles) + len(self.elements())
index + len(tops) + len(middles) + len(elements)
for index, insertion in enumerate(bottoms)
]
return top_indexes + middle_indexes + bottom_indexes
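
To make the bottom-anchor arithmetic in this hunk concrete, here is a minimal standalone sketch (the element and subtotal counts are invented for illustration; this is not the real Dimension class):

# Toy counts: 5 valid elements, 1 subtotal anchored at the top, 1 anchored
# in the middle, and 2 anchored at the bottom.  A bottom insertion lands
# after every element plus every earlier insertion, so its index is offset
# by len(tops) + len(middles) + len(elements).
elements = list(range(5))
tops, middles, bottoms = ['t'], ['m'], ['b1', 'b2']

bottom_indexes = [
    index + len(tops) + len(middles) + len(elements)
    for index, insertion in enumerate(bottoms)
]
print(bottom_indexes)  # [7, 8]
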
@@ -211,7 +212,7 @@ def labels(self, include_missing=False, include_transforms=False,
(self._get_name(el), el.get('id', -1))
)
for (i, el) in enumerate(self._elements)
if i in valid_indices
if include_missing or i not in self.invalid_indices
]

# Create subtotals names and insert them in labels after
@@ -265,19 +266,23 @@ def _include_in_labels(label_with_ind, valid_indices):

return label_with_ind['ind'] in valid_indices

@memoize
def elements(self, include_missing=False):
'''Get elements of the crunch Dimension.
For categorical variables, the elements are represented by categories
internally. For other variable types, actual 'elements' of the
Crunch Cube JSON response are returned.
'''
valid_indices = self.valid_indices(include_missing)
if include_missing:
return self._elements

return [
el for (i, el) in enumerate(self._elements)
if i in valid_indices
if i not in self.invalid_indices
]

@memoize
def valid_indices(self, include_missing):
'''Gets valid indices of Crunch Cube Dimension's elements.
@@ -289,8 +294,18 @@ def valid_indices(self, include_missing):
if include_missing:
return [i for (i, el) in enumerate(self._elements)]
else:
return [i for (i, el) in enumerate(self._elements)
if not el.get('missing')]
return [
i for (i, el) in enumerate(self._elements)
if not el.get('missing')
]

@lazyproperty
def invalid_indices(self):
return set([
i for (i, el) in enumerate(self._elements)
if el.get('missing')
])


@lazyproperty
def shape(self):
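
The net effect of the dimension.py changes above is that repeated per-call scans for valid indices are replaced by a lazily computed, cached set of invalid indices, which elements() and labels() consult for membership tests, while @memoize caches repeated calls to elements() and valid_indices(). A minimal standalone sketch of that filtering pattern, using toy element dicts rather than the real Dimension class:

# Toy element list; the last entry is flagged as missing.
_elements = [{'id': 1}, {'id': 2}, {'id': 3, 'missing': True}]

# Computed once and reused (the diff wraps this in @lazyproperty),
# giving O(1) membership tests instead of rebuilding a list per call.
invalid_indices = set(
    i for i, el in enumerate(_elements) if el.get('missing')
)

def elements(include_missing=False):
    if include_missing:
        return _elements
    return [el for i, el in enumerate(_elements) if i not in invalid_indices]

print(elements())                            # [{'id': 1}, {'id': 2}]
print(len(elements(include_missing=True)))   # 3
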
92 changes: 90 additions & 2 deletions src/cr/cube/utils/__init__.py
@@ -1,10 +1,19 @@
'''Utility functions for crunch cube, as well as other modules.'''
import os
import collections
import functools
from itertools import ifilterfalse
import json
import os


class Counter(dict):
"""Mapping where default values are zero"""
def __missing__(self, key):
return 0


def load_fixture(fixtures_directory, filename):
'''Loads fixtures for CrunchCube integration tests.'''
"""Loads fixtures for CrunchCube integration tests."""
with open(os.path.join(fixtures_directory, filename)) as ctx_file:
fixture = json.load(ctx_file)
return fixture
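
The Counter class added above is a plain dict whose __missing__ hook reports zero for absent keys, which lets the LRU cache further down increment reference counts without pre-seeding them. A quick illustration (the class is repeated here so the snippet runs on its own):

class Counter(dict):
    """Mapping where default values are zero."""
    def __missing__(self, key):
        return 0

refcount = Counter()
refcount['some-key'] += 1        # no KeyError: the missing key reads as 0
print(refcount['some-key'])      # 1
print(refcount['never-seen'])    # 0, and the key is still not stored
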
@@ -32,3 +41,82 @@ def get_prop_value(obj):
return value

return property(get_prop_value, doc=docstring)


def lru_cache(maxsize=100):
'''Least-recently-used cache decorator.
Arguments to the cached function must be hashable.
Cache performance statistics stored in f.hits and f.misses.
Clear the cache with f.clear().
http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
'''
maxqueue = maxsize * 10

def decorating_function(user_function,
len=len, iter=iter, tuple=tuple, sorted=sorted, KeyError=KeyError):
cache = {} # mapping of args to results
queue = collections.deque() # order that keys have been used
refcount = Counter() # times each key is in the queue
sentinel = object() # marker for looping around the queue
kwd_mark = object() # separate positional and keyword args

# lookup optimizations (ugly but fast)
queue_append, queue_popleft = queue.append, queue.popleft
queue_appendleft, queue_pop = queue.appendleft, queue.pop

@functools.wraps(user_function)
def wrapper(*args, **kwds):
# cache key records both positional and keyword args
key = args
if kwds:
key += (kwd_mark,) + tuple(sorted(kwds.items()))

# record recent use of this key
queue_append(key)
refcount[key] += 1

# get cache entry or compute if not found
try:
result = cache[key]
wrapper.hits += 1
except KeyError:
result = user_function(*args, **kwds)
cache[key] = result
wrapper.misses += 1

# purge least recently used cache entry
if len(cache) > maxsize:
key = queue_popleft()
refcount[key] -= 1
while refcount[key]:
key = queue_popleft()
refcount[key] -= 1
del cache[key], refcount[key]

# periodically compact the queue by eliminating duplicate keys
# while preserving order of most recent access
if len(queue) > maxqueue:
refcount.clear()
queue_appendleft(sentinel)
for key in ifilterfalse(refcount.__contains__,
iter(queue_pop, sentinel)):
queue_appendleft(key)
refcount[key] = 1

return result

def clear():
cache.clear()
queue.clear()
refcount.clear()
wrapper.hits = wrapper.misses = 0

wrapper.hits = wrapper.misses = 0
wrapper.clear = clear
return wrapper
return decorating_function


memoize = lru_cache(100)
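
A short usage sketch for the memoize decorator defined above (the decorated function is a made-up example, not part of the library):

@memoize
def expensive(x, power=2):
    return x ** power

expensive(3, power=4)     # computed: expensive.misses becomes 1
expensive(3, power=4)     # identical args hit the cache: expensive.hits becomes 1
print(expensive.hits, expensive.misses)   # 1 1
expensive.clear()         # empties the cache and resets both counters

Because the cache key is built from the positional and keyword arguments, instance methods such as Dimension.elements() can be decorated the same way, with self simply becoming part of the key.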
