Permalink
Browse files

Merge pull request #326 from modilabs/latest

Latest
  • Loading branch information...
2 parents 42c932a + a658fc4 commit 6cb8267ddcd1a623392c36f9b9b8b32733223d2a @pld pld committed Dec 22, 2012
@@ -1,5 +1,7 @@
from pandas import concat, DataFrame, Series
+from bamboo.lib.utils import parse_float
+
class Aggregation(object):
"""Abstract class for all aggregations.
@@ -25,15 +27,19 @@ def eval(self, columns):
def group(self):
"""For when aggregation is called with a group parameter."""
- groupby = self.dframe[self.groups].join(
- self.column).groupby(self.groups, as_index=False)
-
- return groupby.agg(self.formula_name)
+ return self._groupby().agg(self.formula_name)
def agg(self):
"""For when aggregation is called without a group parameter."""
result = float(self.column.__getattribute__(self.formula_name)())
- return DataFrame({self.name: Series([result])})
+ return self._value_to_dframe(result)
+
+ def _value_to_dframe(self, value):
+ return DataFrame({self.name: Series([value])})
+
+ def _groupby(self):
+ return self.dframe[self.groups].join(
+ self.column).groupby(self.groups, as_index=False)
class MultiColumnAggregation(Aggregation):
@@ -95,10 +101,62 @@ class MaxAggregation(Aggregation):
formula_name = 'max'
+class ArgMaxAggregation(Aggregation):
+ """Return the index for the maximum of a column.
+
+ Written as ``argmax(FORMULA)``, where `FORMULA` is a valid formula.
+ """
+
+ formula_name = 'argmax'
+
+ def group(self):
+ """For when aggregation is called with a group parameter."""
+ indices = self.column.apply(lambda value: parse_float(value, value)
+ ).reset_index().set_index(self.name)
+
+ def max_index_for_row(row):
+ return indices.get_value(row[self.name], 'index').max()
+
+ groupby_max = self._groupby().max()
+ column = groupby_max.apply(max_index_for_row, axis=1)
+ column.name = self.name
+
+ return DataFrame(column).join(groupby_max[self.groups])
+
+
+class NewestAggregation(MultiColumnAggregation):
+ """Get the value column at the row where the index column is newest (largest)."""
+
+ formula_name = 'newest'
+
+ index_column = 0
+ value_column = 1
+
+ def agg(self):
+ idx = self.columns[self.index_column].argmax()
+ result = self.columns[self.value_column].ix[idx]
+
+ return self._value_to_dframe(result)
+
+ def group(self):
+ argmax_agg = ArgMaxAggregation(self.name, self.groups, self.dframe)
+ argmax_df = argmax_agg.eval(self.columns)
+ indices = argmax_df.pop(self.name)
+
+ newest_col = self.columns[self.value_column][indices]
+ newest_col.index = argmax_df.index
+
+ return argmax_df.join(newest_col)
+
+
+
class MeanAggregation(MultiColumnAggregation):
"""Calculate the arithmetic mean.
Written as ``mean(FORMULA)``, where `FORMULA` is a valid formula.
+
+ Because mean is irreducible, this inherits from `MultiColumnAggregation` to
+ use its generic reduce implementation.
"""
formula_name = 'mean'
@@ -214,7 +272,7 @@ def agg(self):
else:
result = len(self.dframe)
- return DataFrame({self.name: Series([result])})
+ return self._value_to_dframe(result)
# dict of formula names to aggregation classes
View
@@ -8,8 +8,16 @@
def parse_int(value, default):
+ return _parse_type(int, value, default)
+
+
+def parse_float(value, default):
+ return _parse_type(float, value, default)
+
+
+def _parse_type(_type, value, default):
try:
- return int(value)
+ return _type(value)
except ValueError:
return default
@@ -23,6 +23,8 @@ class TestAggregations(TestCalculator):
'ratio(risk_factor in ["low_risk"], 1)': 18.0 / 19,
'count()': 19.0,
'count(risk_factor in ["low_risk"])': 18.0,
+ 'argmax(submit_date)': 18.0,
+ 'newest(submit_date, amount)': 28.0,
}
GROUP_TO_RESULTS = {
@@ -51,6 +53,8 @@ def setUp(self):
'ratio(risk_factor in ["low_risk"], 1)',
'count(risk_factor in ["low_risk"])',
'count()',
+ 'argmax(submit_date)',
+ 'newest(submit_date, amount)',
]
self.expected_length = defaultdict(int)
self.groups_list = None
@@ -13,8 +13,7 @@ class TestFrame(TestBase):
def setUp(self):
TestBase.setUp(self)
- self.dframe = self.test_data['good_eats.csv'
- ]
+ self.dframe = self.test_data['good_eats.csv']
self.bframe = BambooFrame(self.dframe)
def _add_bamboo_reserved_keys(self, value=1):
Oops, something went wrong.

0 comments on commit 6cb8267

Please sign in to comment.