-
Notifications
You must be signed in to change notification settings - Fork 62
add reduce #366
add reduce #366
Changes from all commits
6669842
911de85
573860a
6fe14d5
5befff5
028e048
0120972
0369775
e080db8
e617ce4
1df6dec
864a26c
0351469
b9f35a9
6a944bf
bd439e3
9e2f45d
702b78a
a13a104
2d05dfc
ea0d9a0
122f526
5b26671
e39f58a
c2a108b
367d882
ee64830
d4215cf
6a0de00
8ff2214
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,65 +31,116 @@ | |
|
|
||
| import operator | ||
| import pandas | ||
| import copy | ||
| import numpy | ||
|
|
||
| import sdc | ||
|
|
||
| from numba import types | ||
| from numba.extending import (overload, overload_method, overload_attribute) | ||
| from sdc.hiframes.pd_dataframe_ext import DataFrameType | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Move the imports from sdc after the imports from numba. |
||
| from numba.errors import TypingError | ||
| import sdc.datatypes.hpat_pandas_dataframe_types | ||
|
|
||
| from sdc.datatypes.hpat_pandas_series_functions import TypeChecker | ||
|
|
||
|
|
||
| # Example func_text for func_name='count' columns=('A', 'B'): | ||
| # | ||
| # def _df_count_impl(df, axis=0, level=None, numeric_only=False): | ||
| # series_A = init_series(get_dataframe_data(df, 0)) | ||
| # result_A = series_A.count(level=level) | ||
| # series_B = init_series(get_dataframe_data(df, 1)) | ||
| # result_B = series_B.count(level=level) | ||
| # return pandas.Series([result_A, result_B], ['A', 'B']) | ||
|
|
||
|
|
||
| def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please add a multi-line comment at the top of this function (not a docstring) with a short example of what func_text will look like (for example, for mean and a DataFrame with one column)?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm still seeing a docstring here instead of a comment. |
||
| result_name_list = [] | ||
| joined = ', '.join(func_params) | ||
| func_lines = [f'def _df_{func_name}_impl({joined}):'] | ||
| for i, c in enumerate(columns): | ||
| result_c = f'result_{c}' | ||
| func_lines += [f' series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))', | ||
| f' {result_c} = series_{c}.{func_name}({series_params})'] | ||
| result_name_list.append(result_c) | ||
| all_results = ', '.join(result_name_list) | ||
| all_columns = ', '.join([f"'{c}'" for c in columns]) | ||
|
|
||
| func_lines += [f' return pandas.Series([{all_results}], [{all_columns}])'] | ||
| func_text = '\n'.join(func_lines) | ||
|
|
||
| global_vars = {'pandas': pandas, 'np': numpy, | ||
| 'init_series': sdc.hiframes.api.init_series, | ||
| 'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data} | ||
|
|
||
| return func_text, global_vars | ||
|
|
||
| from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType | ||
| from sdc.utils import sdc_overload_method | ||
|
|
||
| def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params): | ||
| all_params = ['df'] | ||
| ser_par = [] | ||
|
|
||
| @sdc_overload_method(DataFrameType, 'count') | ||
| def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False): | ||
| for key, value in params.items(): | ||
| all_params.append('{}={}'.format(key, value)) | ||
| for key, value in ser_params.items(): | ||
| ser_par.append('{}={}'.format(key, value)) | ||
|
|
||
| s_par = '{}'.format(', '.join(ser_par[:])) | ||
|
|
||
| df_func_name = f'_df_{func_name}_impl' | ||
|
|
||
| func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, s_par, df.columns) | ||
|
|
||
| loc_vars = {} | ||
| exec(func_text, global_vars, loc_vars) | ||
| _reduce_impl = loc_vars[df_func_name] | ||
|
|
||
| return _reduce_impl | ||
Rubtsowa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| @overload_method(DataFrameType, 'count') | ||
| def count_overload(df, axis=0, level=None, numeric_only=False): | ||
| """ | ||
| Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation. | ||
| .. only:: developer | ||
| Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count | ||
| Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count | ||
| Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count1 | ||
| Parameters | ||
| ----------- | ||
| self: :class:`pandas.DataFrame` | ||
| input arg | ||
| input arg | ||
| axis: | ||
| *unsupported* | ||
| *unsupported* | ||
| level: | ||
| *unsupported* | ||
| *unsupported* | ||
| numeric_only: | ||
| *unsupported* | ||
| *unsupported* | ||
| Returns | ||
| ------- | ||
| :obj:`pandas.Series` or `pandas.DataFrame` | ||
| returns: For each column/row the number of non-NA/null entries. If level is specified returns a DataFrame. | ||
| for each column/row the number of non-NA/null entries. If level is specified returns a DataFrame. | ||
| """ | ||
|
Comment on lines
104
to
127
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The documentation does not follow the format used for the other functions. See #455 and the docstrings of the other functions. |
||
|
|
||
| _func_name = 'Method pandas.dataframe.count().' | ||
| name = 'count' | ||
|
|
||
| if not isinstance(self, DataFrameType): | ||
| raise TypingError('{} The object must be a pandas.dataframe. Given: {}'.format(_func_name, self)) | ||
| ty_checker = TypeChecker('Method {}().'.format(name)) | ||
| ty_checker.check(df, DataFrameType) | ||
|
|
||
| if not (isinstance(axis, types.Omitted) or axis == 0): | ||
| raise TypingError("{} 'axis' unsupported. Given: {}".format(_func_name, axis)) | ||
| ty_checker.raise_exc(axis, 'unsupported', 'axis') | ||
|
|
||
| if not (isinstance(level, types.Omitted) or level is None): | ||
| raise TypingError("{} 'level' unsupported. Given: {}".format(_func_name, axis)) | ||
| ty_checker.raise_exc(level, 'unsupported', 'level') | ||
|
|
||
| if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): | ||
| raise TypingError("{} 'numeric_only' unsupported. Given: {}".format(_func_name, axis)) | ||
|
|
||
| def sdc_pandas_dataframe_count_impl(self, axis=0, level=None, numeric_only=False): | ||
| result_data = [] | ||
| result_index = [] | ||
|
|
||
| for dataframe_item in self._data: | ||
| item_count = dataframe_item.count() | ||
| item_name = dataframe_item._name | ||
| result_data.append(item_count) | ||
| result_index.append(item_name) | ||
| ty_checker.raise_exc(numeric_only, 'unsupported', 'numeric_only') | ||
|
|
||
| return pandas.Series(data=result_data, index=result_index) | ||
| params = {'axis': 0, 'level': None, 'numeric_only': False} | ||
| ser_par = {'level': 'level'} | ||
|
|
||
| return sdc_pandas_dataframe_count_impl | ||
| return sdc_pandas_dataframe_reduce_columns(df, name, params, ser_par) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1628,3 +1628,7 @@ def _impl(df, path_or_buf=None, sep=',', na_rep='', float_format=None, | |
| date_format, doublequote, escapechar, decimal) | ||
|
|
||
| return _impl | ||
|
|
||
|
|
||
| if not sdc.config.config_pipeline_hpat_default: | ||
| from sdc.datatypes.hpat_pandas_dataframe_functions import * | ||
|
Comment on lines
+1633
to
+1634
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please, consider another |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revert these changes.