From 66698426587bcfc03b9750290c68164360daf681 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 5 Dec 2019 12:46:33 +0300 Subject: [PATCH 01/19] add reduce --- sdc/config.py | 5 + .../hpat_pandas_dataframe_functions.py | 143 +++++++++++------- sdc/hiframes/pd_dataframe_ext.py | 3 + 3 files changed, 94 insertions(+), 57 deletions(-) diff --git a/sdc/config.py b/sdc/config.py index 13c4c07b3..78a225da4 100644 --- a/sdc/config.py +++ b/sdc/config.py @@ -67,3 +67,8 @@ ''' Default value for a pointer intended to use as Numba.DefaultPassBuilder.define_nopython_pipeline() in overloaded function ''' + +use_default_dataframe = distutils_util.strtobool(os.getenv('SDC_CONFIG_USE_DEFAULT_DATAFRAME', 'True')) +''' +Config variable used to select DataFrameType model (default is legacy model) +''' diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index b17ceb47c..6b147c70a 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -31,64 +31,93 @@ import operator import pandas +import numpy + +import sdc from numba import types from numba.extending import (overload, overload_method, overload_attribute) +from sdc.hiframes.pd_dataframe_ext import DataFrameType from numba.errors import TypingError - -from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType - - -@overload_method(DataFrameType, 'count') -def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False): - """ - Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation. - - .. only:: developer - - Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count - - Parameters - ----------- - self: :class:`pandas.DataFrame` - input arg - axis: - *unsupported* - level: - *unsupported* - numeric_only: - *unsupported* - - Returns - ------- - :obj:`pandas.Series` or `pandas.DataFrame` - returns: For each column/row the number of non-NA/null entries. If level is specified returns a DataFrame. - """ - - _func_name = 'Method pandas.dataframe.count().' - - if not isinstance(self, DataFrameType): - raise TypingError('{} The object must be a pandas.dataframe. Given: {}'.format(_func_name, self)) - - if not (isinstance(axis, types.Omitted) or axis == 0): - raise TypingError("{} 'axis' unsupported. Given: {}".format(_func_name, axis)) - - if not (isinstance(level, types.Omitted) or level is None): - raise TypingError("{} 'level' unsupported. Given: {}".format(_func_name, axis)) - - if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): - raise TypingError("{} 'numeric_only' unsupported. Given: {}".format(_func_name, axis)) - - def sdc_pandas_dataframe_count_impl(self, axis=0, level=None, numeric_only=False): - result_data = [] - result_index = [] - - for dataframe_item in self._data: - item_count = dataframe_item.count() - item_name = dataframe_item._name - result_data.append(item_count) - result_index.append(item_name) - - return pandas.Series(data=result_data, index=result_index) - - return sdc_pandas_dataframe_count_impl +from sdc.datatypes.hpat_pandas_series_functions import TypeChecker + +if not sdc.config.use_default_dataframe: + from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType + + @overload_method(DataFrameType, 'count') + def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False): + """ + Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation. + .. only:: developer + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count + Parameters + ----------- + self: :class:`pandas.DataFrame` + input arg + axis: + *unsupported* + level: + *unsupported* + numeric_only: + *unsupported* + Returns + ------- + :obj:`pandas.Series` or `pandas.DataFrame` + returns: For each column/row the number of non-NA/null entries. If level is specified returns + a DataFrame. + """ + + _func_name = 'Method pandas.dataframe.count().' + + if not isinstance(self, DataFrameType): + raise TypingError('{} The object must be a pandas.dataframe. Given: {}'.format(_func_name, self)) + + if not (isinstance(axis, types.Omitted) or axis == 0): + raise TypingError("{} 'axis' unsupported. Given: {}".format(_func_name, axis)) + + if not (isinstance(level, types.Omitted) or level is None): + raise TypingError("{} 'level' unsupported. Given: {}".format(_func_name, axis)) + + if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): + raise TypingError("{} 'numeric_only' unsupported. Given: {}".format(_func_name, axis)) + + def sdc_pandas_dataframe_count_impl(self, axis=0, level=None, numeric_only=False): + result_data = [] + result_index = [] + + for dataframe_item in self._data: + item_count = dataframe_item.count() + item_name = dataframe_item._name + result_data.append(item_count) + result_index.append(item_name) + + return pandas.Series(data=result_data, index=result_index) + + return sdc_pandas_dataframe_count_impl + +else: + def sdc_pandas_dataframe_reduce_columns(df, name, params): + saved_columns = df.columns + n_cols = len(saved_columns) + data_args = tuple('data{}'.format(i) for i in range(n_cols)) + all_params = ['df'] + for key, value in params: + all_params.append('{}={}'.format(key, value)) + func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) + func_lines = [func_definition] + for i, d in enumerate(data_args): + line = ' {} = hpat.hiframes.api.init_series(hpat.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' + func_lines.append(line.format(d + '_S', i)) + func_lines.append(' {} = {}.{}()'.format(d + '_O', d + '_S', name)) + func_lines.append(' data = np.array(({},))'.format( + ", ".join(d + '_O' for d in data_args))) + func_lines.append(' index = hpat.str_arr_ext.StringArray(({},))'.format( + ', '.join('"{}"'.format(c) for c in saved_columns))) + func_lines.append(' return hpat.hiframes.api.init_series(data, index)') + loc_vars = {} + func_text = '\n'.join(func_lines) + + exec(func_text, {'hpat': sdc, 'np': numpy}, loc_vars) + _reduce_impl = loc_vars['_reduce_impl'] + + return _reduce_impl diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py index 61e988095..0ee230f76 100644 --- a/sdc/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -1636,3 +1636,6 @@ def _impl(df, path_or_buf=None, sep=',', na_rep='', float_format=None, date_format, doublequote, escapechar, decimal) return _impl + + +from sdc.datatypes.hpat_pandas_dataframe_functions import * From 911de851c233b372e0c827c4a80af89a9a7f4d48 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 5 Dec 2019 12:54:51 +0300 Subject: [PATCH 02/19] add reduce --- sdc/config.py | 5 - .../hpat_pandas_dataframe_functions.py | 105 +++++------------- 2 files changed, 25 insertions(+), 85 deletions(-) diff --git a/sdc/config.py b/sdc/config.py index 78a225da4..13c4c07b3 100644 --- a/sdc/config.py +++ b/sdc/config.py @@ -67,8 +67,3 @@ ''' Default value for a pointer intended to use as Numba.DefaultPassBuilder.define_nopython_pipeline() in overloaded function ''' - -use_default_dataframe = distutils_util.strtobool(os.getenv('SDC_CONFIG_USE_DEFAULT_DATAFRAME', 'True')) -''' -Config variable used to select DataFrameType model (default is legacy model) -''' diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 6b147c70a..a82fb2f19 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -41,83 +41,28 @@ from numba.errors import TypingError from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -if not sdc.config.use_default_dataframe: - from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType - - @overload_method(DataFrameType, 'count') - def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False): - """ - Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation. - .. only:: developer - Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count - Parameters - ----------- - self: :class:`pandas.DataFrame` - input arg - axis: - *unsupported* - level: - *unsupported* - numeric_only: - *unsupported* - Returns - ------- - :obj:`pandas.Series` or `pandas.DataFrame` - returns: For each column/row the number of non-NA/null entries. If level is specified returns - a DataFrame. - """ - - _func_name = 'Method pandas.dataframe.count().' - - if not isinstance(self, DataFrameType): - raise TypingError('{} The object must be a pandas.dataframe. Given: {}'.format(_func_name, self)) - - if not (isinstance(axis, types.Omitted) or axis == 0): - raise TypingError("{} 'axis' unsupported. Given: {}".format(_func_name, axis)) - - if not (isinstance(level, types.Omitted) or level is None): - raise TypingError("{} 'level' unsupported. Given: {}".format(_func_name, axis)) - - if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): - raise TypingError("{} 'numeric_only' unsupported. Given: {}".format(_func_name, axis)) - - def sdc_pandas_dataframe_count_impl(self, axis=0, level=None, numeric_only=False): - result_data = [] - result_index = [] - - for dataframe_item in self._data: - item_count = dataframe_item.count() - item_name = dataframe_item._name - result_data.append(item_count) - result_index.append(item_name) - - return pandas.Series(data=result_data, index=result_index) - - return sdc_pandas_dataframe_count_impl - -else: - def sdc_pandas_dataframe_reduce_columns(df, name, params): - saved_columns = df.columns - n_cols = len(saved_columns) - data_args = tuple('data{}'.format(i) for i in range(n_cols)) - all_params = ['df'] - for key, value in params: - all_params.append('{}={}'.format(key, value)) - func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) - func_lines = [func_definition] - for i, d in enumerate(data_args): - line = ' {} = hpat.hiframes.api.init_series(hpat.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' - func_lines.append(line.format(d + '_S', i)) - func_lines.append(' {} = {}.{}()'.format(d + '_O', d + '_S', name)) - func_lines.append(' data = np.array(({},))'.format( - ", ".join(d + '_O' for d in data_args))) - func_lines.append(' index = hpat.str_arr_ext.StringArray(({},))'.format( - ', '.join('"{}"'.format(c) for c in saved_columns))) - func_lines.append(' return hpat.hiframes.api.init_series(data, index)') - loc_vars = {} - func_text = '\n'.join(func_lines) - - exec(func_text, {'hpat': sdc, 'np': numpy}, loc_vars) - _reduce_impl = loc_vars['_reduce_impl'] - - return _reduce_impl +def sdc_pandas_dataframe_reduce_columns(df, name, params): + saved_columns = df.columns + n_cols = len(saved_columns) + data_args = tuple('data{}'.format(i) for i in range(n_cols)) + all_params = ['df'] + for key, value in params: + all_params.append('{}={}'.format(key, value)) + func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) + func_lines = [func_definition] + for i, d in enumerate(data_args): + line = ' {} = hpat.hiframes.api.init_series(hpat.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' + func_lines.append(line.format(d + '_S', i)) + func_lines.append(' {} = {}.{}()'.format(d + '_O', d + '_S', name)) + func_lines.append(' data = np.array(({},))'.format( + ", ".join(d + '_O' for d in data_args))) + func_lines.append(' index = hpat.str_arr_ext.StringArray(({},))'.format( + ', '.join('"{}"'.format(c) for c in saved_columns))) + func_lines.append(' return hpat.hiframes.api.init_series(data, index)') + loc_vars = {} + func_text = '\n'.join(func_lines) + + exec(func_text, {'hpat': sdc, 'np': numpy}, loc_vars) + _reduce_impl = loc_vars['_reduce_impl'] + + return _reduce_impl From 573860abf00a712a93b7df04b20d0df1b16b7037 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 6 Dec 2019 18:23:49 +0300 Subject: [PATCH 03/19] change --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index a82fb2f19..3cbcff556 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -42,9 +42,9 @@ from sdc.datatypes.hpat_pandas_series_functions import TypeChecker def sdc_pandas_dataframe_reduce_columns(df, name, params): + saved_columns = df.columns - n_cols = len(saved_columns) - data_args = tuple('data{}'.format(i) for i in range(n_cols)) + data_args = tuple('data{}'.format(i) for i in range(len(saved_columns))) all_params = ['df'] for key, value in params: all_params.append('{}={}'.format(key, value)) @@ -62,7 +62,7 @@ def sdc_pandas_dataframe_reduce_columns(df, name, params): loc_vars = {} func_text = '\n'.join(func_lines) - exec(func_text, {'hpat': sdc, 'np': numpy}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': numpy}, loc_vars) _reduce_impl = loc_vars['_reduce_impl'] return _reduce_impl From 6fe14d5e75df9c5de674f671ae781e87ddaaeca0 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Mon, 9 Dec 2019 11:43:56 +0300 Subject: [PATCH 04/19] change hpat->sdc, change input parameters --- .../hpat_pandas_dataframe_functions.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 3cbcff556..f390d490c 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -31,6 +31,7 @@ import operator import pandas +import copy import numpy import sdc @@ -41,24 +42,28 @@ from numba.errors import TypingError from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -def sdc_pandas_dataframe_reduce_columns(df, name, params): +def sdc_pandas_dataframe_reduce_columns(df, name, params_s, params_df): saved_columns = df.columns data_args = tuple('data{}'.format(i) for i in range(len(saved_columns))) all_params = ['df'] - for key, value in params: + + for key, value in params_s: all_params.append('{}={}'.format(key, value)) + ap = all_params.copy() + ap.pop(0) + par = '{}'.format(', '.join(ap)) func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) func_lines = [func_definition] for i, d in enumerate(data_args): - line = ' {} = hpat.hiframes.api.init_series(hpat.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' + line = ' {} = sdc.hiframes.api.init_series(sdc.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' func_lines.append(line.format(d + '_S', i)) - func_lines.append(' {} = {}.{}()'.format(d + '_O', d + '_S', name)) + func_lines.append(' {} = {}.{}({})'.format(d + '_O', d + '_S', name, par)) func_lines.append(' data = np.array(({},))'.format( ", ".join(d + '_O' for d in data_args))) - func_lines.append(' index = hpat.str_arr_ext.StringArray(({},))'.format( + func_lines.append(' index = sdc.str_arr_ext.StringArray(({},))'.format( ', '.join('"{}"'.format(c) for c in saved_columns))) - func_lines.append(' return hpat.hiframes.api.init_series(data, index)') + func_lines.append(' return sdc.hiframes.api.init_series(data, index)') loc_vars = {} func_text = '\n'.join(func_lines) From 028e048b8b61f5696445613a8ebf5281330dbfc9 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Tue, 17 Dec 2019 10:03:55 +0300 Subject: [PATCH 05/19] change --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index f390d490c..e45f7864a 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -42,23 +42,22 @@ from numba.errors import TypingError from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -def sdc_pandas_dataframe_reduce_columns(df, name, params_s, params_df): +def sdc_pandas_dataframe_reduce_columns(df, name, params): saved_columns = df.columns data_args = tuple('data{}'.format(i) for i in range(len(saved_columns))) all_params = ['df'] - for key, value in params_s: + for key, value in params: all_params.append('{}={}'.format(key, value)) ap = all_params.copy() - ap.pop(0) - par = '{}'.format(', '.join(ap)) + par = '{}'.format(', '.join(ap[1:])) func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) func_lines = [func_definition] for i, d in enumerate(data_args): - line = ' {} = sdc.hiframes.api.init_series(sdc.hiframes.pd_dataframe_ext.get_dataframe_data(df, {}))' + line = ' {} = sdc.hiframes.api.init_series(sdc.hiframes.pd_dataframe_ext.get_dataframe_data(all_params[0], {}))' func_lines.append(line.format(d + '_S', i)) - func_lines.append(' {} = {}.{}({})'.format(d + '_O', d + '_S', name, par)) + func_lines.append(' {}_O = {}_S.{}({})'.format(d, d, name, par)) func_lines.append(' data = np.array(({},))'.format( ", ".join(d + '_O' for d in data_args))) func_lines.append(' index = sdc.str_arr_ext.StringArray(({},))'.format( From 0369775c4bd81c6f94452c9fb0a306637e17bdf1 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 18 Dec 2019 19:59:44 +0300 Subject: [PATCH 06/19] add example for check reduce --- .../hpat_pandas_dataframe_functions.py | 55 +++++++++++++++++-- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 8db2aeefc..9a0ede59b 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -43,22 +43,23 @@ from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -def sdc_pandas_dataframe_reduce_columns(df, name, params): +def sdc_pandas_dataframe_reduce_columns(df, name, series_call_params): saved_columns = df.columns data_args = tuple('data{}'.format(i) for i in range(len(saved_columns))) all_params = ['df'] - for key, value in params: + for key, value in series_call_params: all_params.append('{}={}'.format(key, value)) - ap = all_params.copy() - par = '{}'.format(', '.join(ap[1:])) + # This relies on parameters part of the signature of Series method called below being the same + # as for the corresponding DataFrame method + series_call_params_str = '{}'.format(', '.join(all_params[1:])) func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) func_lines = [func_definition] for i, d in enumerate(data_args): line = ' {} = sdc.hiframes.api.init_series(sdc.hiframes.pd_dataframe_ext.get_dataframe_data(all_params[0], {}))' func_lines.append(line.format(d + '_S', i)) - func_lines.append(' {}_O = {}_S.{}({})'.format(d, d, name, par)) + func_lines.append(' {}_O = {}_S.{}({})'.format(d, d, name, series_call_params_str)) func_lines.append(' data = np.array(({},))'.format( ", ".join(d + '_O' for d in data_args))) func_lines.append(' index = sdc.str_arr_ext.StringArray(({},))'.format( @@ -73,3 +74,47 @@ def sdc_pandas_dataframe_reduce_columns(df, name, params): return _reduce_impl +@overload_method(DataFrameType, 'count') +def count_overload(df, axis=0, level=None, numeric_only=False): + """ + Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation. + + .. only:: developer + + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count1 + + Parameters + ----------- + self: :class:`pandas.DataFrame` + input arg + axis: + *unsupported* + level: + *unsupported* + numeric_only: + *unsupported* + + Returns + ------- + :obj:`pandas.Series` or `pandas.DataFrame` + for each column/row the number of non-NA/null entries. If level is specified returns a DataFrame. + """ + + name = 'count' + + ty_checker = TypeChecker('Method {}().'.format(name)) + ty_checker.check(df, DataFrameType) + + if not (isinstance(axis, types.Omitted) or axis == 0): + ty_checker.raise_exc(axis, 'unsupported', 'axis') + + if not (isinstance(level, types.Omitted) or level is None): + ty_checker.raise_exc(level, 'unsupported', 'level') + + if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): + ty_checker.raise_exc(numeric_only, 'unsupported', 'numeric_only') + + params = [('axis', None), ('level', None), ('numeric_only', numeric_only)] + + return sdc_pandas_dataframe_reduce_columns(df, name, params) From e080db8525a0b0105e3091d6fa69e4b700157326 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 19 Dec 2019 13:30:41 +0300 Subject: [PATCH 07/19] division into 2 functions --- .../hpat_pandas_dataframe_functions.py | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 9a0ede59b..d77db88d9 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -43,33 +43,51 @@ from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -def sdc_pandas_dataframe_reduce_columns(df, name, series_call_params): - saved_columns = df.columns - data_args = tuple('data{}'.format(i) for i in range(len(saved_columns))) +def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns): + result_name_list = [] + joined = ', '.join(func_params) + func_lines = [f'def _df_{func_name}_impl({joined}):'] + for i, c in enumerate(columns): + result_c = f'result_{c}' + func_lines += [f' series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))', + f' {result_c} = series_{c}.{func_name}({series_params})'] + result_name_list.append(result_c) + print(result_name_list) + all_results = ', '.join(result_name_list) + print(all_results) + all_columns = ', '.join([f"'{c}'" for c in columns]) + + func_lines += [f' return pandas.Series([{all_results}], [{all_columns}])'] + func_text = '\n'.join(func_lines) + + global_vars = {'pandas': pandas, 'np': numpy, + 'init_series': sdc.hiframes.api.init_series, + 'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data} + + return func_text, global_vars + + +def sdc_pandas_dataframe_reduce_columns(df, func_name, params): all_params = ['df'] - for key, value in series_call_params: + for key, value in params: all_params.append('{}={}'.format(key, value)) - # This relies on parameters part of the signature of Series method called below being the same - # as for the corresponding DataFrame method - series_call_params_str = '{}'.format(', '.join(all_params[1:])) - func_definition = 'def _reduce_impl({}):'.format(', '.join(all_params)) - func_lines = [func_definition] - for i, d in enumerate(data_args): - line = ' {} = sdc.hiframes.api.init_series(sdc.hiframes.pd_dataframe_ext.get_dataframe_data(all_params[0], {}))' - func_lines.append(line.format(d + '_S', i)) - func_lines.append(' {}_O = {}_S.{}({})'.format(d, d, name, series_call_params_str)) - func_lines.append(' data = np.array(({},))'.format( - ", ".join(d + '_O' for d in data_args))) - func_lines.append(' index = sdc.str_arr_ext.StringArray(({},))'.format( - ', '.join('"{}"'.format(c) for c in saved_columns))) - func_lines.append(' return sdc.hiframes.api.init_series(data, index)') + ap = all_params.copy() + par = '{}'.format(', '.join(ap[1:])) + # param = 'level' + # if param in params + # par += 'level=' + param + # else + # par += 'level=' + 'None' + df_func_name = f'_df_{func_name}_impl' + + func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, par, df.columns) + print(func_text, global_vars) loc_vars = {} - func_text = '\n'.join(func_lines) - exec(func_text, {'sdc': sdc, 'np': numpy}, loc_vars) - _reduce_impl = loc_vars['_reduce_impl'] + exec(func_text, global_vars, loc_vars) + _reduce_impl = loc_vars[df_func_name] return _reduce_impl From e617ce470a4c3265bd8b0873b91f83b2be87364e Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 20 Dec 2019 12:45:22 +0300 Subject: [PATCH 08/19] add selection of parameters --- .../hpat_pandas_dataframe_functions.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index d77db88d9..21a80f729 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -70,11 +70,18 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col def sdc_pandas_dataframe_reduce_columns(df, func_name, params): all_params = ['df'] - - for key, value in params: - all_params.append('{}={}'.format(key, value)) + par1 = {'count': ['level']} + + if func_name in par1: + for key, value in params: + if key in par1[func_name]: + all_params.append('{}={}'.format(key, value)) + else: + for key, value in params: + all_params.append('{}={}'.format(key, value)) ap = all_params.copy() par = '{}'.format(', '.join(ap[1:])) + # param = 'level' # if param in params # par += 'level=' + param @@ -83,9 +90,9 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params): df_func_name = f'_df_{func_name}_impl' func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, par, df.columns) - print(func_text, global_vars) - loc_vars = {} + loc_vars = {} + print(global_vars, loc_vars) exec(func_text, global_vars, loc_vars) _reduce_impl = loc_vars[df_func_name] From 1df6dec832e5375ee704ff488626e2c4e91e3273 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 20 Dec 2019 14:49:43 +0300 Subject: [PATCH 09/19] comment string in __init__ --- sdc/__init__.py | 2 +- sdc/datatypes/hpat_pandas_dataframe_functions.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sdc/__init__.py b/sdc/__init__.py index 32363380c..9841409e3 100644 --- a/sdc/__init__.py +++ b/sdc/__init__.py @@ -60,7 +60,7 @@ """ sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline - numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register + # numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register def _init_extension(): '''Register Pandas classes and functions with Numba. diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 21a80f729..cbe7f608f 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -82,11 +82,6 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params): ap = all_params.copy() par = '{}'.format(', '.join(ap[1:])) - # param = 'level' - # if param in params - # par += 'level=' + param - # else - # par += 'level=' + 'None' df_func_name = f'_df_{func_name}_impl' func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, par, df.columns) @@ -98,6 +93,12 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params): return _reduce_impl +# param = 'level' +# if param in params +# par += 'level=' + param +# else +# par += 'level=' + 'None' + @overload_method(DataFrameType, 'count') def count_overload(df, axis=0, level=None, numeric_only=False): From 864a26c2df34129c35b05c5124e91f885dc0e7a7 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 20 Dec 2019 15:16:26 +0300 Subject: [PATCH 10/19] import ovetload for DF --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 1 + sdc/datatypes/hpat_pandas_dataframe_types.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index cbe7f608f..344e52d27 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -40,6 +40,7 @@ from numba.extending import (overload, overload_method, overload_attribute) from sdc.hiframes.pd_dataframe_ext import DataFrameType from numba.errors import TypingError +import sdc.datatypes.hpat_pandas_dataframe_types from sdc.datatypes.hpat_pandas_series_functions import TypeChecker diff --git a/sdc/datatypes/hpat_pandas_dataframe_types.py b/sdc/datatypes/hpat_pandas_dataframe_types.py index ff8a16766..03f79934a 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_types.py +++ b/sdc/datatypes/hpat_pandas_dataframe_types.py @@ -110,7 +110,7 @@ def iterator_type(self): return DataFrameTypeIterator(self) -if config_pipeline_hpat_default is 0: +if not config_pipeline_hpat_default: @register_model(DataFrameType) class DataFrameTypeModel(StructModel): """ @@ -163,7 +163,7 @@ def _hpat_pandas_dataframe_init_codegen(context, builder, signature, args): return sig, _hpat_pandas_dataframe_init_codegen -if config_pipeline_hpat_default is 0: +if not config_pipeline_hpat_default: @overload(pandas.DataFrame) def hpat_pandas_dataframe(data=None, index=None, columns=None, dtype=None, copy=False): """ From b9f35a94dc3ee2fca330d1413fefc5a44a17da2c Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Mon, 23 Dec 2019 09:33:56 +0300 Subject: [PATCH 11/19] unskip test --- sdc/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 99a2e6807..fbe120b29 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -929,7 +929,7 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @skip_numba_jit + # @skip_numba_jit def test_count(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)}) From bd439e32575fb1a7f6e163305e47ef7f9d8be354 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 14:16:38 +0300 Subject: [PATCH 12/19] correction allocation params --- .../hpat_pandas_dataframe_functions.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 344e52d27..210d99d04 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -72,19 +72,31 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col def sdc_pandas_dataframe_reduce_columns(df, func_name, params): all_params = ['df'] par1 = {'count': ['level']} + ser_par = [] + + print('PARAMS') + print(params) + + for key, value in params: + all_params.append('{}={}'.format(key, value)) if func_name in par1: for key, value in params: if key in par1[func_name]: - all_params.append('{}={}'.format(key, value)) + ser_par.append('{}={}'.format(key, value)) + sp = ser_par.copy() + par = '{}'.format(', '.join(sp)) else: - for key, value in params: - all_params.append('{}={}'.format(key, value)) - ap = all_params.copy() - par = '{}'.format(', '.join(ap[1:])) + ap = all_params.copy() + par = '{}'.format(', '.join(ap[1:])) df_func_name = f'_df_{func_name}_impl' + print('ALL PARAMS') + print(all_params) + print('PAR') + print(par) + func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, par, df.columns) loc_vars = {} @@ -94,12 +106,6 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params): return _reduce_impl -# param = 'level' -# if param in params -# par += 'level=' + param -# else -# par += 'level=' + 'None' - @overload_method(DataFrameType, 'count') def count_overload(df, axis=0, level=None, numeric_only=False): From 9e2f45dda8a81033c480683e531dde20f0bb81ae Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Wed, 25 Dec 2019 17:11:43 +0300 Subject: [PATCH 13/19] correction default parametrs --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 210d99d04..d6395226c 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -148,6 +148,6 @@ def count_overload(df, axis=0, level=None, numeric_only=False): if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): ty_checker.raise_exc(numeric_only, 'unsupported', 'numeric_only') - params = [('axis', None), ('level', None), ('numeric_only', numeric_only)] + params = [('axis', 0), ('level', None), ('numeric_only', False)] return sdc_pandas_dataframe_reduce_columns(df, name, params) From 702b78af56565af8ced81cb971cde5b8538d746e Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 08:53:40 +0300 Subject: [PATCH 14/19] unskiped test, added input parameters for series --- .../hpat_pandas_dataframe_functions.py | 33 ++++++------------- sdc/tests/test_dataframe.py | 2 -- 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index d6395226c..ff03dccb8 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -65,39 +65,25 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col global_vars = {'pandas': pandas, 'np': numpy, 'init_series': sdc.hiframes.api.init_series, 'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data} + print(func_text) return func_text, global_vars -def sdc_pandas_dataframe_reduce_columns(df, func_name, params): +def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params): all_params = ['df'] - par1 = {'count': ['level']} ser_par = [] - print('PARAMS') - print(params) - - for key, value in params: + for key, value in params.items(): all_params.append('{}={}'.format(key, value)) + for key, value in ser_params.items(): + ser_par.append('{}={}'.format(key, value)) - if func_name in par1: - for key, value in params: - if key in par1[func_name]: - ser_par.append('{}={}'.format(key, value)) - sp = ser_par.copy() - par = '{}'.format(', '.join(sp)) - else: - ap = all_params.copy() - par = '{}'.format(', '.join(ap[1:])) + s_par = '{}'.format(', '.join(ser_par[:])) df_func_name = f'_df_{func_name}_impl' - print('ALL PARAMS') - print(all_params) - print('PAR') - print(par) - - func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, par, df.columns) + func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, s_par, df.columns) loc_vars = {} print(global_vars, loc_vars) @@ -148,6 +134,7 @@ def count_overload(df, axis=0, level=None, numeric_only=False): if not (isinstance(numeric_only, types.Omitted) or numeric_only is False): ty_checker.raise_exc(numeric_only, 'unsupported', 'numeric_only') - params = [('axis', 0), ('level', None), ('numeric_only', False)] + params = {'axis': 0, 'level': None, 'numeric_only': False} + ser_par = {'level': 'level'} - return sdc_pandas_dataframe_reduce_columns(df, name, params) + return sdc_pandas_dataframe_reduce_columns(df, name, params, ser_par) diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index fbe120b29..da3906531 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -929,7 +929,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - # @skip_numba_jit def test_count(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)}) @@ -939,7 +938,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @skip_numba_jit def test_count1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): From 2d05dfcdec696b0f44a5ae0aeb336ebd997eadd2 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 09:38:23 +0300 Subject: [PATCH 15/19] delete print, skip with SDC_CONFIG_PIPELINE=1, not work with arguments --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 4 ---- sdc/tests/test_dataframe.py | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index ff03dccb8..f3cf8113d 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -54,9 +54,7 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col func_lines += [f' series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))', f' {result_c} = series_{c}.{func_name}({series_params})'] result_name_list.append(result_c) - print(result_name_list) all_results = ', '.join(result_name_list) - print(all_results) all_columns = ', '.join([f"'{c}'" for c in columns]) func_lines += [f' return pandas.Series([{all_results}], [{all_columns}])'] @@ -65,7 +63,6 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col global_vars = {'pandas': pandas, 'np': numpy, 'init_series': sdc.hiframes.api.init_series, 'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data} - print(func_text) return func_text, global_vars @@ -86,7 +83,6 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params): func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, s_par, df.columns) loc_vars = {} - print(global_vars, loc_vars) exec(func_text, global_vars, loc_vars) _reduce_impl = loc_vars[df_func_name] diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index da3906531..dc7e39e0d 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -929,6 +929,7 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) + @skip_sdc_jit('SDC pipeline does not support arguments for Series.count()') def test_count(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)}) @@ -938,6 +939,7 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) + @skip_sdc_jit('SDC pipeline does not support arguments for Series.count()') def test_count1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): From 5b26671a892ed194ba701b7f908678c7512a6946 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 17:11:55 +0300 Subject: [PATCH 16/19] comment string in __init__ --- sdc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdc/__init__.py b/sdc/__init__.py index 5164e75f2..a60ede2db 100644 --- a/sdc/__init__.py +++ b/sdc/__init__.py @@ -60,7 +60,7 @@ TODO: Needs to detect 'import Pandas' and align initialization according to it """ - sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline + # sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline # numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register From e39f58a2a7fad5db878c852eb6a7b24c724fab0b Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Thu, 26 Dec 2019 17:32:27 +0300 Subject: [PATCH 17/19] commented function --- sdc/datatypes/hpat_pandas_dataframe_functions.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index f3cf8113d..f9f444fca 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -44,7 +44,16 @@ from sdc.datatypes.hpat_pandas_series_functions import TypeChecker - +''' +Example func_text for func_name='count' columns=('A', 'B'): + + def _df_count_impl(df, axis=0, level=None, numeric_only=False): + series_A = init_series(get_dataframe_data(df, 0)) + result_A = series_A.count(level=level) + series_B = init_series(get_dataframe_data(df, 1)) + result_B = series_B.count(level=level) + return pandas.Series([result_A, result_B], ['A', 'B']) +''' def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns): result_name_list = [] joined = ', '.join(func_params) From ee648308ad24571f20c3b64fad331616fe23f69c Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 27 Dec 2019 10:07:21 +0300 Subject: [PATCH 18/19] fixed style issues --- sdc/__init__.py | 6 ++++-- sdc/datatypes/hpat_pandas_dataframe_functions.py | 2 ++ sdc/hiframes/pd_dataframe_ext.py | 5 ++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sdc/__init__.py b/sdc/__init__.py index a60ede2db..c7ac6dab5 100644 --- a/sdc/__init__.py +++ b/sdc/__init__.py @@ -60,8 +60,10 @@ TODO: Needs to detect 'import Pandas' and align initialization according to it """ - # sdc.config.numba_compiler_define_nopython_pipeline_orig = numba.compiler.DefaultPassBuilder.define_nopython_pipeline - # numba.compiler.DefaultPassBuilder.define_nopython_pipeline = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register + # sdc.config.numba_compiler_define_nopython_pipeline_orig = \ + # numba.compiler.DefaultPassBuilder.define_nopython_pipeline + # numba.compiler.DefaultPassBuilder.define_nopython_pipeline = \ + # sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register def _init_extension(): diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index f9f444fca..41864f5e9 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -54,6 +54,8 @@ def _df_count_impl(df, axis=0, level=None, numeric_only=False): result_B = series_B.count(level=level) return pandas.Series([result_A, result_B], ['A', 'B']) ''' + + def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns): result_name_list = [] joined = ', '.join(func_params) diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py index 08092bde2..fe46442da 100644 --- a/sdc/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -44,6 +44,8 @@ from sdc.str_ext import string_type from sdc.str_arr_ext import string_array_type +from sdc.datatypes.hpat_pandas_dataframe_functions import * + class DataFrameType(types.Type): # TODO: IterableType over column names """Temporary type class for DataFrame objects. @@ -1628,6 +1630,3 @@ def _impl(df, path_or_buf=None, sep=',', na_rep='', float_format=None, date_format, doublequote, escapechar, decimal) return _impl - - -from sdc.datatypes.hpat_pandas_dataframe_functions import * From d4215cf511e918db148b20037453445d191ff510 Mon Sep 17 00:00:00 2001 From: mrubtsov Date: Fri, 27 Dec 2019 10:42:58 +0300 Subject: [PATCH 19/19] change --- .../hpat_pandas_dataframe_functions.py | 19 +++++++++---------- sdc/hiframes/pd_dataframe_ext.py | 6 ++++-- sdc/tests/test_dataframe.py | 2 -- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 41864f5e9..2641df2c6 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -44,16 +44,15 @@ from sdc.datatypes.hpat_pandas_series_functions import TypeChecker -''' -Example func_text for func_name='count' columns=('A', 'B'): - - def _df_count_impl(df, axis=0, level=None, numeric_only=False): - series_A = init_series(get_dataframe_data(df, 0)) - result_A = series_A.count(level=level) - series_B = init_series(get_dataframe_data(df, 1)) - result_B = series_B.count(level=level) - return pandas.Series([result_A, result_B], ['A', 'B']) -''' + +# Example func_text for func_name='count' columns=('A', 'B'): +# +# def _df_count_impl(df, axis=0, level=None, numeric_only=False): +# series_A = init_series(get_dataframe_data(df, 0)) +# result_A = series_A.count(level=level) +# series_B = init_series(get_dataframe_data(df, 1)) +# result_B = series_B.count(level=level) +# return pandas.Series([result_A, result_B], ['A', 'B']) def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns): diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py index fe46442da..3d44c7b18 100644 --- a/sdc/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -44,8 +44,6 @@ from sdc.str_ext import string_type from sdc.str_arr_ext import string_array_type -from sdc.datatypes.hpat_pandas_dataframe_functions import * - class DataFrameType(types.Type): # TODO: IterableType over column names """Temporary type class for DataFrame objects. @@ -1630,3 +1628,7 @@ def _impl(df, path_or_buf=None, sep=',', na_rep='', float_format=None, date_format, doublequote, escapechar, decimal) return _impl + + +if not sdc.config.config_pipeline_hpat_default: + from sdc.datatypes.hpat_pandas_dataframe_functions import * diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index dc7e39e0d..da3906531 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -929,7 +929,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @skip_sdc_jit('SDC pipeline does not support arguments for Series.count()') def test_count(self): def test_impl(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)}) @@ -939,7 +938,6 @@ def test_impl(n): n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @skip_sdc_jit('SDC pipeline does not support arguments for Series.count()') def test_count1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n):