Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6669842
add reduce
Rubtsowa Dec 5, 2019
911de85
add reduce
Rubtsowa Dec 5, 2019
573860a
change
Rubtsowa Dec 6, 2019
6fe14d5
change hpat->sdc, change input parameters
Rubtsowa Dec 9, 2019
5befff5
Merge branch 'master' into df_reduce
Rubtsowa Dec 13, 2019
028e048
change
Rubtsowa Dec 17, 2019
0120972
Merge branch 'df_reduce' of https://github.com/Rubtsowa/hpat into df_…
Rubtsowa Dec 17, 2019
0369775
add example for check reduce
Rubtsowa Dec 18, 2019
e080db8
division into 2 functions
Rubtsowa Dec 19, 2019
e617ce4
add selection of parameters
Rubtsowa Dec 20, 2019
1df6dec
comment string in __init__
Rubtsowa Dec 20, 2019
864a26c
import ovetload for DF
Rubtsowa Dec 20, 2019
0351469
resolve conflict
Rubtsowa Dec 23, 2019
b9f35a9
unskip test
Rubtsowa Dec 23, 2019
6a944bf
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 25, 2019
bd439e3
correction allocation params
Rubtsowa Dec 25, 2019
9e2f45d
correction default parametrs
Rubtsowa Dec 25, 2019
702b78a
unskiped test, added input parameters for series
Rubtsowa Dec 26, 2019
a13a104
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 26, 2019
2d05dfc
delete print, skip with SDC_CONFIG_PIPELINE=1, not work with arguments
Rubtsowa Dec 26, 2019
ea0d9a0
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 26, 2019
122f526
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 26, 2019
5b26671
comment string in __init__
Rubtsowa Dec 26, 2019
e39f58a
commented function
Rubtsowa Dec 26, 2019
c2a108b
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 26, 2019
367d882
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 27, 2019
ee64830
fixed style issues
Rubtsowa Dec 27, 2019
d4215cf
change
Rubtsowa Dec 27, 2019
6a0de00
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 27, 2019
8ff2214
Merge branch 'master' of https://github.com/IntelPython/hpat into df_…
Rubtsowa Dec 27, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions sdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@
"""

# sdc.config.numba_compiler_define_nopython_pipeline_orig = \
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline = \
# sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register
# sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register

Comment on lines +64 to +67
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert these changes.


def _init_extension():
'''Register Pandas classes and functions with Numba.
Expand Down
107 changes: 79 additions & 28 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,65 +31,116 @@

import operator
import pandas
import copy
import numpy

import sdc

from numba import types
from numba.extending import (overload, overload_method, overload_attribute)
from sdc.hiframes.pd_dataframe_ext import DataFrameType
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move imports from sdc after imports from numba.

from numba.errors import TypingError
import sdc.datatypes.hpat_pandas_dataframe_types

from sdc.datatypes.hpat_pandas_series_functions import TypeChecker


# Example func_text for func_name='count' columns=('A', 'B'):
#
# def _df_count_impl(df, axis=0, level=None, numeric_only=False):
# series_A = init_series(get_dataframe_data(df, 0))
# result_A = series_A.count(level=level)
# series_B = init_series(get_dataframe_data(df, 1))
# result_B = series_B.count(level=level)
# return pandas.Series([result_A, result_B], ['A', 'B'])


def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add a multi-line comment at the top of this function (not a docstring) with a short example of what func_text will look like (for example, for mean and a DF with 1 column)?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still seeing a docstring here instead of a comment.

result_name_list = []
joined = ', '.join(func_params)
func_lines = [f'def _df_{func_name}_impl({joined}):']
for i, c in enumerate(columns):
result_c = f'result_{c}'
func_lines += [f' series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))',
f' {result_c} = series_{c}.{func_name}({series_params})']
result_name_list.append(result_c)
all_results = ', '.join(result_name_list)
all_columns = ', '.join([f"'{c}'" for c in columns])

func_lines += [f' return pandas.Series([{all_results}], [{all_columns}])']
func_text = '\n'.join(func_lines)

global_vars = {'pandas': pandas, 'np': numpy,
'init_series': sdc.hiframes.api.init_series,
'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data}

return func_text, global_vars

from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType
from sdc.utils import sdc_overload_method

def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params):
all_params = ['df']
ser_par = []

@sdc_overload_method(DataFrameType, 'count')
def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False):
for key, value in params.items():
all_params.append('{}={}'.format(key, value))
for key, value in ser_params.items():
ser_par.append('{}={}'.format(key, value))

s_par = '{}'.format(', '.join(ser_par[:]))

df_func_name = f'_df_{func_name}_impl'

func_text, global_vars = _dataframe_reduce_columns_codegen(func_name, all_params, s_par, df.columns)

loc_vars = {}
exec(func_text, global_vars, loc_vars)
_reduce_impl = loc_vars[df_func_name]

return _reduce_impl


@overload_method(DataFrameType, 'count')
def count_overload(df, axis=0, level=None, numeric_only=False):
"""
Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation.
.. only:: developer
Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count
Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count
Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count1
Parameters
-----------
self: :class:`pandas.DataFrame`
input arg
input arg
axis:
*unsupported*
*unsupported*
level:
*unsupported*
*unsupported*
numeric_only:
*unsupported*
*unsupported*
Returns
-------
:obj:`pandas.Series` or `pandas.DataFrame`
returns: For each column/row the number of non-NA/null entries. If level is specified returns a DataFrame.
for each column/row the number of non-NA/null entries. If level is specified returns a DataFrame.
"""
Comment on lines 104 to 127
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentation does not follow the format used in other functions. See #455 and the docs of other functions.


_func_name = 'Method pandas.dataframe.count().'
name = 'count'

if not isinstance(self, DataFrameType):
raise TypingError('{} The object must be a pandas.dataframe. Given: {}'.format(_func_name, self))
ty_checker = TypeChecker('Method {}().'.format(name))
ty_checker.check(df, DataFrameType)

if not (isinstance(axis, types.Omitted) or axis == 0):
raise TypingError("{} 'axis' unsupported. Given: {}".format(_func_name, axis))
ty_checker.raise_exc(axis, 'unsupported', 'axis')

if not (isinstance(level, types.Omitted) or level is None):
raise TypingError("{} 'level' unsupported. Given: {}".format(_func_name, axis))
ty_checker.raise_exc(level, 'unsupported', 'level')

if not (isinstance(numeric_only, types.Omitted) or numeric_only is False):
raise TypingError("{} 'numeric_only' unsupported. Given: {}".format(_func_name, axis))

def sdc_pandas_dataframe_count_impl(self, axis=0, level=None, numeric_only=False):
result_data = []
result_index = []

for dataframe_item in self._data:
item_count = dataframe_item.count()
item_name = dataframe_item._name
result_data.append(item_count)
result_index.append(item_name)
ty_checker.raise_exc(numeric_only, 'unsupported', 'numeric_only')

return pandas.Series(data=result_data, index=result_index)
params = {'axis': 0, 'level': None, 'numeric_only': False}
ser_par = {'level': 'level'}

return sdc_pandas_dataframe_count_impl
return sdc_pandas_dataframe_reduce_columns(df, name, params, ser_par)
4 changes: 2 additions & 2 deletions sdc/datatypes/hpat_pandas_dataframe_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def iterator_type(self):
return DataFrameTypeIterator(self)


if config_pipeline_hpat_default is 0:
if not config_pipeline_hpat_default:
@register_model(DataFrameType)
class DataFrameTypeModel(StructModel):
"""
Expand Down Expand Up @@ -163,7 +163,7 @@ def _hpat_pandas_dataframe_init_codegen(context, builder, signature, args):
return sig, _hpat_pandas_dataframe_init_codegen


if config_pipeline_hpat_default is 0:
if not config_pipeline_hpat_default:
@overload(pandas.DataFrame)
def hpat_pandas_dataframe(data=None, index=None, columns=None, dtype=None, copy=False):
"""
Expand Down
4 changes: 4 additions & 0 deletions sdc/hiframes/pd_dataframe_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -1628,3 +1628,7 @@ def _impl(df, path_or_buf=None, sep=',', na_rep='', float_format=None,
date_format, doublequote, escapechar, decimal)

return _impl


if not sdc.config.config_pipeline_hpat_default:
from sdc.datatypes.hpat_pandas_dataframe_functions import *
Comment on lines +1633 to +1634
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please consider the other @overload_method(DataFrameType, 'count') in pd_dataframe_ext.py at line ~1564. You should switch from the old overload to the new one, not just enable the new one.
@densmirn do we have something like the list _non_hpat_pipeline_attrs in pd_series_ext.py, but for DataFrame?

2 changes: 0 additions & 2 deletions sdc/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,6 @@ def test_impl(n):
n = 11
pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

@skip_numba_jit
def test_count(self):
def test_impl(n):
df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
Expand All @@ -939,7 +938,6 @@ def test_impl(n):
n = 11
pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

@skip_numba_jit
def test_count1(self):
# TODO: non-numeric columns should be ignored automatically
def test_impl(n):
Expand Down