Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge c55ef91 into bcc377f
Browse files Browse the repository at this point in the history
  • Loading branch information
densmirn committed Oct 30, 2019
2 parents bcc377f + c55ef91 commit fde2f81
Show file tree
Hide file tree
Showing 4 changed files with 387 additions and 54 deletions.
163 changes: 160 additions & 3 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
import numpy
import operator
import pandas
from collections.abc import Iterable
from enum import Enum

from numba.errors import TypingError
from numba.extending import (types, overload, overload_method, overload_attribute)
from numba.extending import overload, overload_method, overload_attribute
from numba import types

import hpat
Expand All @@ -43,6 +45,76 @@
from hpat.utils import to_array


class AcceptedType(Enum):
    """
    Named groups of types that a checked argument is allowed to have.

    Each member's value is a flat tuple of classes that can be passed
    directly as the second argument of ``isinstance``. Both plain Python
    classes and Numba type classes are listed where applicable.
    """
    # NOTE: no trailing commas after the tuples -- a trailing comma would wrap
    # the value into one more tuple level and make members inconsistent.
    series = (SeriesType,)
    omitted = (types.Omitted,)
    int_ = (int, types.Integer)
    float_ = (float, types.Float)
    number = (int, float, types.Number)
    str_ = (str, types.UnicodeType, types.StringLiteral)


class TypingChecker:
    """
    Helper that validates argument types on behalf of a named function.

    On a mismatch a TypingError is raised whose text follows
    ``msg_template``, e.g.::

        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function whose arguments are being checked;
            it is used as the prefix of every error message
        """
        self.func_name = func_name

    def msg(self, val, ty, name=''):
        """
        Build the text of the exception from ``msg_template``.

        Parameters
        ----------
        val: :obj:`any`
            real type of the data
        ty: :obj:`str`
            expected type of the data
        name: :obj:`str`
            name of the parameter

        Returns
        -------
        :class:`str`
            formatted message: function name, parameter name, given and expected type
        """
        fields = (self.func_name, name, val, ty)
        return self.msg_template.format(*fields)

    def check(self, data, accepted_types, name=''):
        """
        Verify that ``data`` is an instance of at least one accepted type.

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_types: :obj:`tuple` or :obj:`AcceptedType`
            a single accepted type or an iterable of them; every item must
            provide ``value`` (usable by ``isinstance``) and ``name``
        name: :obj:`str`
            name of the parameter

        Returns
        -------
        :obj:`bool`
            True as soon as one of the accepted types matches

        Raises
        ------
        TypingError
            if none of the accepted types matches; the "expected" part of
            the message carries the name of the last accepted type tried
        """
        if isinstance(accepted_types, Iterable):
            candidates = accepted_types
        else:
            # a single accepted type was given: treat it as a one-item tuple
            candidates = (accepted_types,)

        for candidate in candidates:
            if isinstance(data, candidate.value):
                return True
        else:
            # no early return happened, `candidate` is the last item tried
            raise TypingError(self.msg(data, candidate.name, name=name))


@overload(operator.getitem)
def hpat_pandas_series_getitem(self, idx):
"""
Expand Down Expand Up @@ -138,6 +210,91 @@ def hpat_pandas_series_iloc_impl(self):
return hpat_pandas_series_iloc_impl


@overload_method(SeriesType, 'nsmallest')
def hpat_pandas_series_nsmallest(self, n=5, keep='first'):
    """
    Pandas Series method :meth:`pandas.Series.nsmallest` implementation.

    .. only:: developer

       Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nsmallest*

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series
    n: :obj:`int`, default 5
        Return this many ascending sorted values.
        A negative value is treated as 0.
    keep: :obj:`str`, default 'first'
        When there are duplicate values that cannot all fit in a Series of n elements:
        first : return the first n occurrences in order of appearance.
        last : return the last n occurrences in reverse order of appearance.
        all : keep all occurrences. This can result in a Series of size larger than n.
        Only 'first' is supported: any other value raises ValueError at run time.

    Returns
    -------
    :obj:`series`
         returns :obj:`series`

    Raises
    ------
    TypingError
        if `self` is not a series, `n` is not an integer or `keep` is not a string
    """

    checker = TypingChecker('Method nsmallest().')
    checker.check(self, AcceptedType.series)
    checker.check(n, (AcceptedType.omitted, AcceptedType.int_), name='n')
    checker.check(keep, (AcceptedType.omitted, AcceptedType.str_), name='keep')

    def hpat_pandas_series_nsmallest_impl(self, n=5, keep='first'):
        if keep != 'first':
            raise ValueError("Method nsmallest(). Unsupported parameter. Given 'keep' != 'first'")

        # negative n must select nothing rather than slice from the end
        count = max(n, 0)
        # mergesort is stable, so equal values keep their order of appearance
        order = self._data.argsort(kind='mergesort')

        return self.take(order[:count])

    return hpat_pandas_series_nsmallest_impl


@overload_method(SeriesType, 'nlargest')
def hpat_pandas_series_nlargest(self, n=5, keep='first'):
    """
    Pandas Series method :meth:`pandas.Series.nlargest` implementation.

    .. only:: developer

       Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nlargest*

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series
    n: :obj:`int`, default 5
        Return this many descending sorted values.
        A negative value is treated as 0.
    keep: :obj:`str`, default 'first'
        When there are duplicate values that cannot all fit in a Series of n elements:
        first : return the first n occurrences in order of appearance.
        last : return the last n occurrences in reverse order of appearance.
        all : keep all occurrences. This can result in a Series of size larger than n.
        Only 'first' is supported: any other value raises ValueError at run time.

    Returns
    -------
    :obj:`series`
         returns :obj:`series`

    Raises
    ------
    TypingError
        if `self` is not a series, `n` is not an integer or `keep` is not a string
    """

    checker = TypingChecker('Method nlargest().')
    checker.check(self, AcceptedType.series)
    checker.check(n, (AcceptedType.omitted, AcceptedType.int_), name='n')
    checker.check(keep, (AcceptedType.omitted, AcceptedType.str_), name='keep')

    def hpat_pandas_series_nlargest_impl(self, n=5, keep='first'):
        if keep != 'first':
            raise ValueError("Method nlargest(). Unsupported parameter. Given 'keep' != 'first'")

        # negative n must select nothing rather than slice from the end
        count = max(n, 0)
        # A stable sort of the negated data keeps equal values in order of appearance:
        # data:  [0, 1, -1, 1, 0] -> [1, 1, 0, 0, -1]
        # index: [0, 1,  2, 3, 4] -> [1, 3, 0, 4,  2] (not [3, 1, 4, 0, 2])
        # NOTE(review): the extra `- 1` presumably avoids overflow when negating
        # the minimum signed integer -- confirm
        order = (-self._data - 1).argsort(kind='mergesort')

        return self.take(order[:count])

    return hpat_pandas_series_nlargest_impl


@overload_attribute(SeriesType, 'shape')
def hpat_pandas_series_shape(self):
"""
Expand Down Expand Up @@ -1184,8 +1341,8 @@ def hpat_pandas_series_take(self, indices, axis=0, is_copy=False):
if not isinstance(self, SeriesType):
raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

if not isinstance(indices, types.List):
raise TypingError('{} The indices must be a List. Given: {}'.format(_func_name, indices))
if not isinstance(indices, (types.List, types.Array)):
raise TypingError('{} The indices must be an array-like. Given: {}'.format(_func_name, indices))

if not (isinstance(axis, (types.Integer, types.Omitted)) or axis == 0):
raise TypingError('{} The axis must be an Integer. Currently unsupported. Given: {}'.format(_func_name, axis))
Expand Down
4 changes: 2 additions & 2 deletions hpat/hiframes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,13 +585,13 @@ def select_k_nonan_overload(A, m, k):
dtype = A.dtype
if isinstance(dtype, types.Integer):
# ints don't have nans
return lambda A, m, k: (A[:k].copy(), k)
return lambda A, m, k: (A[:max(k, 0)].copy(), k)

assert isinstance(dtype, types.Float)

def select_k_nonan_float(A, m, k):
# select the first k elements but ignore NANs
min_heap_vals = np.empty(k, A.dtype)
min_heap_vals = np.empty(max(k, 0), A.dtype)
i = 0
ind = 0
while i < m and ind < k:
Expand Down
7 changes: 7 additions & 0 deletions hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,13 +999,20 @@ def generic_expand_cumulative_series(self, args, kws):
if not hpat.config.config_pipeline_hpat_default:
_not_series_array_attrs.append('resolve_std')

# resolvers that are deleted from SeriesAttribute below when
# hpat.config.config_pipeline_hpat_default is false
_non_hpat_pipeline_attrs = ['resolve_nsmallest', 'resolve_nlargest']

# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
    # only resolvers are of interest
    if not attr.startswith('resolve_'):
        continue
    # keep SeriesAttribute's own resolvers and skip the explicitly excluded ones
    if attr in SeriesAttribute.__dict__ or attr in _not_series_array_attrs:
        continue
    setattr(SeriesAttribute, attr, func)

# remove some attributes from SeriesAttribute for non-hpat pipeline
# NOTE(review): presumably this lets the overload_method based implementations
# be picked up instead -- confirm
if not hpat.config.config_pipeline_hpat_default:
    for attr in _non_hpat_pipeline_attrs:
        if attr in SeriesAttribute.__dict__:
            delattr(SeriesAttribute, attr)

# PR135. This needs to be commented out
@infer_global(operator.getitem)
Expand Down
Loading

0 comments on commit fde2f81

Please sign in to comment.