- 
                Notifications
    You must be signed in to change notification settings 
- Fork 62
Refactor Series.median() in a new style via np.median #228
Changes from all commits
9eb2e92
              686cc9a
              f1a48b3
              2484e03
              a8e2c28
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|  | @@ -2082,3 +2082,67 @@ def hpat_pandas_series_nunique_impl(self, dropna=True): | |||||
| return len(data_set) + 1 | ||||||
|  | ||||||
| return hpat_pandas_series_nunique_impl | ||||||
|  | ||||||
|  | ||||||
@overload_method(SeriesType, 'median')
def hpat_pandas_series_median(self, axis=None, skipna=True, level=None, numeric_only=None):
    """
    Pandas Series method :meth:`pandas.Series.median` implementation.

    .. only:: developer

       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_default1
       Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_false1

    Parameters
    -----------
    self: :obj:`pandas.Series`
        input series
    axis: :obj:`int` or :obj:`string` {0 or `index`, None}, default None
        The axis for the function to be applied on.
        *unsupported*
    skipna: :obj:`bool`, default True
        exclude NA/null values when computing the result
    level: :obj:`int` or :obj:`string`, default None
        *unsupported*
    numeric_only: :obj:`bool` or None, default None
        *unsupported*

    Returns
    -------
    :obj:`float`
        median of values in the series

    Raises
    ------
    TypingError
        If `self` is not a series type, its dtype is not numeric, `axis` or
        `skipna` has an unexpected type, or any of the unsupported parameters
        (`level`, `numeric_only`, `axis`) is given a non-default value.
    """

    _func_name = 'Method median().'

    if not isinstance(self, SeriesType):
        raise TypingError(
            '{} The object must be a pandas.series. Given self: {}'.format(_func_name, self))

    if not isinstance(self.dtype, types.Number):
        raise TypingError(
            '{} The function only applies to elements that are all numeric. Given data type: {}'.format(
                _func_name, self.dtype))

    if not (isinstance(axis, (types.Integer, types.UnicodeType, types.Omitted)) or axis is None):
        raise TypingError(
            '{} The axis must be an Integer or a String. Currently unsupported. Given: {}'.format(
                _func_name, axis))

    # NOTE: as reported during review, this overload is evaluated more than once
    # during typing. When `skipna` is omitted it can arrive as types.Omitted in one
    # pass and as the plain Python default (True) in another, so the raw default
    # has to be accepted in addition to the Numba types.
    if not (isinstance(skipna, (types.Boolean, types.Omitted)) or skipna is True):
        raise TypingError('{} The skipna must be a boolean. Given: {}'.format(_func_name, skipna))

    # Every unsupported parameter must be left at its default; a single
    # non-default value is enough to reject the call.
    unsupported_args = (level, numeric_only, axis)
    if not all(arg is None or isinstance(arg, types.Omitted) for arg in unsupported_args):
        raise TypingError(
            '{} Unsupported parameters. Given level: {}, numeric_only: {}, axis: {}'.format(
                _func_name, level, numeric_only, axis))

    def hpat_pandas_series_median_impl(self, axis=None, skipna=True, level=None, numeric_only=None):
        # nanmedian ignores NaN values; median propagates them.
        if skipna:
            return numpy.nanmedian(self._data)

        return numpy.median(self._data)

    return hpat_pandas_series_median_impl
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@shssf eventually what do you think about extracting
`dtype` from `self`? `self.dtype` or `self.data.dtype`. BTW for me `dtype` means "data type", `data.dtype` means "data data dtype". I vote for the shorter option.