diff --git a/docs/source/buildscripts/__pycache__/__init__.cpython-37.pyc b/docs/source/buildscripts/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index a2db886b3..000000000 Binary files a/docs/source/buildscripts/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/docs/source/buildscripts/__pycache__/apiref_generator.cpython-37.pyc b/docs/source/buildscripts/__pycache__/apiref_generator.cpython-37.pyc deleted file mode 100644 index ba7861f33..000000000 Binary files a/docs/source/buildscripts/__pycache__/apiref_generator.cpython-37.pyc and /dev/null differ diff --git a/docs/source/buildscripts/__pycache__/sdc_doc_utils.cpython-37.pyc b/docs/source/buildscripts/__pycache__/sdc_doc_utils.cpython-37.pyc deleted file mode 100644 index bd04a7c3d..000000000 Binary files a/docs/source/buildscripts/__pycache__/sdc_doc_utils.cpython-37.pyc and /dev/null differ diff --git a/docs/source/buildscripts/__pycache__/sdc_object_utils.cpython-37.pyc b/docs/source/buildscripts/__pycache__/sdc_object_utils.cpython-37.pyc deleted file mode 100644 index 3e3909df5..000000000 Binary files a/docs/source/buildscripts/__pycache__/sdc_object_utils.cpython-37.pyc and /dev/null differ diff --git a/docs/source/buildscripts/apiref_generator.py b/docs/source/buildscripts/apiref_generator.py index 892311485..674d7f080 100644 --- a/docs/source/buildscripts/apiref_generator.py +++ b/docs/source/buildscripts/apiref_generator.py @@ -28,7 +28,7 @@ import pandas from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict, get_sdc_object, get_obj from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name -from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent,get_short_description +from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent, get_short_description from sdc_doc_utils import split_in_sections, get_docstring, create_heading_str, cut_sdc_dev_guide import os @@ -43,7 +43,7 @@ def reformat(text): :param text: Original text with warnings :return: Modified text that fixes warnings """ - text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks` + text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks` text = reformat_asterisks(text) # Fix for * and ** symbols text = reformat_explicit_markup(text) # Fix for explicit markup without a blank line text = reformat_bullet_list(text) # Fix bullet list indentation issues @@ -214,7 +214,7 @@ def reformat_asterisks(text): if idx2 == -1: # Only one single asterisk in the line - Reformat to `\*` line = line.replace('*', '\\*') - idx = len(line) # Parsed the line. Go to another line + idx = len(line) # Parsed the line. Go to another line elif idx2 == idx1+1: # First double asterisk met in the line idx2 = line.find('**', idx1+2) @@ -278,7 +278,6 @@ def _get_param_text(title, param): elif title == 'Raises': return ':raises:' - # Internal function. Returns correct markup for Parameters section def _reformat_parameters(title, text): lines = text.split('\n') @@ -608,7 +607,7 @@ def parse_templ_rst(fname_templ): doc.pop(0) # Skipping ``.. sdc_toctree`` # Parsing the list of APIs - while len(doc) >0 and doc[0].strip() != '': + while len(doc) > 0 and doc[0].strip() != '': line = doc[0] indent = get_indent(line) line = line.strip() @@ -616,7 +615,7 @@ def parse_templ_rst(fname_templ): obj = get_obj(full_name) short_description = generate_simple_object_doc(obj, short_doc_flag=True).strip() new_line = reindent(':ref:`', indent) + line + ' <' + full_name + '>`\n' + \ - reindent(short_description, indent+4) + '\n' + reindent(short_description, indent+4) + '\n' fout.write(new_line) doc.pop(0) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 43fc93d9a..d18602bad 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -40,16 +40,92 @@ import sdc import sdc.datatypes.common_functions as common_functions -from sdc.datatypes.common_functions import TypeChecker from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType from sdc.hiframes.pd_series_ext import SeriesType from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) from sdc.utils import to_array +class TypeChecker: + """ + Validate object type and raise TypingError if the type is invalid, e.g.: + Method nsmallest(). The object n + given: bool + expected: int + """ + msg_template = '{} The object {}\n given: {}\n expected: {}' + + def __init__(self, func_name): + """ + Parameters + ---------- + func_name: :obj:`str` + name of the function where types checking + """ + self.func_name = func_name + + def raise_exc(self, data, expected_types, name=''): + """ + Raise exception with unified message + Parameters + ---------- + data: :obj:`any` + real type of the data + expected_types: :obj:`str` + expected types inserting directly to the exception + name: :obj:`str` + name of the parameter + """ + msg = self.msg_template.format(self.func_name, name, data, expected_types) + raise TypingError(msg) + + def check(self, data, accepted_type, name=''): + """ + Check data type belongs to specified type + Parameters + ---------- + data: :obj:`any` + real type of the data + accepted_type: :obj:`type` + accepted type + name: :obj:`str` + name of the parameter + """ + if not isinstance(data, accepted_type): + self.raise_exc(data, accepted_type.__name__, name=name) + @overload(operator.getitem) def hpat_pandas_series_getitem(self, idx): """ + Intel Scalable Dataframe Compiler User Guide + ******************************************** + Pandas API: pandas.Series.get + + Limitations + ----------- + Supported ``key`` can be one of the following: + - Integer scalar, e.g. :obj:`series[0]` + - A slice, e.g. :obj:`series[2:5]` + - Another series + + Examples + -------- + .. literalinclude:: ../../../examples/series_getitem.py + :language: python + :lines: 27- + :caption: Getting Pandas Series elements + :name: ex_series_getitem + + .. code-block:: console + + > python ./series_getitem.py + 55 + + .. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring + + Intel Scalable Dataframe Compiler Developer Guide + ************************************************* + Pandas Series operator :attr:`pandas.Series.get` implementation **Algorithm**: result = series[idx] @@ -1131,7 +1207,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None): if not isinstance(other.data.dtype, types.Number): ty_checker.raise_exc(other.data, 'number', 'other.data') - if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None: + if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)): ty_checker.raise_exc(min_periods, 'int64', 'min_periods') def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None): @@ -1153,20 +1229,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None if len(self_arr) < min_periods: return numpy.nan - new_self = pandas.Series(self_arr) - new_other = pandas.Series(other_arr) - - n = new_self.count() - ma = new_self.sum() - mb = new_other.sum() - a = n * (self_arr * other_arr).sum() - ma * mb - b1 = n * (self_arr * self_arr).sum() - ma * ma - b2 = n * (other_arr * other_arr).sum() - mb * mb - - if b1 == 0 or b2 == 0: - return numpy.nan - - return a / numpy.sqrt(b1 * b2) + return numpy.corrcoef(self_arr, other_arr)[0, 1] return hpat_pandas_series_corr_impl @@ -2100,77 +2163,6 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'): return hpat_pandas_series_quantile_impl -@overload_method(SeriesType, 'rename') -def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None): - """ - Pandas Series method :meth:`pandas.Series.rename` implementation. - Alter Series index labels or name. - .. only:: developer - Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename - - Parameters - ----------- - index : :obj:`scalar` or `hashable sequence` or `dict` or `function` - Dict-like or functions are transformations to apply to the index. - Scalar or hashable sequence-like will alter the Series.name attribute. - Only scalar value is supported. - copy : :obj:`bool`, default :obj:`True` - Whether to copy underlying data. - inplace : :obj:`bool`, default :obj:`False` - Whether to return a new Series. If True then value of copy is ignored. - level : :obj:`int` or `str` - In case of a MultiIndex, only rename labels in the specified level. - *Not supported* - Returns - ------- - :obj:`pandas.Series` - returns :obj:`pandas.Series` with index labels or name altered. - """ - - ty_checker = TypeChecker('Method rename().') - ty_checker.check(self, SeriesType) - - if not isinstance(index, (types.Omitted, types.UnicodeType, - types.StringLiteral, str, - types.Integer, types.Boolean, - types.Hashable, types.Float, - types.NPDatetime, types.NPTimedelta, - types.Number)) and index is not None: - ty_checker.raise_exc(index, 'string', 'index') - - if not isinstance(copy, (types.Omitted, types.Boolean, bool)): - ty_checker.raise_exc(copy, 'boolean', 'copy') - - if not isinstance(inplace, (types.Omitted, types.Boolean, bool)): - ty_checker.raise_exc(inplace, 'boolean', 'inplace') - - if not isinstance(level, (types.Omitted, types.UnicodeType, - types.StringLiteral, types.Integer)) and level is not None: - ty_checker.raise_exc(level, 'Integer or srting', 'level') - - def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None): - if copy is True: - series_data = self._data.copy() - series_index = self._index.copy() - else: - series_data = self._data - series_index = self._index - - return pandas.Series(data=series_data, index=series_index, name=index) - - def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None): - if copy is True: - series_data = self._data.copy() - else: - series_data = self._data - - return pandas.Series(data=series_data, index=self._index, name=index) - - if isinstance(self.index, types.NoneType): - return hpat_pandas_series_rename_noidx_impl - return hpat_pandas_series_rename_idx_impl - - @overload_method(SeriesType, 'min') def hpat_pandas_series_min(self, axis=None, skipna=True, level=None, numeric_only=None): """ @@ -2933,12 +2925,11 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True): It is better to merge with Numeric branch """ - data = self._data - if dropna: - nan_mask = self.isna() - data = self._data[~nan_mask._data] - unique_values = set(data) - return len(unique_values) + str_set = set(self._data) + if dropna == False: + return len(str_set) - 1 + else: + return len(str_set) return hpat_pandas_series_nunique_str_impl @@ -2992,8 +2983,7 @@ def hpat_pandas_series_count(self, level=None): if isinstance(self.data, StringArrayType): def hpat_pandas_series_count_str_impl(self, level=None): - nan_mask = self.isna() - return numpy.sum(nan_mask._data == 0) + return len(self._data) return hpat_pandas_series_count_str_impl @@ -3143,10 +3133,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No sort_nona = numpy.argsort(self._data[~na_data_arr]) q = 0 for id, i in enumerate(sort): - if id in set(sort[len(self._data) - na:]): - q += 1 + if id not in list(sort[len(self._data) - na:]): + result[id] = sort_nona[id-q] else: - result[id] = sort_nona[id - q] + q += 1 for i in sort[len(self._data) - na:]: result[i] = -1 @@ -3170,10 +3160,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order= sort_nona = numpy.argsort(self._data[~na_data_arr]) q = 0 for id, i in enumerate(sort): - if id in set(sort[len(self._data) - na:]): - q += 1 - else: + if id not in list(sort[len(self._data) - na:]): result[id] = sort_nona[id - q] + else: + q += 1 for i in sort[len(self._data) - na:]: result[i] = -1 @@ -3580,15 +3570,7 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None): if len(self_arr) < min_periods: return numpy.nan - new_self = pandas.Series(self_arr) - - ma = new_self.mean() - mb = other.mean() - - if numpy.isinf(mb): - return numpy.nan - - return ((self_arr - ma) * (other_arr - mb)).sum() / (new_self.count() - 1.0) + return numpy.cov(self_arr, other_arr)[0, 1] return hpat_pandas_series_cov_impl