IntelPython · samaid · Dec 9, 2019 · Dec 9, 2019 · Dec 9, 2019
diff --git a/docs/source/buildscripts/__pycache__/__init__.cpython-37.pyc b/docs/source/buildscripts/__pycache__/__init__.cpython-37.pyc
diff --git a/docs/source/buildscripts/__pycache__/apiref_generator.cpython-37.pyc b/docs/source/buildscripts/__pycache__/apiref_generator.cpython-37.pyc
diff --git a/docs/source/buildscripts/__pycache__/sdc_doc_utils.cpython-37.pyc b/docs/source/buildscripts/__pycache__/sdc_doc_utils.cpython-37.pyc
diff --git a/docs/source/buildscripts/__pycache__/sdc_object_utils.cpython-37.pyc b/docs/source/buildscripts/__pycache__/sdc_object_utils.cpython-37.pyc
diff --git a/docs/source/buildscripts/apiref_generator.py b/docs/source/buildscripts/apiref_generator.py
@@ -28,7 +28,7 @@
 import pandas
 from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict, get_sdc_object, get_obj
 from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name
-from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent,get_short_description
+from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent, get_short_description
 from sdc_doc_utils import split_in_sections, get_docstring, create_heading_str, cut_sdc_dev_guide
 import os
 
@@ -43,7 +43,7 @@ def reformat(text):
     :param text: Original text with warnings
     :return: Modified text that fixes warnings
     """
-    text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks`
+    text = reformat_replace_star_list_with_dash_list(text)  # Must be called before :func:`reformat_asterisks`
     text = reformat_asterisks(text)  # Fix for * and ** symbols
     text = reformat_explicit_markup(text)  # Fix for explicit markup without a blank line
     text = reformat_bullet_list(text)  # Fix bullet list indentation issues
@@ -214,7 +214,7 @@ def reformat_asterisks(text):
                 if idx2 == -1:
                     # Only one single asterisk in the line - Reformat to `\*`
                     line = line.replace('*', '\\*')
-                    idx = len(line) # Parsed the line. Go to another line
+                    idx = len(line)  # Parsed the line. Go to another line
                 elif idx2 == idx1+1:
                     # First double asterisk met in the line
                     idx2 = line.find('**', idx1+2)
@@ -278,7 +278,6 @@ def _get_param_text(title, param):
         elif title == 'Raises':
             return ':raises:'
 
-
     # Internal function. Returns correct markup for Parameters section
     def _reformat_parameters(title, text):
         lines = text.split('\n')
@@ -608,15 +607,15 @@ def parse_templ_rst(fname_templ):
             doc.pop(0)  # Skipping ``.. sdc_toctree``
 
             # Parsing the list of APIs
-            while len(doc) >0 and doc[0].strip() != '':
+            while len(doc) > 0 and doc[0].strip() != '':
                 line = doc[0]
                 indent = get_indent(line)
                 line = line.strip()
                 full_name = current_module_name + '.' + line
                 obj = get_obj(full_name)
                 short_description = generate_simple_object_doc(obj, short_doc_flag=True).strip()
                 new_line = reindent(':ref:`', indent) + line + ' <' + full_name + '>`\n' + \
-                           reindent(short_description, indent+4) + '\n'
+                    reindent(short_description, indent+4) + '\n'
                 fout.write(new_line)
                 doc.pop(0)
 

diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -40,16 +40,92 @@
 
 import sdc
 import sdc.datatypes.common_functions as common_functions
-from sdc.datatypes.common_functions import TypeChecker
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
 from sdc.hiframes.pd_series_ext import SeriesType
 from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
 from sdc.utils import to_array
 
+class TypeChecker:
+    """
+        Validate object type and raise TypingError if the type is invalid, e.g.:
+            Method nsmallest(). The object n
+             given: bool
+             expected: int
+        """
+    msg_template = '{} The object {}\n given: {}\n expected: {}'
+
+    def __init__(self, func_name):
+        """
+        Parameters
+        ----------
+        func_name: :obj:`str`
+            name of the function where types checking
+        """
+        self.func_name = func_name
+
+    def raise_exc(self, data, expected_types, name=''):
+        """
+        Raise exception with unified message
+        Parameters
+        ----------
+        data: :obj:`any`
+            real type of the data
+        expected_types: :obj:`str`
+            expected types inserting directly to the exception
+        name: :obj:`str`
+            name of the parameter
+        """
+        msg = self.msg_template.format(self.func_name, name, data, expected_types)
+        raise TypingError(msg)
+
+    def check(self, data, accepted_type, name=''):
+        """
+        Check data type belongs to specified type
+        Parameters
+        ----------
+        data: :obj:`any`
+            real type of the data
+        accepted_type: :obj:`type`
+            accepted type
+        name: :obj:`str`
+            name of the parameter
+        """
+        if not isinstance(data, accepted_type):
+            self.raise_exc(data, accepted_type.__name__, name=name)
+
 
 @overload(operator.getitem)
 def hpat_pandas_series_getitem(self, idx):
     """
+    Intel Scalable Dataframe Compiler User Guide
+    ********************************************
+    Pandas API: pandas.Series.get
+
+    Limitations
+    -----------
+    Supported ``key`` can be one of the following:
+        - Integer scalar, e.g. :obj:`series[0]`
+        - A slice, e.g. :obj:`series[2:5]`
+        - Another series
+
+    Examples
+    --------
+    .. literalinclude:: ../../../examples/series_getitem.py
+       :language: python
+       :lines: 27-
+       :caption: Getting Pandas Series elements
+       :name: ex_series_getitem
+
+    .. code-block:: console
+
+        > python ./series_getitem.py
+        55
+
+    .. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring
+
+    Intel Scalable Dataframe Compiler Developer Guide
+    *************************************************
+
     Pandas Series operator :attr:`pandas.Series.get` implementation
     **Algorithm**: result = series[idx]
 
@@ -1131,7 +1207,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
     if not isinstance(other.data.dtype, types.Number):
         ty_checker.raise_exc(other.data, 'number', 'other.data')
 
-    if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None:
+    if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)):
         ty_checker.raise_exc(min_periods, 'int64', 'min_periods')
 
     def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
@@ -1153,20 +1229,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
         if len(self_arr) < min_periods:
             return numpy.nan
 
-        new_self = pandas.Series(self_arr)
-        new_other = pandas.Series(other_arr)
-
-        n = new_self.count()
-        ma = new_self.sum()
-        mb = new_other.sum()
-        a = n * (self_arr * other_arr).sum() - ma * mb
-        b1 = n * (self_arr * self_arr).sum() - ma * ma
-        b2 = n * (other_arr * other_arr).sum() - mb * mb
-
-        if b1 == 0 or b2 == 0:
-            return numpy.nan
-
-        return a / numpy.sqrt(b1 * b2)
+        return numpy.corrcoef(self_arr, other_arr)[0, 1]
 
     return hpat_pandas_series_corr_impl
 
@@ -2100,77 +2163,6 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):
     return hpat_pandas_series_quantile_impl
 
 
-@overload_method(SeriesType, 'rename')
-def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None):
-    """
-    Pandas Series method :meth:`pandas.Series.rename` implementation.
-    Alter Series index labels or name.
-    .. only:: developer
-       Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename
-
-    Parameters
-    -----------
-    index : :obj:`scalar` or `hashable sequence` or `dict` or `function`
-               Dict-like or functions are transformations to apply to the index.
-               Scalar or hashable sequence-like will alter the Series.name attribute.
-               Only scalar value is supported.
-    copy : :obj:`bool`, default :obj:`True`
-               Whether to copy underlying data.
-    inplace : :obj:`bool`, default :obj:`False`
-               Whether to return a new Series. If True then value of copy is ignored.
-    level : :obj:`int` or `str`
-               In case of a MultiIndex, only rename labels in the specified level.
-               *Not supported*
-    Returns
-    -------
-    :obj:`pandas.Series`
-         returns :obj:`pandas.Series` with index labels or name altered.
-    """
-
-    ty_checker = TypeChecker('Method rename().')
-    ty_checker.check(self, SeriesType)
-
-    if not isinstance(index, (types.Omitted, types.UnicodeType,
-                              types.StringLiteral, str,
-                              types.Integer, types.Boolean,
-                              types.Hashable, types.Float,
-                              types.NPDatetime, types.NPTimedelta,
-                              types.Number)) and index is not None:
-        ty_checker.raise_exc(index, 'string', 'index')
-
-    if not isinstance(copy, (types.Omitted, types.Boolean, bool)):
-        ty_checker.raise_exc(copy, 'boolean', 'copy')
-
-    if not isinstance(inplace, (types.Omitted, types.Boolean, bool)):
-        ty_checker.raise_exc(inplace, 'boolean', 'inplace')
-
-    if not isinstance(level, (types.Omitted, types.UnicodeType,
-                              types.StringLiteral, types.Integer)) and level is not None:
-        ty_checker.raise_exc(level, 'Integer or srting', 'level')
-
-    def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None):
-        if copy is True:
-            series_data = self._data.copy()
-            series_index = self._index.copy()
-        else:
-            series_data = self._data
-            series_index = self._index
-
-        return pandas.Series(data=series_data, index=series_index, name=index)
-
-    def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None):
-        if copy is True:
-            series_data = self._data.copy()
-        else:
-            series_data = self._data
-
-        return pandas.Series(data=series_data, index=self._index, name=index)
-
-    if isinstance(self.index, types.NoneType):
-        return hpat_pandas_series_rename_noidx_impl
-    return hpat_pandas_series_rename_idx_impl
-
-
 @overload_method(SeriesType, 'min')
 def hpat_pandas_series_min(self, axis=None, skipna=True, level=None, numeric_only=None):
     """
@@ -2933,12 +2925,11 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
             It is better to merge with Numeric branch
             """
 
-            data = self._data
-            if dropna:
-                nan_mask = self.isna()
-                data = self._data[~nan_mask._data]
-            unique_values = set(data)
-            return len(unique_values)
+            str_set = set(self._data)
+            if dropna == False:
+                return len(str_set) - 1
+            else:
+                return len(str_set)
 
         return hpat_pandas_series_nunique_str_impl
 
@@ -2992,8 +2983,7 @@ def hpat_pandas_series_count(self, level=None):
     if isinstance(self.data, StringArrayType):
         def hpat_pandas_series_count_str_impl(self, level=None):
 
-            nan_mask = self.isna()
-            return numpy.sum(nan_mask._data == 0)
+            return len(self._data)
 
         return hpat_pandas_series_count_str_impl
 
@@ -3143,10 +3133,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
                 sort_nona = numpy.argsort(self._data[~na_data_arr])
             q = 0
             for id, i in enumerate(sort):
-                if id in set(sort[len(self._data) - na:]):
-                    q += 1
+                if id not in list(sort[len(self._data) - na:]):
+                    result[id] = sort_nona[id-q]
                 else:
-                    result[id] = sort_nona[id - q]
+                    q += 1
             for i in sort[len(self._data) - na:]:
                 result[i] = -1
 
@@ -3170,10 +3160,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
             sort_nona = numpy.argsort(self._data[~na_data_arr])
         q = 0
         for id, i in enumerate(sort):
-            if id in set(sort[len(self._data) - na:]):
-                q += 1
-            else:
+            if id not in list(sort[len(self._data) - na:]):
                 result[id] = sort_nona[id - q]
+            else:
+                q += 1
         for i in sort[len(self._data) - na:]:
             result[i] = -1
 
@@ -3580,15 +3570,7 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
         if len(self_arr) < min_periods:
             return numpy.nan
 
-        new_self = pandas.Series(self_arr)
-
-        ma = new_self.mean()
-        mb = other.mean()
-
-        if numpy.isinf(mb):
-            return numpy.nan
-
-        return ((self_arr - ma) * (other_arr - mb)).sum() / (new_self.count() - 1.0)
+        return numpy.cov(self_arr, other_arr)[0, 1]
 
     return hpat_pandas_series_cov_impl