Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
11 changes: 5 additions & 6 deletions docs/source/buildscripts/apiref_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import pandas
from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict, get_sdc_object, get_obj
from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name
from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent,get_short_description
from sdc_doc_utils import is_sdc_user_guide_header, get_indent, reindent, get_short_description
from sdc_doc_utils import split_in_sections, get_docstring, create_heading_str, cut_sdc_dev_guide
import os

Expand All @@ -43,7 +43,7 @@ def reformat(text):
:param text: Original text with warnings
:return: Modified text that fixes warnings
"""
text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks`
text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks`
text = reformat_asterisks(text) # Fix for * and ** symbols
text = reformat_explicit_markup(text) # Fix for explicit markup without a blank line
text = reformat_bullet_list(text) # Fix bullet list indentation issues
Expand Down Expand Up @@ -214,7 +214,7 @@ def reformat_asterisks(text):
if idx2 == -1:
# Only one single asterisk in the line - Reformat to `\*`
line = line.replace('*', '\\*')
idx = len(line) # Parsed the line. Go to another line
idx = len(line) # Parsed the line. Go to another line
elif idx2 == idx1+1:
# First double asterisk met in the line
idx2 = line.find('**', idx1+2)
Expand Down Expand Up @@ -278,7 +278,6 @@ def _get_param_text(title, param):
elif title == 'Raises':
return ':raises:'


# Internal function. Returns correct markup for Parameters section
def _reformat_parameters(title, text):
lines = text.split('\n')
Expand Down Expand Up @@ -608,15 +607,15 @@ def parse_templ_rst(fname_templ):
doc.pop(0) # Skipping ``.. sdc_toctree``

# Parsing the list of APIs
while len(doc) >0 and doc[0].strip() != '':
while len(doc) > 0 and doc[0].strip() != '':
line = doc[0]
indent = get_indent(line)
line = line.strip()
full_name = current_module_name + '.' + line
obj = get_obj(full_name)
short_description = generate_simple_object_doc(obj, short_doc_flag=True).strip()
new_line = reindent(':ref:`', indent) + line + ' <' + full_name + '>`\n' + \
reindent(short_description, indent+4) + '\n'
reindent(short_description, indent+4) + '\n'
fout.write(new_line)
doc.pop(0)

Expand Down
202 changes: 92 additions & 110 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,92 @@

import sdc
import sdc.datatypes.common_functions as common_functions
from sdc.datatypes.common_functions import TypeChecker
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
from sdc.hiframes.pd_series_ext import SeriesType
from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
from sdc.utils import to_array

class TypeChecker:
    """
    Validate object type and raise TypingError if the type is invalid, e.g.:
        Method nsmallest(). The object n
         given: bool
         expected: int
    """
    # Positional fields: function name, parameter name, given type, expected type(s).
    msg_template = '{} The object {}\n given: {}\n expected: {}'

    def __init__(self, func_name):
        """
        Parameters
        ----------
        func_name: :obj:`str`
            name of the function whose arguments are being type-checked;
            it is used as the prefix of every error message
        """
        self.func_name = func_name

    def raise_exc(self, data, expected_types, name=''):
        """
        Raise an exception with the unified message

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        expected_types: :obj:`str`
            description of the expected types, inserted verbatim into the message
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            always, with a message built from :attr:`msg_template`
        """
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    def check(self, data, accepted_type, name=''):
        """
        Check that the data type belongs to the specified type

        Parameters
        ----------
        data: :obj:`any`
            real type of the data
        accepted_type: :obj:`type` or :obj:`tuple` of :obj:`type`
            accepted type, or a tuple of accepted types as supported by ``isinstance``
        name: :obj:`str`
            name of the parameter

        Raises
        ------
        TypingError
            if ``data`` is not an instance of ``accepted_type``
        """
        if isinstance(data, accepted_type):
            return

        # isinstance() accepts a tuple of types, but a tuple has no __name__,
        # so the expected-type text is assembled from each member instead.
        if isinstance(accepted_type, tuple):
            expected = ' or '.join(getattr(item, '__name__', str(item)) for item in accepted_type)
        else:
            expected = accepted_type.__name__

        self.raise_exc(data, expected, name=name)


@overload(operator.getitem)
def hpat_pandas_series_getitem(self, idx):
"""
Intel Scalable Dataframe Compiler User Guide
********************************************
Pandas API: pandas.Series.get

Limitations
-----------
Supported ``key`` can be one of the following:
- Integer scalar, e.g. :obj:`series[0]`
- A slice, e.g. :obj:`series[2:5]`
- Another series

Examples
--------
.. literalinclude:: ../../../examples/series_getitem.py
:language: python
:lines: 27-
:caption: Getting Pandas Series elements
:name: ex_series_getitem

.. code-block:: console

> python ./series_getitem.py
55

.. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring

Intel Scalable Dataframe Compiler Developer Guide
*************************************************

Pandas Series operator :attr:`pandas.Series.get` implementation
**Algorithm**: result = series[idx]

Expand Down Expand Up @@ -1131,7 +1207,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
if not isinstance(other.data.dtype, types.Number):
ty_checker.raise_exc(other.data, 'number', 'other.data')

if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None:
if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)):
ty_checker.raise_exc(min_periods, 'int64', 'min_periods')

def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
Expand All @@ -1153,20 +1229,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None
if len(self_arr) < min_periods:
return numpy.nan

new_self = pandas.Series(self_arr)
new_other = pandas.Series(other_arr)

n = new_self.count()
ma = new_self.sum()
mb = new_other.sum()
a = n * (self_arr * other_arr).sum() - ma * mb
b1 = n * (self_arr * self_arr).sum() - ma * ma
b2 = n * (other_arr * other_arr).sum() - mb * mb

if b1 == 0 or b2 == 0:
return numpy.nan

return a / numpy.sqrt(b1 * b2)
return numpy.corrcoef(self_arr, other_arr)[0, 1]

return hpat_pandas_series_corr_impl

Expand Down Expand Up @@ -2100,77 +2163,6 @@ def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):
return hpat_pandas_series_quantile_impl


@overload_method(SeriesType, 'rename')
def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None):
    """
    Pandas Series method :meth:`pandas.Series.rename` implementation.

    Alter Series index labels or name.

    .. only:: developer

       Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename

    Parameters
    ----------
    index : :obj:`scalar` or `hashable sequence` or `dict` or `function`
        Dict-like or functions are transformations to apply to the index.
        Scalar or hashable sequence-like will alter the Series.name attribute.
        Only scalar value is supported: it becomes the name of the returned Series.
    copy : :obj:`bool`, default :obj:`True`
        Whether to copy underlying data.
    inplace : :obj:`bool`, default :obj:`False`
        Only type-checked; this implementation always returns a new Series.
    level : :obj:`int` or `str`
        In case of a MultiIndex, only rename labels in the specified level.
        *Not supported*: only type-checked.

    Returns
    -------
    :obj:`pandas.Series`
        returns :obj:`pandas.Series` with the name altered.

    Raises
    ------
    TypingError
        raised through ``TypeChecker`` when an argument has an unsupported type.
    """

    ty_checker = TypeChecker('Method rename().')
    ty_checker.check(self, SeriesType)

    if not isinstance(index, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, str,
                              types.Integer, types.Boolean,
                              types.Hashable, types.Float,
                              types.NPDatetime, types.NPTimedelta,
                              types.Number)) and index is not None:
        ty_checker.raise_exc(index, 'string', 'index')

    if not isinstance(copy, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(copy, 'boolean', 'copy')

    if not isinstance(inplace, (types.Omitted, types.Boolean, bool)):
        ty_checker.raise_exc(inplace, 'boolean', 'inplace')

    if not isinstance(level, (types.Omitted, types.UnicodeType,
                              types.StringLiteral, types.Integer)) and level is not None:
        ty_checker.raise_exc(level, 'Integer or string', 'level')

    # Variant for a Series whose index type is not NoneType: the index is
    # copied together with the data when ``copy`` is True.
    def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None):
        if copy is True:
            series_data = self._data.copy()
            series_index = self._index.copy()
        else:
            series_data = self._data
            series_index = self._index

        return pandas.Series(data=series_data, index=series_index, name=index)

    # Variant for a Series whose index type is NoneType: only the data is
    # copied and ``self._index`` is passed through unchanged.
    def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None):
        if copy is True:
            series_data = self._data.copy()
        else:
            series_data = self._data

        return pandas.Series(data=series_data, index=self._index, name=index)

    if isinstance(self.index, types.NoneType):
        return hpat_pandas_series_rename_noidx_impl
    return hpat_pandas_series_rename_idx_impl


@overload_method(SeriesType, 'min')
def hpat_pandas_series_min(self, axis=None, skipna=True, level=None, numeric_only=None):
"""
Expand Down Expand Up @@ -2933,12 +2925,11 @@ def hpat_pandas_series_nunique_str_impl(self, dropna=True):
It is better to merge with Numeric branch
"""

data = self._data
if dropna:
nan_mask = self.isna()
data = self._data[~nan_mask._data]
unique_values = set(data)
return len(unique_values)
str_set = set(self._data)
if dropna == False:
return len(str_set) - 1
else:
return len(str_set)

return hpat_pandas_series_nunique_str_impl

Expand Down Expand Up @@ -2992,8 +2983,7 @@ def hpat_pandas_series_count(self, level=None):
if isinstance(self.data, StringArrayType):
def hpat_pandas_series_count_str_impl(self, level=None):

nan_mask = self.isna()
return numpy.sum(nan_mask._data == 0)
return len(self._data)

return hpat_pandas_series_count_str_impl

Expand Down Expand Up @@ -3143,10 +3133,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
sort_nona = numpy.argsort(self._data[~na_data_arr])
q = 0
for id, i in enumerate(sort):
if id in set(sort[len(self._data) - na:]):
q += 1
if id not in list(sort[len(self._data) - na:]):
result[id] = sort_nona[id-q]
else:
result[id] = sort_nona[id - q]
q += 1
for i in sort[len(self._data) - na:]:
result[i] = -1

Expand All @@ -3170,10 +3160,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
sort_nona = numpy.argsort(self._data[~na_data_arr])
q = 0
for id, i in enumerate(sort):
if id in set(sort[len(self._data) - na:]):
q += 1
else:
if id not in list(sort[len(self._data) - na:]):
result[id] = sort_nona[id - q]
else:
q += 1
for i in sort[len(self._data) - na:]:
result[i] = -1

Expand Down Expand Up @@ -3580,15 +3570,7 @@ def hpat_pandas_series_cov_impl(self, other, min_periods=None):
if len(self_arr) < min_periods:
return numpy.nan

new_self = pandas.Series(self_arr)

ma = new_self.mean()
mb = other.mean()

if numpy.isinf(mb):
return numpy.nan

return ((self_arr - ma) * (other_arr - mb)).sum() / (new_self.count() - 1.0)
return numpy.cov(self_arr, other_arr)[0, 1]

return hpat_pandas_series_cov_impl

Expand Down