From 0db8ef8a29f4c9003f0681acb99eaf857e3206a7 Mon Sep 17 00:00:00 2001
From: "Kozlov, Alexey" <alexey.kozlov@intel.com>
Date: Mon, 18 Nov 2019 20:44:51 +0300
Subject: [PATCH 1/4] Add support for Series.operator.add in a new-style

---
 sdc/datatypes/common_functions.py             |  17 +
 sdc/datatypes/hpat_pandas_series_functions.py | 345 +++++++++++++++++-
 sdc/hiframes/pd_series_ext.py                 |   9 +-
 sdc/tests/test_series.py                      | 271 +++++++++++++-
 4 files changed, 636 insertions(+), 6 deletions(-)

diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py
index d359d2c97..631aebccc 100644
--- a/sdc/datatypes/common_functions.py
+++ b/sdc/datatypes/common_functions.py
@@ -32,6 +32,7 @@
 
 import numpy
 
+import numba
 from numba import types
 from numba.errors import TypingError
 from numba.extending import overload
@@ -181,3 +182,19 @@ def _append_list_string_array_impl(A, B):
                 return new_data
 
             return _append_list_string_array_impl
+
+
+@numba.njit
+def _hpat_ensure_array_capacity(new_size, arr):
+    '''Function creating a copy of numpy array with a size more than specified'''
+    # TODO: replace this function with np.resize when supported by Numba
+    k = len(arr)
+    if k > new_size:
+        return arr
+
+    n = max(k, 1)  # an empty array must start from 1, doubling 0 would never terminate
+    while n <= new_size:  # grow until strictly larger, so that index new_size is writable
+        n = 2 * n
+    res = numpy.empty(n, arr.dtype)
+    res[:k] = arr[:k]  # only the first k elements carry data, the tail stays uninitialized
+    return res
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index dc639d94d..30b703940 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -37,13 +37,16 @@
 from numba.errors import TypingError
 from numba.extending import overload, overload_method, overload_attribute
 from numba import types
+from numba import numpy_support
 
 import sdc
 import sdc.datatypes.common_functions as common_functions
 from sdc.datatypes.common_functions import TypeChecker
+from sdc.datatypes.common_functions import _hpat_ensure_array_capacity
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
 from sdc.hiframes.pd_series_ext import SeriesType
-from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars)
+from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars, string_array_type,
+                             str_arr_is_na, pre_alloc_string_array, str_arr_set_na)
 from sdc.utils import to_array
 
 
@@ -3722,3 +3725,343 @@ def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit
         return pandas.Series(result)
 
     return hpat_pandas_series_pct_change_impl
+
+
+def hpat_join_series_indexes(left, right):
+    pass
+
+@overload(hpat_join_series_indexes)
+def hpat_join_series_indexes_overload(left, right):
+    '''Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm'''
+
+    if (isinstance(left, types.Array) and isinstance(right, types.Array)):
+        np_dtypes = [numpy_support.as_dtype(left.dtype), numpy_support.as_dtype(right.dtype)]
+        np_common_dtype = numpy.find_common_type([], np_dtypes)
+        numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
+        if (isinstance(left.dtype, types.Number) and isinstance(right.dtype, types.Number)):
+
+            def hpat_join_series_indexes_impl(left, right):
+
+                # allocate result arrays
+                lsize = len(left)
+                rsize = len(right)
+                est_total_size = int(1.1 * (lsize + rsize))
+
+                lidx = numpy.empty(est_total_size, numpy.int64)
+                ridx = numpy.empty(est_total_size, numpy.int64)
+                joined = numpy.empty(est_total_size, numba_common_dtype)
+
+                # sort arrays saving the old positions
+                sorted_left = numpy.argsort(left, kind='mergesort')
+                sorted_right = numpy.argsort(right, kind='mergesort')
+
+                i, j, k = 0, 0, 0
+                while (i < lsize and j < rsize):
+                    joined = _hpat_ensure_array_capacity(k, joined)
+                    lidx = _hpat_ensure_array_capacity(k, lidx)
+                    ridx = _hpat_ensure_array_capacity(k, ridx)
+
+                    left_index = left[sorted_left[i]]
+                    right_index = right[sorted_right[j]]
+
+                    if (left_index < right_index):
+                        joined[k] = left_index
+                        lidx[k] = sorted_left[i]
+                        ridx[k] = -1
+                        i += 1
+                        k += 1
+                    elif (left_index > right_index):
+                        joined[k] = right_index
+                        lidx[k] = -1
+                        ridx[k] = sorted_right[j]
+                        j += 1
+                        k += 1
+                    else:
+                        # find ends of sequences of equivalent index values in left and right
+                        ni, nj = i, j
+                        while (ni < lsize and left[sorted_left[ni]] == left_index):
+                            ni += 1
+                        while (nj < rsize and right[sorted_right[nj]] == right_index):
+                            nj += 1
+
+                        # join the blocks found into results  
+                        for s in numpy.arange(i, ni, 1):
+                            block_size = nj - j
+                            to_joined = numpy.repeat(left_index, block_size)
+                            to_lidx = numpy.repeat(sorted_left[s], block_size)
+                            to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)
+
+                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+
+                            joined[k:k + block_size] = to_joined
+                            lidx[k:k + block_size] = to_lidx
+                            ridx[k:k + block_size] = to_ridx
+                            k += block_size
+                        i = ni
+                        j = nj
+
+                # fill the end of joined with remaining part of left or right
+                if i < lsize:
+                    block_size = lsize - i
+                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                    ridx[k: k + block_size] = numpy.repeat(-1, block_size)
+                    while i < lsize:
+                        joined[k] = left[sorted_left[i]]
+                        lidx[k] = sorted_left[i]
+                        i += 1
+                        k += 1
+
+                elif j < rsize:
+                    block_size = rsize - j
+                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                    lidx[k: k + block_size] = numpy.repeat(-1, block_size)
+                    while j < rsize:
+                        joined[k] = right[sorted_right[j]]
+                        ridx[k] = sorted_right[j]
+                        j += 1
+                        k += 1
+
+                return joined[:k], lidx[:k], ridx[:k]
+
+            return hpat_join_series_indexes_impl
+
+    elif (left == string_array_type and right == string_array_type):
+
+        def hpat_join_series_indexes_impl(left, right):
+            """Outer join of two string arrays: returns joined values and left/right indexers (-1 = no match)"""
+            # allocate result arrays
+            lsize = len(left)
+            rsize = len(right)
+            est_total_size = int(1.1 * (lsize + rsize))
+
+            lidx = numpy.empty(est_total_size, numpy.int64)
+            ridx = numpy.empty(est_total_size, numpy.int64)
+
+            # use Series.sort_values since argsort for StringArrays not implemented
+            original_left_series = pandas.Series(left)
+            original_right_series = pandas.Series(right)
+
+            # sort arrays saving the old positions
+            left_series = original_left_series.sort_values(kind='mergesort')
+            right_series = original_right_series.sort_values(kind='mergesort')
+            sorted_left = left_series._index
+            sorted_right = right_series._index
+
+            i, j, k = 0, 0, 0
+            while (i < lsize and j < rsize):
+                lidx = _hpat_ensure_array_capacity(k + 1, lidx)  # slot k itself is written below
+                ridx = _hpat_ensure_array_capacity(k + 1, ridx)
+
+                left_index = left[sorted_left[i]]
+                right_index = right[sorted_right[j]]
+
+                if (left_index < right_index):
+                    lidx[k] = sorted_left[i]
+                    ridx[k] = -1
+                    i += 1
+                    k += 1
+                elif (left_index > right_index):
+                    lidx[k] = -1
+                    ridx[k] = sorted_right[j]
+                    j += 1
+                    k += 1
+                else:
+                    # find ends of sequences of equivalent index values in left and right
+                    ni, nj = i, j
+                    while (ni < lsize and left[sorted_left[ni]] == left_index):
+                        ni += 1
+                    while (nj < rsize and right[sorted_right[nj]] == right_index):
+                        nj += 1
+
+                    # join the blocks found into results
+                    for s in numpy.arange(i, ni, 1):
+                        block_size = nj - j
+                        to_lidx = numpy.repeat(sorted_left[s], block_size)
+                        to_ridx = numpy.array([sorted_right[t] for t in numpy.arange(j, nj, 1)], numpy.int64)
+
+                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+
+                        lidx[k:k + block_size] = to_lidx
+                        ridx[k:k + block_size] = to_ridx
+                        k += block_size
+                    i = ni
+                    j = nj
+
+            # fill the end of joined with remaining part of left or right
+            if i < lsize:
+                block_size = lsize - i
+                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                ridx[k: k + block_size] = numpy.repeat(-1, block_size)
+                while i < lsize:
+                    lidx[k] = sorted_left[i]
+                    i += 1
+                    k += 1
+
+            elif j < rsize:
+                block_size = rsize - j
+                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                lidx[k: k + block_size] = numpy.repeat(-1, block_size)
+                while j < rsize:
+                    ridx[k] = sorted_right[j]
+                    j += 1
+                    k += 1
+
+            # count total number of characters and allocate joined array
+            total_joined_size = k
+            num_chars_in_joined = 0
+            for i in numpy.arange(total_joined_size):
+                if lidx[i] != -1:
+                    num_chars_in_joined += len(left[lidx[i]])
+                elif ridx[i] != -1:
+                    num_chars_in_joined += len(right[ridx[i]])
+
+            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)
+
+            # iterate over joined and fill it with indexes using lidx and ridx indexers
+            for i in numpy.arange(total_joined_size):
+                if lidx[i] != -1:
+                    joined[i] = left[lidx[i]]
+                    if (str_arr_is_na(left, lidx[i])):
+                        str_arr_set_na(joined, i)
+                elif ridx[i] != -1:
+                    joined[i] = right[ridx[i]]
+                    if (str_arr_is_na(right, ridx[i])):
+                        str_arr_set_na(joined, i)
+                else:
+                    str_arr_set_na(joined, i)
+
+            return joined, lidx[:total_joined_size], ridx[:total_joined_size]  # drop unused capacity tail
+
+        return hpat_join_series_indexes_impl
+
+
+@overload(operator.add)
+def hpat_pandas_series_operator_add(self, other):
+    """
+    Pandas Series operator :attr:`pandas.Series.add` implementation
+
+    .. only:: developer
+
+    **Test**: python -m hpat.runtests sdc.tests.test_series.TestSeries.test_series_op1
+              python -m hpat.runtests sdc.tests.test_series.TestSeries.test_series_op2
+              python -m hpat.runtests sdc.tests.test_series.TestSeries.test_series_op3
+              python -m hpat.runtests sdc.tests.test_series.TestSeries.test_series_op4
+              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_add*
+
+    Parameters
+    ----------
+    series: :obj:`pandas.Series`
+        Input series
+    other: :obj:`pandas.Series` or :obj:`scalar`
+        Series or scalar value to be used as a second argument of binary operation
+
+    Returns
+    -------
+    :obj:`pandas.Series`
+        The result of the operation
+    """
+
+    _func_name = 'Operator add().'
+
+    if not isinstance(self, SeriesType):
+        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))
+
+    if not (isinstance(other, SeriesType)
+                or isinstance(other, types.Number)):
+        raise TypingError('{} The object must be a pandas.series or a scalar. Given: {}'.format(_func_name, other))
+
+    series_indexes_alignable = False
+    if isinstance(other, SeriesType):
+        if (other.index == string_array_type and self.index == string_array_type):
+            series_indexes_alignable = True
+
+        if ((isinstance(self.index, types.NoneType) or
+                isinstance(self.index, types.Array) and isinstance(self.index.dtype, types.Number))
+            and (isinstance(other.index, types.NoneType) or
+                    isinstance(other.index, types.Array) and isinstance(other.index.dtype, types.Number))):
+            series_indexes_alignable = True
+
+    if isinstance(other, SeriesType) and not series_indexes_alignable:
+        raise TypingError('{} Not implemented for series with not-alignable indexes. \
+        Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))
+
+    if (isinstance(other, types.Number)):
+        def hpat_pandas_series_add_scalar_impl(self, other):
+            return pandas.Series(self._data + other, self._index)
+
+        return hpat_pandas_series_add_scalar_impl
+
+    elif (isinstance(other, SeriesType)):
+        is_numeric_index = isinstance(self.index, (types.Array, types.NoneType))
+
+        if is_numeric_index:
+            ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
+            ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
+            np_index_dtypes = [numpy_support.as_dtype(ty_left_index_dtype), numpy_support.as_dtype(ty_right_index_dtype)]
+            np_index_common_dtype = numpy.find_common_type([], np_index_dtypes)
+            numba_index_common_dtype = numpy_support.from_dtype(np_index_common_dtype)
+
+        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
+            def hpat_pandas_series_add_impl(self, other):
+
+                if (len(self._data) == len(other._data)):
+                    return pandas.Series(numpy.asarray(self._data + other._data, numpy.float64))
+                else:
+                    min_data_size = min(len(self._data), len(other._data))
+                    max_data_size = max(len(self._data), len(other._data))
+                    new_data = numpy.empty(max_data_size, dtype=numpy.float64)
+                    new_data[:min_data_size] = self._data[:min_data_size] + other._data[:min_data_size]
+                    new_data[min_data_size:] = numpy.repeat(numpy.nan, max_data_size - min_data_size)
+
+                    return pandas.Series(new_data, self._index)
+        else:
+            def hpat_pandas_series_add_impl(self, other):
+                left_index, right_index = self.index, other.index
+
+                # check if indexes are equal and series don't have to be aligned
+                if is_numeric_index == True:  # noqa
+                    if (numpy.array_equal(left_index, right_index)):
+                        return pandas.Series(numpy.asarray(self._data + other._data, numpy.float64),
+                                             numpy.asarray(left_index, numba_index_common_dtype))
+                else:
+                    # TODO: replace with StringArrays comparison
+                    is_index_equal = (len(self._index) == len(other._index)
+                                      and num_total_chars(self._index) == num_total_chars(other._index))
+                    for i in numpy.arange(len(self._index)):
+                        if (self._index[i] != other._index[i]
+                                or str_arr_is_na(self._index, i) is not str_arr_is_na(other._index, i)):
+                            is_index_equal = False
+
+                    if is_index_equal:
+                        return pandas.Series(numpy.asarray(self._data + other._data, numpy.float64),
+                                             self._index)
+
+                # TODO: replace below with core join(how='outer') when implemented
+                joined_index, left_indexer, right_indexer = hpat_join_series_indexes(left_index, right_index)
+
+                joined_index_range = numpy.arange(len(joined_index))
+                left_values = numpy.asarray(
+                    [self._data[left_indexer[i]] for i in joined_index_range],
+                    numpy.float64
+                )
+                left_values[left_indexer == -1] = numpy.nan
+
+                right_values = numpy.asarray(
+                    [other._data[right_indexer[i]] for i in joined_index_range],
+                    numpy.float64
+                )
+                right_values[right_indexer == -1] = numpy.nan
+
+                return pandas.Series(left_values + right_values, joined_index)
+
+
+    return hpat_pandas_series_add_impl
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py
index f023aea48..037658f68 100644
--- a/sdc/hiframes/pd_series_ext.py
+++ b/sdc/hiframes/pd_series_ext.py
@@ -1239,10 +1239,11 @@ def generic(self, args, kws):
         return series_op_generic(SeriesUnaryOpUfuncs, self, args, kws)
 
 
-# TODO: change class name to Series in install_operations
-SeriesOpUfuncs.install_operations()
-SeriesInplaceOpUfuncs.install_operations()
-SeriesUnaryOpUfuncs.install_operations()
+if sdc.config.config_pipeline_hpat_default:
+    # TODO: change class name to Series in install_operations
+    SeriesOpUfuncs.install_operations()
+    SeriesInplaceOpUfuncs.install_operations()
+    SeriesUnaryOpUfuncs.install_operations()
 
 
 class Series_Numpy_rules_ufunc(Numpy_rules_ufunc):
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
index 0d47f70dd..7c24d167e 100644
--- a/sdc/tests/test_series.py
+++ b/sdc/tests/test_series.py
@@ -33,7 +33,7 @@
 import numpy as np
 import pyarrow.parquet as pq
 import sdc
-from itertools import islice, permutations, product
+from itertools import islice, permutations, product, combinations
 from sdc.tests.test_base import TestCase
 from sdc.tests.test_utils import (
     count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end,
@@ -4579,6 +4579,275 @@ def test_series_pct_change_impl(S, periods=1, fill_method='pad', limit=None, fre
         msg = 'Method pct_change(). The object periods'
         self.assertIn(msg, str(raises.exception))
 
+    def test_series_setitem_for_value(self):
+        def test_impl(S, val):
+            S[3] = val
+            return S
+
+        hpat_func = self.jit(test_impl)
+        S = pd.Series([0, 1, 2, 3, 4])
+        value = 50
+        result_ref = test_impl(S, value)
+        result = hpat_func(S, value)
+        pd.testing.assert_series_equal(result_ref, result)
+
+    def test_series_setitem_for_slice(self):
+        def test_impl(S, val):
+            S[2:] = val
+            return S
+
+        hpat_func = self.jit(test_impl)
+        S = pd.Series([0, 1, 2, 3, 4])
+        value = 50
+        result_ref = test_impl(S, value)
+        result = hpat_func(S, value)
+        pd.testing.assert_series_equal(result_ref, result)
+
+    def test_series_setitem_for_series(self):
+        def test_impl(S, ind, val):
+            S[ind] = val
+            return S
+
+        hpat_func = self.jit(test_impl)
+        S = pd.Series([0, 1, 2, 3, 4])
+        ind = pd.Series([0, 2, 4])
+        value = 50
+        result_ref = test_impl(S, ind, value)
+        result = hpat_func(S, ind, value)
+        pd.testing.assert_series_equal(result_ref, result)
+
+    def test_series_setitem_unsupported(self):
+        def test_impl(S, ind, val):
+            S[ind] = val
+            return S
+
+        hpat_func = self.jit(test_impl)
+        S = pd.Series([0, 1, 2, 3, 4, 5])
+        ind1 = 5
+        ind2 = '3'
+        value1 = 'ababa'
+        value2 = 101
+
+        with self.assertRaises(TypingError) as raises:
+            hpat_func(S, ind1, value1)
+        msg = 'Operator setitem(). Value must be one type with series.'
+        self.assertIn(msg, str(raises.exception))
+
+        with self.assertRaises(TypingError) as raises:
+            hpat_func(S, ind2, value2)
+        msg = 'Operator setitem(). The index must be an Integer, Slice or a pandas.series.'
+        self.assertIn(msg, str(raises.exception))
+
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_scalar(self):
+        '''Verifies Series.operator.add implementation for scalar second operand'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtype_to_index = {'None': None,
+                          'int': np.arange(n, dtype='int'),
+                          'float': np.arange(n, dtype='float'),
+                          'string': ['aa', 'aa', 'b', 'b', 'cccc', 'dd', 'ddd']}
+
+        int_scalar = 24
+        for dtype, index_data in dtype_to_index.items():
+            with self.subTest(index_dtype=dtype, index=index_data):
+                if platform.system() == 'Windows' and not IS_32BITS:
+                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
+                else:
+                    A = pd.Series(np.arange(n), index=index_data)
+                pd.testing.assert_series_equal(hpat_func(A, int_scalar), test_impl(A, int_scalar), check_names=False)
+
+        float_scalar = 24.0
+        for dtype, index_data in dtype_to_index.items():
+            with self.subTest(index_dtype=dtype, index=index_data):
+                if platform.system() == 'Windows' and not IS_32BITS:
+                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
+                else:
+                    A = pd.Series(np.arange(n), index=index_data)
+                pd.testing.assert_series_equal(hpat_func(A, float_scalar), test_impl(A, float_scalar), check_names=False)
+
+    def test_series_operator_add_same_index_default(self):
+        '''Verifies implementation of Series.operator.add between two Series with default indexes and same size'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), dtype=dtype_left)
+                B = pd.Series(np.arange(n)**2, dtype=dtype_right)
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skip("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
+                   "AssertionError: Series.index are different\n"
+                   "Series.index classes are not equivalent\n"
+                   "[left]:  Float64Index([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype='float64')\n"
+                   "[right]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')\n")
+    def test_series_operator_add_same_index_numeric_fixme(self):
+        '''Verifies implementation of Series.operator.add between two Series with the same numeric indexes of different dtypes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
+                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_same_index_numeric(self):
+        ''' Same as test_series_operator_add_same_index_numeric but with w/a for the problem.
+        Can be deleted when the latter is fixed '''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        int_dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(int_dtypes_to_test, 2):
+            # FIXME: skip the sub-test if one of the dtypes is float and the other is integer
+            if not (np.issubdtype(dtype_left, np.integer) and np.issubdtype(dtype_right, np.integer)
+                    or np.issubdtype(dtype_left, np.float) and np.issubdtype(dtype_right, np.float)):
+                continue
+
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
+                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_same_index_str(self):
+        '''Verifies implementation of Series.operator.add between two Series with the same string indexes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        A = pd.Series(np.arange(n), index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
+        B = pd.Series(np.arange(n)**2, index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_align_index_int(self):
+        '''Verifies implementation of Series.operator.add between two Series with non-equal integer indexes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = [0, 1, 1, 2, 3, 3, 3, 4, 6, 8, 9]
+        index_B = [0, 1, 1, 3, 4, 4, 5, 5, 6, 6, 9]
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_align_index_str(self):
+        '''Verifies implementation of Series.operator.add between two Series with non-equal string indexes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = ['', '', 'aa', 'aa', 'ae', 'ae', 'b', 'ccc', 'cccc', 'oo', 's']
+        index_B = ['', '', 'aa', 'aa', 'cc', 'cccc', 'e', 'f', 'h', 'oo', 's']
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skip("TODO: fix Series.sort_values to handle both None and '' in string series\n")
+    def test_series_operator_add_align_index_str_fixme(self):
+        '''Same as test_series_operator_add_align_index_str but with None values in string indexes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = ['', '', 'aa', 'aa', 'ae', 'b', 'ccc', 'cccc', 'oo', None, None]
+        index_B = ['', '', 'aa', 'aa', 'cccc', 'f', 'h', 'oo', 's', None, None]
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_align_index_other_dtype(self):
+        '''Verifies implementation of Series.operator.add between two Series
+           with non-equal integer indexes of different dtypes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        A = pd.Series(np.arange(3*n), index=np.arange(-n, 2*n, 1, dtype=np.int64))
+        B = pd.Series(np.arange(3*n)**2, index=np.arange(0, 3*n, 1, dtype=np.float64))
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with different sizes are not supported in old-style')
+    def test_series_operator_add_diff_series_sizes(self):
+        '''Verifies implementation of Series.operator.add between two Series with different sizes'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        size_A, size_B = 7, 25
+        A = pd.Series(np.arange(size_A))
+        B = pd.Series(np.arange(size_B)**2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series requiring alignment of indexes are not supported in old-style')
+    def test_series_operator_add_align_index_int_capacity(self):
+        '''Verifies implementation of Series.operator.add and alignment of numeric indexes of large size'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 20000
+        np.random.seed(0)
+        index1 = np.random.randint(-30, 30, n)
+        index2 = np.random.randint(-30, 30, n)
+        A = pd.Series(np.random.ranf(n), index=index1)
+        B = pd.Series(np.random.ranf(n), index=index2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skip("Test hangs due to a call of Series.sort_values")
+    def test_series_operator_add_align_index_str_capacity(self):
+        '''Verifies implementation of Series.operator.add and alignment of string indexes of large size'''
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 20000
+        np.random.seed(0)
+        valid_ids = ['', 'aaa', 'a', 'b', 'ccc', 'ef', 'ff', 'fff', 'fa', 'dddd']
+        index1 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
+        index2 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
+        A = pd.Series(np.random.ranf(n), index=index1)
+        B = pd.Series(np.random.ranf(n), index=index2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
 
 if __name__ == "__main__":
     unittest.main()

From 80de726e958a3154826d6be0b596b0505c023e55 Mon Sep 17 00:00:00 2001
From: "Kozlov, Alexey" <alexey.kozlov@intel.com>
Date: Fri, 22 Nov 2019 19:40:12 +0300
Subject: [PATCH 2/4] Applying review comments and adding tests

---
 sdc/datatypes/common_functions.py             |   5 +-
 sdc/datatypes/hpat_pandas_series_functions.py |  21 +-
 sdc/tests/test_series.py                      | 225 ++++++++++++++++++
 3 files changed, 238 insertions(+), 13 deletions(-)

diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py
index 631aebccc..a794d78c3 100644
--- a/sdc/datatypes/common_functions.py
+++ b/sdc/datatypes/common_functions.py
@@ -121,7 +121,7 @@ def hpat_arrays_append(A, B):
 
 @overload(hpat_arrays_append)
 def hpat_arrays_append_overload(A, B):
-    '''Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A'''
+    """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""
 
     if isinstance(A, types.Array):
         if isinstance(B, types.Array):
@@ -186,8 +186,7 @@ def _append_list_string_array_impl(A, B):
 
 @numba.njit
 def _hpat_ensure_array_capacity(new_size, arr):
-    '''Function creating a copy of numpy array with a size more than specified'''
-    # TODO: replace this function with np.resize when supported by Numba
+    """Function creating a copy of numpy array with a size more than specified"""
     k = len(arr)
     if k > new_size:
         return arr
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index 30b703940..7abb7f9b7 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -3730,6 +3730,7 @@ def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit
 def hpat_join_series_indexes(left, right):
     pass
 
+
 @overload(hpat_join_series_indexes)
 def hpat_join_series_indexes_overload(left, right):
     '''Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm'''
@@ -3784,7 +3785,7 @@ def hpat_join_series_indexes_impl(left, right):
                         while (nj < rsize and right[sorted_right[nj]] == right_index):
                             nj += 1
 
-                        # join the blocks found into results  
+                        # join the blocks found into results
                         for s in numpy.arange(i, ni, 1):
                             block_size = nj - j
                             to_joined = numpy.repeat(left_index, block_size)
@@ -3972,12 +3973,11 @@ def hpat_pandas_series_operator_add(self, other):
 
     _func_name = 'Operator add().'
 
-    if not isinstance(self, SeriesType):
-        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))
+    ty_checker = TypeChecker('Operator add().')
+    ty_checker.check(self, SeriesType)
 
-    if not (isinstance(other, SeriesType)
-                or isinstance(other, types.Number)):
-        raise TypingError('{} The object must be a pandas.series or a scalar. Given: {}'.format(_func_name, other))
+    if not isinstance(other, (SeriesType, types.Number)):
+        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')
 
     series_indexes_alignable = False
     if isinstance(other, SeriesType):
@@ -3985,15 +3985,16 @@ def hpat_pandas_series_operator_add(self, other):
             series_indexes_alignable = True
 
         if ((isinstance(self.index, types.NoneType) or
-                isinstance(self.index, types.Array) and isinstance(self.index.dtype, types.Number))
+             isinstance(self.index, types.Array) and isinstance(self.index.dtype, types.Number))
             and (isinstance(other.index, types.NoneType) or
-                    isinstance(other.index, types.Array) and isinstance(other.index.dtype, types.Number))):
+                 isinstance(other.index, types.Array) and isinstance(other.index.dtype, types.Number))):
             series_indexes_alignable = True
 
     if isinstance(other, SeriesType) and not series_indexes_alignable:
         raise TypingError('{} Not implemented for series with not-alignable indexes. \
         Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))
 
+    # specializations for numeric series - TODO: support arithmetic operation on StringArrays
     if (isinstance(other, types.Number)):
         def hpat_pandas_series_add_scalar_impl(self, other):
             return pandas.Series(self._data + other, self._index)
@@ -4006,7 +4007,8 @@ def hpat_pandas_series_add_scalar_impl(self, other):
         if is_numeric_index:
             ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
             ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
-            np_index_dtypes = [numpy_support.as_dtype(ty_left_index_dtype), numpy_support.as_dtype(ty_right_index_dtype)]
+            np_index_dtypes = [numpy_support.as_dtype(ty_left_index_dtype),
+                               numpy_support.as_dtype(ty_right_index_dtype)]
             np_index_common_dtype = numpy.find_common_type([], np_index_dtypes)
             numba_index_common_dtype = numpy_support.from_dtype(np_index_common_dtype)
 
@@ -4063,5 +4065,4 @@ def hpat_pandas_series_add_impl(self, other):
 
                 return pandas.Series(left_values + right_values, joined_index)
 
-
     return hpat_pandas_series_add_impl
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
index 7c24d167e..95073361a 100644
--- a/sdc/tests/test_series.py
+++ b/sdc/tests/test_series.py
@@ -4848,6 +4848,231 @@ def test_impl(A, B):
         B = pd.Series(np.random.ranf(n), index=index2)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_scalar(self):
+        """Verifies Series.operator.add implementation for numeric series and scalar second operand"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtype_to_index = {'None': None,
+                          'int': np.arange(n, dtype='int'),
+                          'float': np.arange(n, dtype='float'),
+                          'string': ['aa', 'aa', 'b', 'b', 'cccc', 'dd', 'ddd']}
+
+        int_scalar = 24
+        for dtype, index_data in dtype_to_index.items():
+            with self.subTest(index_dtype=dtype, index=index_data):
+                if platform.system() == 'Windows' and not IS_32BITS:
+                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
+                else:
+                    A = pd.Series(np.arange(n), index=index_data)
+                pd.testing.assert_series_equal(hpat_func(A, int_scalar), test_impl(A, int_scalar), check_names=False)
+
+        float_scalar = 24.0
+        for dtype, index_data in dtype_to_index.items():
+            with self.subTest(index_dtype=dtype, index=index_data):
+                if platform.system() == 'Windows' and not IS_32BITS:
+                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
+                else:
+                    A = pd.Series(np.arange(n), index=index_data)
+                ref_result = test_impl(A, float_scalar)
+                result = hpat_func(A, float_scalar)
+                pd.testing.assert_series_equal(result, ref_result, check_names=False)
+
+    def test_series_operator_add_numeric_same_index_default(self):
+        """Verifies implementation of Series.operator.add between two numeric Series
+        with default indexes and same size"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), dtype=dtype_left)
+                B = pd.Series(np.arange(n)**2, dtype=dtype_right)
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skip("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
+                   "AssertionError: Series.index are different\n"
+                   "Series.index classes are not equivalent\n"
+                   "[left]:  Float64Index([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype='float64')\n"
+                   "[right]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')\n")
+    def test_series_operator_add_numeric_same_index_numeric_fixme(self):
+        """Verifies implementation of Series.operator.add between two numeric Series
+           with the same numeric indexes of different dtypes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
+                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_same_index_numeric(self):
+        """ Same as test_series_operator_add_same_index_numeric but with w/a for the problem.
+        Can be deleted when the latter is fixed """
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        int_dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
+        for dtype_left, dtype_right in combinations(int_dtypes_to_test, 2):
+            # FIXME: skip the sub-test if one of the dtypes is float and the other is integer
+            if not (np.issubdtype(dtype_left, np.integer) and np.issubdtype(dtype_right, np.integer)
+                    or np.issubdtype(dtype_left, np.float) and np.issubdtype(dtype_right, np.float)):
+                continue
+
+            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
+                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
+                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
+                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_same_index_str(self):
+        """Verifies implementation of Series.operator.add between two numeric Series with the same string indexes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        A = pd.Series(np.arange(n), index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
+        B = pd.Series(np.arange(n)**2, index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_align_index_int(self):
+        """Verifies implementation of Series.operator.add between two numeric Series with non-equal integer indexes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = [0, 1, 1, 2, 3, 3, 3, 4, 6, 8, 9]
+        index_B = [0, 1, 1, 3, 4, 4, 5, 5, 6, 6, 9]
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_align_index_str(self):
+        """Verifies implementation of Series.operator.add between two numeric Series with non-equal string indexes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = ['', '', 'aa', 'aa', 'ae', 'ae', 'b', 'ccc', 'cccc', 'oo', 's']
+        index_B = ['', '', 'aa', 'aa', 'cc', 'cccc', 'e', 'f', 'h', 'oo', 's']
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skip("TODO: fix Series.sort_values to handle both None and '' in string series\n")
+    def test_series_operator_add_numeric_align_index_str_fixme(self):
+        """Same as test_series_operator_add_align_index_str but with None values in string indexes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 11
+        index_A = ['', '', 'aa', 'aa', 'ae', 'b', 'ccc', 'cccc', 'oo', None, None]
+        index_B = ['', '', 'aa', 'aa', 'cccc', 'f', 'h', 'oo', 's', None, None]
+        np.random.shuffle(index_A)
+        np.random.shuffle(index_B)
+        A = pd.Series(np.arange(n), index=index_A)
+        B = pd.Series(np.arange(n)**2, index=index_B)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_align_index_other_dtype(self):
+        """Verifies implementation of Series.operator.add between two numeric Series
+        with non-equal integer indexes of different dtypes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 7
+        A = pd.Series(np.arange(3*n), index=np.arange(-n, 2*n, 1, dtype=np.int64))
+        B = pd.Series(np.arange(3*n)**2, index=np.arange(0, 3*n, 1, dtype=np.float64))
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series with different sizes are not supported in old-style')
+    def test_series_operator_add_numeric_diff_series_sizes(self):
+        """Verifies implementation of Series.operator.add between two numeric Series with different sizes"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        size_A, size_B = 7, 25
+        A = pd.Series(np.arange(size_A))
+        B = pd.Series(np.arange(size_B)**2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
+                     'Arithmetic operations on Series requiring alignment of indexes are not supported in old-style')
+    def test_series_operator_add_align_index_int_capacity(self):
+        """Verifies implementation of Series.operator.add and alignment of numeric indexes of large size"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 20000
+        np.random.seed(0)
+        index1 = np.random.randint(-30, 30, n)
+        index2 = np.random.randint(-30, 30, n)
+        A = pd.Series(np.random.ranf(n), index=index1)
+        B = pd.Series(np.random.ranf(n), index=index2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skip("Test hangs due to a call of Series.sort_values")
+    def test_series_operator_add_align_index_str_capacity(self):
+        """Verifies implementation of Series.operator.add and alignment of string indexes of large size"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        n = 20000
+        np.random.seed(0)
+        valid_ids = ['', 'aaa', 'a', 'b', 'ccc', 'ef', 'ff', 'fff', 'fa', 'dddd']
+        index1 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
+        index2 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
+        A = pd.Series(np.random.ranf(n), index=index1)
+        B = pd.Series(np.random.ranf(n), index=index2)
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
+    @unittest.skip("TODO: support arithemetic operations on StringArrays and extend Series.operator.add overload")
+    def test_series_operator_add_str_same_index_default(self):
+        """Verifies implementation of Series.operator.add between two string Series
+        with default indexes and same size"""
+        def test_impl(A, B):
+            return A + B
+        hpat_func = self.jit(test_impl)
+
+        A = pd.Series(['a', '', 'ae', 'b', 'cccc', 'oo', None])
+        B = pd.Series(['b', 'aa', '', 'b', 'o', None, 'oo'])
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
+
 
 if __name__ == "__main__":
     unittest.main()

From 738d74ad99fbb15005ca646b506168819309bbf0 Mon Sep 17 00:00:00 2001
From: "Kozlov, Alexey" <alexey.kozlov@intel.com>
Date: Wed, 4 Dec 2019 17:33:31 +0300
Subject: [PATCH 3/4] More comments and refactoring from review

---
 sdc/datatypes/common_functions.py             |  18 +-
 sdc/datatypes/hpat_pandas_series_functions.py |   3 +-
 sdc/tests/test_series.py                      | 265 +++---------------
 3 files changed, 52 insertions(+), 234 deletions(-)

diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py
index a794d78c3..f22028a14 100644
--- a/sdc/datatypes/common_functions.py
+++ b/sdc/datatypes/common_functions.py
@@ -92,7 +92,7 @@ def check(self, data, accepted_type, name=''):
 
 
 def has_literal_value(var, value):
-    '''Used during typing to check that variable var is a Numba literal value equal to value'''
+    """Used during typing to check that variable var is a Numba literal value equal to value"""
 
     if not isinstance(var, types.Literal):
         return False
@@ -104,7 +104,7 @@ def has_literal_value(var, value):
 
 
 def has_python_value(var, value):
-    '''Used during typing to check that variable var was resolved as Python type and has specific value'''
+    """Used during typing to check that variable var was resolved as Python type and has specific value"""
 
     if not isinstance(var, type(value)):
         return False
@@ -115,6 +115,11 @@ def has_python_value(var, value):
         return var == value
 
 
+def check_index_is_numeric(ty_series):
+    """Used during typing to check that series has numeric index"""
+    return isinstance(ty_series.index, types.Array) and isinstance(ty_series.index.dtype, types.Number)
+
+
 def hpat_arrays_append(A, B):
     pass
 
@@ -197,3 +202,12 @@ def _hpat_ensure_array_capacity(new_size, arr):
     res = numpy.empty(n, arr.dtype)
     res[:k] = arr[:k]
     return res
+
+def find_common_dtype_for_scalar_numpy_types(dtype1, dtype2):
+    """Used to find common numba dtype for two numba dtypes each representing some scalar numpy dtype"""
+    np_dtypes = [numpy_support.as_dtype(dtype1),
+                 numpy_support.as_dtype(dtype2)]
+    np_common_dtype = numpy.find_common_type([], np_dtypes)
+    numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
+
+    return numba_common_dtype
\ No newline at end of file
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index 7abb7f9b7..22361bd87 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -42,7 +42,8 @@
 import sdc
 import sdc.datatypes.common_functions as common_functions
 from sdc.datatypes.common_functions import TypeChecker
-from sdc.datatypes.common_functions import _hpat_ensure_array_capacity
+from sdc.datatypes.common_functions import (_hpat_ensure_array_capacity, check_index_is_numeric,
+                                            find_common_dtype_for_scalar_numpy_types)
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
 from sdc.hiframes.pd_series_ext import SeriesType
 from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars, string_array_type,
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
index 95073361a..3c85a607e 100644
--- a/sdc/tests/test_series.py
+++ b/sdc/tests/test_series.py
@@ -4639,10 +4639,9 @@ def test_impl(S, ind, val):
         self.assertIn(msg, str(raises.exception))
 
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_scalar(self):
-        '''Verifies Series.operator.add implementation for scalar second operand'''
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_scalar(self):
+        """Verifies Series.operator.add implementation for numeric series and scalar second operand"""
         def test_impl(A, B):
             return A + B
         hpat_func = self.jit(test_impl)
@@ -4669,10 +4668,13 @@ def test_impl(A, B):
                     A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
                 else:
                     A = pd.Series(np.arange(n), index=index_data)
-                pd.testing.assert_series_equal(hpat_func(A, float_scalar), test_impl(A, float_scalar), check_names=False)
+                ref_result = test_impl(A, float_scalar)
+                result = hpat_func(A, float_scalar)
+                pd.testing.assert_series_equal(result, ref_result, check_names=False)
 
-    def test_series_operator_add_same_index_default(self):
-        '''Verifies implementation of Series.operator.add between two Series with default indexes and same size'''
+    def test_series_operator_add_numeric_same_index_default(self):
+        """Verifies implementation of Series.operator.add between two numeric Series
+        with default indexes and same size"""
         def test_impl(A, B):
             return A + B
         hpat_func = self.jit(test_impl)
@@ -4685,224 +4687,31 @@ def test_impl(A, B):
                 B = pd.Series(np.arange(n)**2, dtype=dtype_right)
                 pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-    @unittest.skip("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
-                   "AssertionError: Series.index are different\n"
-                   "Series.index classes are not equivalent\n"
-                   "[left]:  Float64Index([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype='float64')\n"
-                   "[right]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')\n")
-    def test_series_operator_add_same_index_numeric_fixme(self):
-        '''Verifies implementation of Series.operator.add between two Series with the same numeric indexes of different dtypes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
-        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
-        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
-            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
-                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
-                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
-                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_same_index_numeric(self):
-        ''' Same as test_series_operator_add_same_index_numeric but with w/a for the problem.
-        Can be deleted when the latter is fixed '''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
-        int_dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
-        for dtype_left, dtype_right in combinations(int_dtypes_to_test, 2):
-            # FIXME: skip the sub-test if one of the dtypes is float and the other is integer
-            if not (np.issubdtype(dtype_left, np.integer) and np.issubdtype(dtype_right, np.integer)
-                    or np.issubdtype(dtype_left, np.float) and np.issubdtype(dtype_right, np.float)):
-                continue
-
-            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
-                A = pd.Series(np.arange(n), index=np.arange(n, dtype=dtype_left))
-                B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
-                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_same_index_str(self):
-        '''Verifies implementation of Series.operator.add between two Series with the same string indexes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
-        A = pd.Series(np.arange(n), index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
-        B = pd.Series(np.arange(n)**2, index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_align_index_int(self):
-        '''Verifies implementation of Series.operator.add between two Series with non-equal integer indexes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 11
-        index_A = [0, 1, 1, 2, 3, 3, 3, 4, 6, 8, 9]
-        index_B = [0, 1, 1, 3, 4, 4, 5, 5, 6, 6, 9]
-        np.random.shuffle(index_A)
-        np.random.shuffle(index_B)
-        A = pd.Series(np.arange(n), index=index_A)
-        B = pd.Series(np.arange(n)**2, index=index_B)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_align_index_str(self):
-        '''Verifies implementation of Series.operator.add between two Series with non-equal string indexes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 11
-        index_A = ['', '', 'aa', 'aa', 'ae', 'ae', 'b', 'ccc', 'cccc', 'oo', 's']
-        index_B = ['', '', 'aa', 'aa', 'cc', 'cccc', 'e', 'f', 'h', 'oo', 's']
-        np.random.shuffle(index_A)
-        np.random.shuffle(index_B)
-        A = pd.Series(np.arange(n), index=index_A)
-        B = pd.Series(np.arange(n)**2, index=index_B)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skip("TODO: fix Series.sort_values to handle both None and '' in string series\n")
-    def test_series_operator_add_align_index_str_fixme(self):
-        '''Same as test_series_operator_add_align_index_str but with None values in string indexes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 11
-        index_A = ['', '', 'aa', 'aa', 'ae', 'b', 'ccc', 'cccc', 'oo', None, None]
-        index_B = ['', '', 'aa', 'aa', 'cccc', 'f', 'h', 'oo', 's', None, None]
-        np.random.shuffle(index_A)
-        np.random.shuffle(index_B)
-        A = pd.Series(np.arange(n), index=index_A)
-        B = pd.Series(np.arange(n)**2, index=index_B)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_align_index_other_dtype(self):
-        '''Verifies implementation of Series.operator.add between two Series
-           with non-equal integer indexes of different dtypes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
-        A = pd.Series(np.arange(3*n), index=np.arange(-n, 2*n, 1, dtype=np.int64))
-        B = pd.Series(np.arange(3*n)**2, index=np.arange(0, 3*n, 1, dtype=np.float64))
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with different sizes are not supported in old-style')
-    def test_series_operator_add_diff_series_sizes(self):
-        '''Verifies implementation of Series.operator.add between two Series with different sizes'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        size_A, size_B = 7, 25
-        A = pd.Series(np.arange(size_A))
-        B = pd.Series(np.arange(size_B)**2)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series requiring alignment of indexes are not supported in old-style')
-    def test_series_operator_add_align_index_int_capacity(self):
-        '''Verifies implementation of Series.operator.add and alignment of numeric indexes of large size'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 20000
-        np.random.seed(0)
-        index1 = np.random.randint(-30, 30, n)
-        index2 = np.random.randint(-30, 30, n)
-        A = pd.Series(np.random.ranf(n), index=index1)
-        B = pd.Series(np.random.ranf(n), index=index2)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skip("Test hangs due to a call of Series.sort_values")
-    def test_series_operator_add_align_index_str_capacity(self):
-        '''Verifies implementation of Series.operator.add and alignment of string indexes of large size'''
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 20000
-        np.random.seed(0)
-        valid_ids = ['', 'aaa', 'a', 'b', 'ccc', 'ef', 'ff', 'fff', 'fa', 'dddd']
-        index1 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
-        index2 = [valid_ids[i] for i in np.random.randint(0, len(valid_ids), n)]
-        A = pd.Series(np.random.ranf(n), index=index1)
-        B = pd.Series(np.random.ranf(n), index=index2)
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
-
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_numeric_scalar(self):
-        """Verifies Series.operator.add implementation for numeric series and scalar second operand"""
+    def test_series_operator_add_series_dtype_promotion(self):
+        """Verifies implementation of Series.operator.add between two numeric Series of different dtypes"""
         def test_impl(A, B):
             return A + B
         hpat_func = self.jit(test_impl)
 
         n = 7
-        dtype_to_index = {'None': None,
-                          'int': np.arange(n, dtype='int'),
-                          'float': np.arange(n, dtype='float'),
-                          'string': ['aa', 'aa', 'b', 'b', 'cccc', 'dd', 'ddd']}
-
-        int_scalar = 24
-        for dtype, index_data in dtype_to_index.items():
-            with self.subTest(index_dtype=dtype, index=index_data):
-                if platform.system() == 'Windows' and not IS_32BITS:
-                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
-                else:
-                    A = pd.Series(np.arange(n), index=index_data)
-                pd.testing.assert_series_equal(hpat_func(A, int_scalar), test_impl(A, int_scalar), check_names=False)
+        A = pd.Series(np.array(np.arange(n), dtype=np.int32))
+        B = pd.Series(np.array(np.arange(n)**2, dtype=np.float32))
+        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-        float_scalar = 24.0
-        for dtype, index_data in dtype_to_index.items():
-            with self.subTest(index_dtype=dtype, index=index_data):
-                if platform.system() == 'Windows' and not IS_32BITS:
-                    A = pd.Series(np.arange(n, dtype=np.int64), index=index_data)
-                else:
-                    A = pd.Series(np.arange(n), index=index_data)
-                ref_result = test_impl(A, float_scalar)
-                result = hpat_func(A, float_scalar)
-                pd.testing.assert_series_equal(result, ref_result, check_names=False)
-
-    def test_series_operator_add_numeric_same_index_default(self):
-        """Verifies implementation of Series.operator.add between two numeric Series
-        with default indexes and same size"""
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
         dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
         for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
             with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
-                A = pd.Series(np.arange(n), dtype=dtype_left)
-                B = pd.Series(np.arange(n)**2, dtype=dtype_right)
+                A = pd.Series(np.array(np.arange(n), dtype=dtype_left))
+                B = pd.Series(np.array(np.arange(n)**2, dtype=dtype_right))
                 pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-    @unittest.skip("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
-                   "AssertionError: Series.index are different\n"
-                   "Series.index classes are not equivalent\n"
-                   "[left]:  Float64Index([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype='float64')\n"
-                   "[right]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')\n")
-    def test_series_operator_add_numeric_same_index_numeric_fixme(self):
+    @skip_numba_jit
+    @skip_sdc_jit("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
+                  "AssertionError: Series.index are different\n"
+                  "Series.index classes are not equivalent\n"
+                  "[left]:  Float64Index([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], dtype='float64')\n"
+                  "[right]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')\n")
+    def test_series_operator_add_numeric_same_index_numeric(self):
         """Verifies implementation of Series.operator.add between two numeric Series
            with the same numeric indexes of different dtypes"""
         def test_impl(A, B):
@@ -4917,9 +4726,8 @@ def test_impl(A, B):
                 B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
                 pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
-    def test_series_operator_add_numeric_same_index_numeric(self):
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    def test_series_operator_add_numeric_same_index_numeric_fixme(self):
         """ Same as test_series_operator_add_same_index_numeric but with w/a for the problem.
         Can be deleted when the latter is fixed """
         def test_impl(A, B):
@@ -4939,8 +4747,7 @@ def test_impl(A, B):
                 B = pd.Series(np.arange(n)**2, index=np.arange(n, dtype=dtype_right))
                 pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_same_index_str(self):
         """Verifies implementation of Series.operator.add between two numeric Series with the same string indexes"""
         def test_impl(A, B):
@@ -4952,8 +4759,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(n)**2, index=['a', 'c', 'e', 'c', 'b', 'a', 'o'])
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_align_index_int(self):
         """Verifies implementation of Series.operator.add between two numeric Series with non-equal integer indexes"""
         def test_impl(A, B):
@@ -4969,8 +4775,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(n)**2, index=index_B)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_align_index_str(self):
         """Verifies implementation of Series.operator.add between two numeric Series with non-equal string indexes"""
         def test_impl(A, B):
@@ -4986,7 +4791,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(n)**2, index=index_B)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skip("TODO: fix Series.sort_values to handle both None and '' in string series\n")
+    @skip_sdc_jit("TODO: fix Series.sort_values to handle both None and '' in string series")
     def test_series_operator_add_numeric_align_index_str_fixme(self):
         """Same as test_series_operator_add_align_index_str but with None values in string indexes"""
         def test_impl(A, B):
@@ -5002,8 +4807,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(n)**2, index=index_B)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with non-default indexes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_align_index_other_dtype(self):
         """Verifies implementation of Series.operator.add between two numeric Series
         with non-equal integer indexes of different dtypes"""
@@ -5016,8 +4820,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(3*n)**2, index=np.arange(0, 3*n, 1, dtype=np.float64))
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series with different sizes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series with different sizes are not supported in old-style')
     def test_series_operator_add_numeric_diff_series_sizes(self):
         """Verifies implementation of Series.operator.add between two numeric Series with different sizes"""
         def test_impl(A, B):
@@ -5029,8 +4832,7 @@ def test_impl(A, B):
         B = pd.Series(np.arange(size_B)**2)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skipIf(sdc.config.config_pipeline_hpat_default,
-                     'Arithmetic operations on Series requiring alignment of indexes are not supported in old-style')
+    @skip_sdc_jit('Arithmetic operations on Series requiring alignment of indexes are not supported in old-style')
     def test_series_operator_add_align_index_int_capacity(self):
         """Verifies implementation of Series.operator.add and alignment of numeric indexes of large size"""
         def test_impl(A, B):
@@ -5045,7 +4847,8 @@ def test_impl(A, B):
         B = pd.Series(np.random.ranf(n), index=index2)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skip("Test hangs due to a call of Series.sort_values")
+    @skip_numba_jit
+    @skip_sdc_jit('Test hangs due to a call of Series.sort_values')
     def test_series_operator_add_align_index_str_capacity(self):
         """Verifies implementation of Series.operator.add and alignment of string indexes of large size"""
         def test_impl(A, B):

From 7c630fab503adf097208ed5f3b4d3c706dfa4109 Mon Sep 17 00:00:00 2001
From: "Kozlov, Alexey" <alexey.kozlov@intel.com>
Date: Fri, 6 Dec 2019 16:46:34 +0300
Subject: [PATCH 4/4] Bugfix in indexes join and minor changes

---
 sdc/datatypes/common_functions.py             | 254 ++++++++++++++++-
 sdc/datatypes/hpat_pandas_series_functions.py | 261 ++----------------
 sdc/hiframes/pd_series_ext.py                 | 148 +++++-----
 sdc/tests/test_series.py                      |  98 +------
 4 files changed, 354 insertions(+), 407 deletions(-)

diff --git a/sdc/datatypes/common_functions.py b/sdc/datatypes/common_functions.py
index f22028a14..1561d0ed1 100644
--- a/sdc/datatypes/common_functions.py
+++ b/sdc/datatypes/common_functions.py
@@ -31,6 +31,7 @@
 """
 
 import numpy
+import pandas
 
 import numba
 from numba import types
@@ -39,7 +40,8 @@
 from numba import numpy_support
 
 import sdc
-from sdc.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to)
+from sdc.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to,
+                             str_arr_is_na, pre_alloc_string_array, str_arr_set_na)
 
 
 class TypeChecker:
@@ -137,9 +139,7 @@ def _append_single_numeric_impl(A, B):
         elif isinstance(B, (types.UniTuple, types.List)):
             # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
             # to resolve common dtype of heterogeneous sequence of arrays
-            np_dtypes = [numpy_support.as_dtype(A.dtype), numpy_support.as_dtype(B.dtype.dtype)]
-            np_common_dtype = numpy.find_common_type([], np_dtypes)
-            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
+            numba_common_dtype = find_common_dtype_from_numpy_dtypes([A.dtype, B.dtype.dtype], [])
 
             # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
             def _append_list_numeric_impl(A, B):
@@ -191,9 +191,12 @@ def _append_list_string_array_impl(A, B):
 
 @numba.njit
 def _hpat_ensure_array_capacity(new_size, arr):
-    """Function creating a copy of numpy array with a size more than specified"""
+    """ Function ensuring that the size of numpy array is at least as specified
+        Returns newly allocated array of bigger size with copied elements if existing size is less than requested
+    """
+
     k = len(arr)
-    if k > new_size:
+    if k >= new_size:
         return arr
 
     n = k
@@ -203,11 +206,238 @@ def _hpat_ensure_array_capacity(new_size, arr):
     res[:k] = arr[:k]
     return res
 
-def find_common_dtype_for_scalar_numpy_types(dtype1, dtype2):
-    """Used to find common numba dtype for two numba dtypes each representing some scalar numpy dtype"""
-    np_dtypes = [numpy_support.as_dtype(dtype1),
-                 numpy_support.as_dtype(dtype2)]
-    np_common_dtype = numpy.find_common_type([], np_dtypes)
+
+def find_common_dtype_from_numpy_dtypes(array_types, scalar_types):
+    """Find the common numba dtype for sequences of numba array and scalar dtypes, each representing a numpy dtype"""
+    np_array_dtypes = [numpy_support.as_dtype(dtype) for dtype in array_types]
+    np_scalar_dtypes = [numpy_support.as_dtype(dtype) for dtype in scalar_types]
+    np_common_dtype = numpy.find_common_type(np_array_dtypes, np_scalar_dtypes)
     numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
 
-    return numba_common_dtype
\ No newline at end of file
+    return numba_common_dtype
+
+
+def hpat_join_series_indexes(left, right):
+    pass  # stub only: the implementation is registered below via @overload(hpat_join_series_indexes)
+
+
+@overload(hpat_join_series_indexes)
+def hpat_join_series_indexes_overload(left, right):
+    """Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm.
+    Implementations return a tuple (joined, lidx, ridx); an indexer value of -1 marks a side with no match."""
+    # TODO: eliminate code duplication by merging implementations for numeric and StringArray
+    # requires equivalents of numpy.argsort and _hpat_ensure_array_capacity for StringArrays
+    if (isinstance(left, types.Array) and isinstance(right, types.Array)):
+
+        numba_common_dtype = find_common_dtype_from_numpy_dtypes([left.dtype, right.dtype], [])
+        if isinstance(numba_common_dtype, types.Number):
+
+            def hpat_join_series_indexes_impl(left, right):
+
+                # allocate result arrays with some headroom, they are grown on demand while joining
+                lsize = len(left)
+                rsize = len(right)
+                est_total_size = int(1.1 * (lsize + rsize))
+
+                lidx = numpy.empty(est_total_size, numpy.int64)
+                ridx = numpy.empty(est_total_size, numpy.int64)
+                joined = numpy.empty(est_total_size, numba_common_dtype)
+
+                # sort arrays saving the old positions
+                sorted_left = numpy.argsort(left, kind='mergesort')
+                sorted_right = numpy.argsort(right, kind='mergesort')
+                # i, j - current positions in sorted left and right, k - number of result elements written
+                i, j, k = 0, 0, 0
+                while (i < lsize and j < rsize):
+                    joined = _hpat_ensure_array_capacity(k + 1, joined)
+                    lidx = _hpat_ensure_array_capacity(k + 1, lidx)
+                    ridx = _hpat_ensure_array_capacity(k + 1, ridx)
+
+                    left_index = left[sorted_left[i]]
+                    right_index = right[sorted_right[j]]
+
+                    if (left_index < right_index):
+                        joined[k] = left_index
+                        lidx[k] = sorted_left[i]
+                        ridx[k] = -1
+                        i += 1
+                        k += 1
+                    elif (left_index > right_index):
+                        joined[k] = right_index
+                        lidx[k] = -1
+                        ridx[k] = sorted_right[j]
+                        j += 1
+                        k += 1
+                    else:
+                        # find ends of sequences of equal index values in left and right
+                        ni, nj = i, j
+                        while (ni < lsize and left[sorted_left[ni]] == left_index):
+                            ni += 1
+                        while (nj < rsize and right[sorted_right[nj]] == right_index):
+                            nj += 1
+                        # NOTE(review): NaN never equals itself, so ni/nj may not advance for NaN values - verify
+                        # join the blocks found into results: each left duplicate is paired with all right duplicates
+                        block_size = nj - j
+                        to_joined = numpy.repeat(left_index, block_size)
+                        to_ridx = numpy.array([sorted_right[t] for t in numpy.arange(j, nj, 1)], numpy.int64)
+                        for s in numpy.arange(i, ni, 1):
+                            to_lidx = numpy.repeat(sorted_left[s], block_size)
+
+                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+
+                            joined[k:k + block_size] = to_joined
+                            lidx[k:k + block_size] = to_lidx
+                            ridx[k:k + block_size] = to_ridx
+                            k += block_size
+                        i = ni
+                        j = nj
+
+                # fill the end of joined with remaining part of left or right
+                if i < lsize:
+                    block_size = lsize - i
+                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                    ridx[k: k + block_size] = numpy.repeat(-1, block_size)
+                    while i < lsize:
+                        joined[k] = left[sorted_left[i]]
+                        lidx[k] = sorted_left[i]
+                        i += 1
+                        k += 1
+
+                elif j < rsize:
+                    block_size = rsize - j
+                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
+                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                    lidx[k: k + block_size] = numpy.repeat(-1, block_size)
+                    while j < rsize:
+                        joined[k] = right[sorted_right[j]]
+                        ridx[k] = sorted_right[j]
+                        j += 1
+                        k += 1
+
+                return joined[:k], lidx[:k], ridx[:k]
+
+            return hpat_join_series_indexes_impl
+
+        else:
+            # TODO: support joining indexes with common dtype=object - requires Numba
+            # support of such numpy arrays in nopython mode, for now just return None
+            return None
+
+    elif (left == string_array_type and right == string_array_type):
+
+        def hpat_join_series_indexes_impl(left, right):
+
+            # allocate result arrays with some headroom, they are grown on demand while joining
+            lsize = len(left)
+            rsize = len(right)
+            est_total_size = int(1.1 * (lsize + rsize))
+
+            lidx = numpy.empty(est_total_size, numpy.int64)
+            ridx = numpy.empty(est_total_size, numpy.int64)
+
+            # use Series.sort_values since argsort for StringArrays not implemented
+            original_left_series = pandas.Series(left)
+            original_right_series = pandas.Series(right)
+
+            # sort arrays saving the old positions
+            left_series = original_left_series.sort_values(kind='mergesort')
+            right_series = original_right_series.sort_values(kind='mergesort')
+            sorted_left = left_series._index
+            sorted_right = right_series._index
+
+            i, j, k = 0, 0, 0
+            while (i < lsize and j < rsize):
+                lidx = _hpat_ensure_array_capacity(k + 1, lidx)
+                ridx = _hpat_ensure_array_capacity(k + 1, ridx)
+
+                left_index = left[sorted_left[i]]
+                right_index = right[sorted_right[j]]
+
+                if (left_index < right_index):
+                    lidx[k] = sorted_left[i]
+                    ridx[k] = -1
+                    i += 1
+                    k += 1
+                elif (left_index > right_index):
+                    lidx[k] = -1
+                    ridx[k] = sorted_right[j]
+                    j += 1
+                    k += 1
+                else:
+                    # find ends of sequences of equal index values in left and right
+                    ni, nj = i, j
+                    while (ni < lsize and left[sorted_left[ni]] == left_index):
+                        ni += 1
+                    while (nj < rsize and right[sorted_right[nj]] == right_index):
+                        nj += 1
+
+                    # join the blocks found into results: each left duplicate is paired with all right duplicates
+                    block_size = nj - j
+                    to_ridx = numpy.array([sorted_right[t] for t in numpy.arange(j, nj, 1)], numpy.int64)
+                    for s in numpy.arange(i, ni, 1):
+                        to_lidx = numpy.repeat(sorted_left[s], block_size)
+
+                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+
+                        lidx[k:k + block_size] = to_lidx
+                        ridx[k:k + block_size] = to_ridx
+                        k += block_size
+                    i = ni
+                    j = nj
+
+            # fill the end of joined with remaining part of left or right
+            if i < lsize:
+                block_size = lsize - i
+                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                ridx[k: k + block_size] = numpy.repeat(-1, block_size)
+                while i < lsize:
+                    lidx[k] = sorted_left[i]
+                    i += 1
+                    k += 1
+
+            elif j < rsize:
+                block_size = rsize - j
+                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
+                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
+                lidx[k: k + block_size] = numpy.repeat(-1, block_size)
+                while j < rsize:
+                    ridx[k] = sorted_right[j]
+                    j += 1
+                    k += 1
+
+            # count total number of characters and allocate joined array
+            total_joined_size = k
+            num_chars_in_joined = 0
+            for i in numpy.arange(total_joined_size):
+                if lidx[i] != -1:
+                    num_chars_in_joined += len(left[lidx[i]])
+                elif ridx[i] != -1:
+                    num_chars_in_joined += len(right[ridx[i]])
+
+            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)
+
+            # iterate over joined and fill it with indexes using lidx and ridx indexers
+            for i in numpy.arange(total_joined_size):
+                if lidx[i] != -1:
+                    joined[i] = left[lidx[i]]
+                    if (str_arr_is_na(left, lidx[i])):
+                        str_arr_set_na(joined, i)
+                elif ridx[i] != -1:
+                    joined[i] = right[ridx[i]]
+                    if (str_arr_is_na(right, ridx[i])):
+                        str_arr_set_na(joined, i)
+                else:
+                    str_arr_set_na(joined, i)
+            # trim the indexers to the joined size (drops the unused capacity), same as the numeric implementation
+            return joined, lidx[:total_joined_size], ridx[:total_joined_size]
+
+        return hpat_join_series_indexes_impl
+
+    return None
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index 22361bd87..9fa9ebbb3 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -37,13 +37,12 @@
 from numba.errors import TypingError
 from numba.extending import overload, overload_method, overload_attribute
 from numba import types
-from numba import numpy_support
 
 import sdc
 import sdc.datatypes.common_functions as common_functions
 from sdc.datatypes.common_functions import TypeChecker
-from sdc.datatypes.common_functions import (_hpat_ensure_array_capacity, check_index_is_numeric,
-                                            find_common_dtype_for_scalar_numpy_types)
+from sdc.datatypes.common_functions import (check_index_is_numeric, find_common_dtype_from_numpy_dtypes,
+                                            hpat_join_series_indexes)
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
 from sdc.hiframes.pd_series_ext import SeriesType
 from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars, string_array_type,
@@ -3728,229 +3727,14 @@ def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit
     return hpat_pandas_series_pct_change_impl
 
 
-def hpat_join_series_indexes(left, right):
-    pass
-
-
-@overload(hpat_join_series_indexes)
-def hpat_join_series_indexes_overload(left, right):
-    '''Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm'''
-
-    if (isinstance(left, types.Array) and isinstance(right, types.Array)):
-        np_dtypes = [numpy_support.as_dtype(left.dtype), numpy_support.as_dtype(right.dtype)]
-        np_common_dtype = numpy.find_common_type([], np_dtypes)
-        numba_common_dtype = numpy_support.from_dtype(np_common_dtype)
-        if (isinstance(left.dtype, types.Number) and isinstance(right.dtype, types.Number)):
-
-            def hpat_join_series_indexes_impl(left, right):
-
-                # allocate result arrays
-                lsize = len(left)
-                rsize = len(right)
-                est_total_size = int(1.1 * (lsize + rsize))
-
-                lidx = numpy.empty(est_total_size, numpy.int64)
-                ridx = numpy.empty(est_total_size, numpy.int64)
-                joined = numpy.empty(est_total_size, numba_common_dtype)
-
-                # sort arrays saving the old positions
-                sorted_left = numpy.argsort(left, kind='mergesort')
-                sorted_right = numpy.argsort(right, kind='mergesort')
-
-                i, j, k = 0, 0, 0
-                while (i < lsize and j < rsize):
-                    joined = _hpat_ensure_array_capacity(k, joined)
-                    lidx = _hpat_ensure_array_capacity(k, lidx)
-                    ridx = _hpat_ensure_array_capacity(k, ridx)
-
-                    left_index = left[sorted_left[i]]
-                    right_index = right[sorted_right[j]]
-
-                    if (left_index < right_index):
-                        joined[k] = left_index
-                        lidx[k] = sorted_left[i]
-                        ridx[k] = -1
-                        i += 1
-                        k += 1
-                    elif (left_index > right_index):
-                        joined[k] = right_index
-                        lidx[k] = -1
-                        ridx[k] = sorted_right[j]
-                        j += 1
-                        k += 1
-                    else:
-                        # find ends of sequences of equivalent index values in left and right
-                        ni, nj = i, j
-                        while (ni < lsize and left[sorted_left[ni]] == left_index):
-                            ni += 1
-                        while (nj < rsize and right[sorted_right[nj]] == right_index):
-                            nj += 1
-
-                        # join the blocks found into results
-                        for s in numpy.arange(i, ni, 1):
-                            block_size = nj - j
-                            to_joined = numpy.repeat(left_index, block_size)
-                            to_lidx = numpy.repeat(sorted_left[s], block_size)
-                            to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)
-
-                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
-                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-
-                            joined[k:k + block_size] = to_joined
-                            lidx[k:k + block_size] = to_lidx
-                            ridx[k:k + block_size] = to_ridx
-                            k += block_size
-                        i = ni
-                        j = nj
-
-                # fill the end of joined with remaining part of left or right
-                if i < lsize:
-                    block_size = lsize - i
-                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
-                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-                    ridx[k: k + block_size] = numpy.repeat(-1, block_size)
-                    while i < lsize:
-                        joined[k] = left[sorted_left[i]]
-                        lidx[k] = sorted_left[i]
-                        i += 1
-                        k += 1
-
-                elif j < rsize:
-                    block_size = rsize - j
-                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
-                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-                    lidx[k: k + block_size] = numpy.repeat(-1, block_size)
-                    while j < rsize:
-                        joined[k] = right[sorted_right[j]]
-                        ridx[k] = sorted_right[j]
-                        j += 1
-                        k += 1
-
-                return joined[:k], lidx[:k], ridx[:k]
-
-            return hpat_join_series_indexes_impl
-
-    elif (left == string_array_type and right == string_array_type):
-
-        def hpat_join_series_indexes_impl(left, right):
-
-            # allocate result arrays
-            lsize = len(left)
-            rsize = len(right)
-            est_total_size = int(1.1 * (lsize + rsize))
-
-            lidx = numpy.empty(est_total_size, numpy.int64)
-            ridx = numpy.empty(est_total_size, numpy.int64)
-
-            # use Series.sort_values since argsort for StringArrays not implemented
-            original_left_series = pandas.Series(left)
-            original_right_series = pandas.Series(right)
-
-            # sort arrays saving the old positions
-            left_series = original_left_series.sort_values(kind='mergesort')
-            right_series = original_right_series.sort_values(kind='mergesort')
-            sorted_left = left_series._index
-            sorted_right = right_series._index
-
-            i, j, k = 0, 0, 0
-            while (i < lsize and j < rsize):
-                lidx = _hpat_ensure_array_capacity(k, lidx)
-                ridx = _hpat_ensure_array_capacity(k, ridx)
-
-                left_index = left[sorted_left[i]]
-                right_index = right[sorted_right[j]]
-
-                if (left_index < right_index):
-                    lidx[k] = sorted_left[i]
-                    ridx[k] = -1
-                    i += 1
-                    k += 1
-                elif (left_index > right_index):
-                    lidx[k] = -1
-                    ridx[k] = sorted_right[j]
-                    j += 1
-                    k += 1
-                else:
-                    # find ends of sequences of equivalent index values in left and right
-                    ni, nj = i, j
-                    while (ni < lsize and left[sorted_left[ni]] == left_index):
-                        ni += 1
-                    while (nj < rsize and right[sorted_right[nj]] == right_index):
-                        nj += 1
-
-                    # join the blocks found into results
-                    for s in numpy.arange(i, ni, 1):
-                        block_size = nj - j
-                        to_lidx = numpy.repeat(sorted_left[s], block_size)
-                        to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)
-
-                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-
-                        lidx[k:k + block_size] = to_lidx
-                        ridx[k:k + block_size] = to_ridx
-                        k += block_size
-                    i = ni
-                    j = nj
-
-            # fill the end of joined with remaining part of left or right
-            if i < lsize:
-                block_size = lsize - i
-                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-                ridx[k: k + block_size] = numpy.repeat(-1, block_size)
-                while i < lsize:
-                    lidx[k] = sorted_left[i]
-                    i += 1
-                    k += 1
-
-            elif j < rsize:
-                block_size = rsize - j
-                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
-                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
-                lidx[k: k + block_size] = numpy.repeat(-1, block_size)
-                while j < rsize:
-                    ridx[k] = sorted_right[j]
-                    j += 1
-                    k += 1
-
-            # count total number of characters and allocate joined array
-            total_joined_size = k
-            num_chars_in_joined = 0
-            for i in numpy.arange(total_joined_size):
-                if lidx[i] != -1:
-                    num_chars_in_joined += len(left[lidx[i]])
-                elif ridx[i] != -1:
-                    num_chars_in_joined += len(right[ridx[i]])
-
-            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)
-
-            # iterate over joined and fill it with indexes using lidx and ridx indexers
-            for i in numpy.arange(total_joined_size):
-                if lidx[i] != -1:
-                    joined[i] = left[lidx[i]]
-                    if (str_arr_is_na(left, lidx[i])):
-                        str_arr_set_na(joined, i)
-                elif ridx[i] != -1:
-                    joined[i] = right[ridx[i]]
-                    if (str_arr_is_na(right, ridx[i])):
-                        str_arr_set_na(joined, i)
-                else:
-                    str_arr_set_na(joined, i)
-
-            return joined, lidx, ridx
-
-        return hpat_join_series_indexes_impl
-
-
 @overload(operator.add)
 def hpat_pandas_series_operator_add(self, other):
     """
     Pandas Series operator :attr:`pandas.Series.add` implementation
 
+    Note: Currently implemented for numeric Series only.
+        Differs from Pandas in returning Series with fixed dtype :obj:`float64`
+
     .. only:: developer
 
     **Test**: python -m hpat.runtests sdc.tests.test_series.TestSeries.test_series_op1
@@ -3981,15 +3765,15 @@ def hpat_pandas_series_operator_add(self, other):
         ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')
 
     series_indexes_alignable = False
+    none_or_numeric_indexes = False
     if isinstance(other, SeriesType):
         if (other.index == string_array_type and self.index == string_array_type):
             series_indexes_alignable = True
 
-        if ((isinstance(self.index, types.NoneType) or
-             isinstance(self.index, types.Array) and isinstance(self.index.dtype, types.Number))
-            and (isinstance(other.index, types.NoneType) or
-                 isinstance(other.index, types.Array) and isinstance(other.index.dtype, types.Number))):
+        if ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
+                and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other))):
             series_indexes_alignable = True
+            none_or_numeric_indexes = True
 
     if isinstance(other, SeriesType) and not series_indexes_alignable:
         raise TypingError('{} Not implemented for series with not-alignable indexes. \
@@ -4003,16 +3787,8 @@ def hpat_pandas_series_add_scalar_impl(self, other):
         return hpat_pandas_series_add_scalar_impl
 
     elif (isinstance(other, SeriesType)):
-        is_numeric_index = isinstance(self.index, (types.Array, types.NoneType))
-
-        if is_numeric_index:
-            ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
-            ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
-            np_index_dtypes = [numpy_support.as_dtype(ty_left_index_dtype),
-                               numpy_support.as_dtype(ty_right_index_dtype)]
-            np_index_common_dtype = numpy.find_common_type([], np_index_dtypes)
-            numba_index_common_dtype = numpy_support.from_dtype(np_index_common_dtype)
 
+        # optimization: series with default indexes are aligned by position, so no index join is needed
         if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
             def hpat_pandas_series_add_impl(self, other):
 
@@ -4026,12 +3802,21 @@ def hpat_pandas_series_add_impl(self, other):
                     new_data[min_data_size:] = numpy.repeat(numpy.nan, max_data_size - min_data_size)
 
                     return pandas.Series(new_data, self._index)
+
+            return hpat_pandas_series_add_impl
         else:
+            # for numeric indexes find common dtype to be used when creating joined index
+            if none_or_numeric_indexes:
+                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
+                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
+                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
+                    [ty_left_index_dtype, ty_right_index_dtype], [])
+
             def hpat_pandas_series_add_impl(self, other):
                 left_index, right_index = self.index, other.index
 
                 # check if indexes are equal and series don't have to be aligned
-                if is_numeric_index == True:  # noqa
+                if none_or_numeric_indexes == True:  # noqa
                     if (numpy.array_equal(left_index, right_index)):
                         return pandas.Series(numpy.asarray(self._data + other._data, numpy.float64),
                                              numpy.asarray(left_index, numba_index_common_dtype))
@@ -4048,7 +3833,7 @@ def hpat_pandas_series_add_impl(self, other):
                         return pandas.Series(numpy.asarray(self._data + other._data, numpy.float64),
                                              self._index)
 
-                # TODO: replace below with core join(how='outer') when implemented
+                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                 joined_index, left_indexer, right_indexer = hpat_join_series_indexes(left_index, right_index)
 
                 joined_index_range = numpy.arange(len(joined_index))
@@ -4066,4 +3851,6 @@ def hpat_pandas_series_add_impl(self, other):
 
                 return pandas.Series(left_values + right_values, joined_index)
 
-    return hpat_pandas_series_add_impl
+            return hpat_pandas_series_add_impl
+
+    return None
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py
index 037658f68..f2e28be4a 100644
--- a/sdc/hiframes/pd_series_ext.py
+++ b/sdc/hiframes/pd_series_ext.py
@@ -914,15 +914,15 @@ def __init__(self, stype):
         super(SeriesIatType, self).__init__(name)
 
 
-# PR135. This needs to be commented out
-@infer_global(operator.getitem)
-class GetItemSeriesIat(AbstractTemplate):
-    key = operator.getitem
+if sdc.config.config_pipeline_hpat_default:
+    @infer_global(operator.getitem)
+    class GetItemSeriesIat(AbstractTemplate):
+        key = operator.getitem
 
-    def generic(self, args, kws):
-        # iat[] is the same as regular getitem
-        if isinstance(args[0], SeriesIatType):
-            return GetItemSeries.generic(self, (args[0].stype, args[1]), kws)
+        def generic(self, args, kws):
+            # iat[] is the same as regular getitem
+            if isinstance(args[0], SeriesIatType):
+                return GetItemSeries.generic(self, (args[0].stype, args[1]), kws)
 
 
 @infer
@@ -1031,7 +1031,7 @@ def generic_expand_cumulative_series(self, args, kws):
 _non_hpat_pipeline_attrs = [
     'resolve_append', 'resolve_combine', 'resolve_corr', 'resolve_cov',
     'resolve_dropna', 'resolve_fillna', 'resolve_head', 'resolve_nlargest',
-    'resolve_nsmallest', 'resolve_pct_change'
+    'resolve_nsmallest', 'resolve_pct_change', 'resolve_loc'
 ]
 
 # use ArrayAttribute for attributes not defined in SeriesAttribute
@@ -1047,72 +1047,72 @@ def generic_expand_cumulative_series(self, args, kws):
         if attr in SeriesAttribute.__dict__:
             delattr(SeriesAttribute, attr)
 
-# PR135. This needs to be commented out
-@infer_global(operator.getitem)
-class GetItemSeries(AbstractTemplate):
-    key = operator.getitem
-
-    def generic(self, args, kws):
-        assert not kws
-        [in_arr, in_idx] = args
-        is_arr_series = False
-        is_idx_series = False
-        is_arr_dt_index = False
-
-        if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType):
-            return None
-
-        if isinstance(in_arr, SeriesType):
-            in_arr = series_to_array_type(in_arr)
-            is_arr_series = True
-            if in_arr.dtype == types.NPDatetime('ns'):
-                is_arr_dt_index = True
-
-        if isinstance(in_idx, SeriesType):
-            in_idx = series_to_array_type(in_idx)
-            is_idx_series = True
-
-        # TODO: dt_index
-        if in_arr == string_array_type:
-            # XXX fails due in overload
-            # compile_internal version results in symbol not found!
-            # sig = self.context.resolve_function_type(
-            #     operator.getitem, (in_arr, in_idx), kws)
-            # HACK to get avoid issues for now
-            if isinstance(in_idx, (types.Integer, types.IntegerLiteral)):
-                sig = string_type(in_arr, in_idx)
-            else:
-                sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
-        elif in_arr == list_string_array_type:
-            # TODO: split view
-            # mimic array indexing for list
-            if (isinstance(in_idx, types.Array) and in_idx.ndim == 1
-                    and isinstance(
-                        in_idx.dtype, (types.Integer, types.Boolean))):
-                sig = signature(in_arr, in_arr, in_idx)
-            else:
-                sig = numba.typing.collections.GetItemSequence.generic(
+if sdc.config.config_pipeline_hpat_default:
+    @infer_global(operator.getitem)
+    class GetItemSeries(AbstractTemplate):
+        key = operator.getitem
+
+        def generic(self, args, kws):
+            assert not kws
+            [in_arr, in_idx] = args
+            is_arr_series = False
+            is_idx_series = False
+            is_arr_dt_index = False
+
+            if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType):
+                return None
+
+            if isinstance(in_arr, SeriesType):
+                in_arr = series_to_array_type(in_arr)
+                is_arr_series = True
+                if in_arr.dtype == types.NPDatetime('ns'):
+                    is_arr_dt_index = True
+
+            if isinstance(in_idx, SeriesType):
+                in_idx = series_to_array_type(in_idx)
+                is_idx_series = True
+
+            # TODO: dt_index
+            if in_arr == string_array_type:
+                # XXX fails due in overload
+                # compile_internal version results in symbol not found!
+                # sig = self.context.resolve_function_type(
+                #     operator.getitem, (in_arr, in_idx), kws)
+                # HACK to avoid these issues for now
+                if isinstance(in_idx, (types.Integer, types.IntegerLiteral)):
+                    sig = string_type(in_arr, in_idx)
+                else:
+                    sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
+            elif in_arr == list_string_array_type:
+                # TODO: split view
+                # mimic array indexing for list
+                if (isinstance(in_idx, types.Array) and in_idx.ndim == 1
+                        and isinstance(
+                            in_idx.dtype, (types.Integer, types.Boolean))):
+                    sig = signature(in_arr, in_arr, in_idx)
+                else:
+                    sig = numba.typing.collections.GetItemSequence.generic(
+                        self, (in_arr, in_idx), kws)
+            elif in_arr == string_array_split_view_type:
+                sig = GetItemStringArraySplitView.generic(
                     self, (in_arr, in_idx), kws)
-        elif in_arr == string_array_split_view_type:
-            sig = GetItemStringArraySplitView.generic(
-                self, (in_arr, in_idx), kws)
-        else:
-            out = get_array_index_type(in_arr, in_idx)
-            sig = signature(out.result, in_arr, out.index)
-
-        if sig is not None:
-            arg1 = sig.args[0]
-            arg2 = sig.args[1]
-            if is_arr_series:
-                sig.return_type = if_arr_to_series_type(sig.return_type)
-                arg1 = if_arr_to_series_type(arg1)
-            if is_idx_series:
-                arg2 = if_arr_to_series_type(arg2)
-            sig.args = (arg1, arg2)
-            # dt_index and Series(dt64) should return Timestamp
-            if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
-                sig.return_type = pandas_timestamp_type
-        return sig
+            else:
+                out = get_array_index_type(in_arr, in_idx)
+                sig = signature(out.result, in_arr, out.index)
+
+            if sig is not None:
+                arg1 = sig.args[0]
+                arg2 = sig.args[1]
+                if is_arr_series:
+                    sig.return_type = if_arr_to_series_type(sig.return_type)
+                    arg1 = if_arr_to_series_type(arg1)
+                if is_idx_series:
+                    arg2 = if_arr_to_series_type(arg2)
+                sig.args = (arg1, arg2)
+                # dt_index and Series(dt64) should return Timestamp
+                if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
+                    sig.return_type = pandas_timestamp_type
+            return sig
 
 
 @infer_global(operator.setitem)
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
index 3c85a607e..3f836780c 100644
--- a/sdc/tests/test_series.py
+++ b/sdc/tests/test_series.py
@@ -1050,6 +1050,7 @@ def test_series_op1(self):
             test_impl = _make_func_use_binop1(operator)
             hpat_func = self.jit(test_impl)
 
+            # TODO: extend to test arithmetic operations between numeric Series of different dtypes
             n = 11
             df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)})
             pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False)
@@ -1062,6 +1063,7 @@ def test_series_op2(self):
             test_impl = _make_func_use_binop1(operator)
             hpat_func = self.jit(test_impl)
 
+            # TODO: extend to test arithmetic operations between numeric Series of different dtypes
             n = 11
             if platform.system() == 'Windows' and not IS_32BITS:
                 df = pd.DataFrame({'A': np.arange(1, n, dtype=np.int64)})
@@ -1069,26 +1071,30 @@ def test_series_op2(self):
                 df = pd.DataFrame({'A': np.arange(1, n)})
             pd.testing.assert_series_equal(hpat_func(df.A, 1), test_impl(df.A, 1), check_names=False)
 
+    @skip_numba_jit('Not implemented in new-pipeline yet')
     def test_series_op3(self):
-        arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')
+        arithmetic_binops = ('+=', '-=', '*=', '/=', '//=', '%=', '**=')
 
         for operator in arithmetic_binops:
             test_impl = _make_func_use_binop2(operator)
             hpat_func = self.jit(test_impl)
 
+            # TODO: extend to test arithmetic operations between numeric Series of different dtypes
             n = 11
-            df = pd.DataFrame({'A': np.arange(1, n), 'B': np.ones(n - 1)})
+            df = pd.DataFrame({'A': np.arange(1, n, dtype=np.float64), 'B': np.ones(n - 1)})
             pd.testing.assert_series_equal(hpat_func(df.A, df.B), test_impl(df.A, df.B), check_names=False)
 
+    @skip_numba_jit('Not implemented in new-pipeline yet')
     def test_series_op4(self):
-        arithmetic_binops = ('+', '-', '*', '/', '//', '%', '**')
+        arithmetic_binops = ('+=', '-=', '*=', '/=', '//=', '%=', '**=')
 
         for operator in arithmetic_binops:
             test_impl = _make_func_use_binop2(operator)
             hpat_func = self.jit(test_impl)
 
+            # TODO: extend to test arithmetic operations between numeric Series of different dtypes
             n = 11
-            df = pd.DataFrame({'A': np.arange(1, n)})
+            df = pd.DataFrame({'A': np.arange(1, n, dtype=np.float64)})
             pd.testing.assert_series_equal(hpat_func(df.A, 1), test_impl(df.A, 1), check_names=False)
 
     def test_series_op5(self):
@@ -4579,66 +4585,6 @@ def test_series_pct_change_impl(S, periods=1, fill_method='pad', limit=None, fre
         msg = 'Method pct_change(). The object periods'
         self.assertIn(msg, str(raises.exception))
 
-    def test_series_setitem_for_value(self):
-        def test_impl(S, val):
-            S[3] = val
-            return S
-
-        hpat_func = self.jit(test_impl)
-        S = pd.Series([0, 1, 2, 3, 4])
-        value = 50
-        result_ref = test_impl(S, value)
-        result = hpat_func(S, value)
-        pd.testing.assert_series_equal(result_ref, result)
-
-    def test_series_setitem_for_slice(self):
-        def test_impl(S, val):
-            S[2:] = val
-            return S
-
-        hpat_func = self.jit(test_impl)
-        S = pd.Series([0, 1, 2, 3, 4])
-        value = 50
-        result_ref = test_impl(S, value)
-        result = hpat_func(S, value)
-        pd.testing.assert_series_equal(result_ref, result)
-
-    def test_series_setitem_for_series(self):
-        def test_impl(S, ind, val):
-            S[ind] = val
-            return S
-
-        hpat_func = self.jit(test_impl)
-        S = pd.Series([0, 1, 2, 3, 4])
-        ind = pd.Series([0, 2, 4])
-        value = 50
-        result_ref = test_impl(S, ind, value)
-        result = hpat_func(S, ind, value)
-        pd.testing.assert_series_equal(result_ref, result)
-
-    def test_series_setitem_unsupported(self):
-        def test_impl(S, ind, val):
-            S[ind] = val
-            return S
-
-        hpat_func = self.jit(test_impl)
-        S = pd.Series([0, 1, 2, 3, 4, 5])
-        ind1 = 5
-        ind2 = '3'
-        value1 = 'ababa'
-        value2 = 101
-
-        with self.assertRaises(TypingError) as raises:
-            hpat_func(S, ind1, value1)
-        msg = 'Operator setitem(). Value must be one type with series.'
-        self.assertIn(msg, str(raises.exception))
-
-        with self.assertRaises(TypingError) as raises:
-            hpat_func(S, ind2, value2)
-        msg = 'Operator setitem(). The index must be an Integer, Slice or a pandas.series.'
-        self.assertIn(msg, str(raises.exception))
-
-
     @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_scalar(self):
         """Verifies Series.operator.add implementation for numeric series and scalar second operand"""
@@ -4687,24 +4633,6 @@ def test_impl(A, B):
                 B = pd.Series(np.arange(n)**2, dtype=dtype_right)
                 pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
 
-    def test_series_operator_add_series_dtype_promotion(self):
-        """Verifies implementation of Series.operator.add between two numeric Series of different dtypes"""
-        def test_impl(A, B):
-            return A + B
-        hpat_func = self.jit(test_impl)
-
-        n = 7
-        A = pd.Series(np.array(np.arange(n), dtype=np.int32))
-        B = pd.Series(np.array(np.arange(n)**2, dtype=np.float32))
-        pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
-
-        dtypes_to_test = (np.int32, np.int64, np.float32, np.float64)
-        for dtype_left, dtype_right in combinations(dtypes_to_test, 2):
-            with self.subTest(left_series_dtype=dtype_left, right_series_dtype=dtype_right):
-                A = pd.Series(np.array(np.arange(n), dtype=dtype_left))
-                B = pd.Series(np.array(np.arange(n)**2, dtype=dtype_right))
-                pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False)
-
     @skip_numba_jit
     @skip_sdc_jit("TODO: find out why pandas aligning series indexes produces Int64Index when common dtype is float\n"
                   "AssertionError: Series.index are different\n"
@@ -4791,7 +4719,8 @@ def test_impl(A, B):
         B = pd.Series(np.arange(n)**2, index=index_B)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @skip_sdc_jit("TODO: fix Series.sort_values to handle both None and '' in string series")
+    @skip_numba_jit("TODO: fix Series.sort_values to handle both None and '' in string series")
+    @skip_sdc_jit('Arithmetic operations on Series with non-default indexes are not supported in old-style')
     def test_series_operator_add_numeric_align_index_str_fixme(self):
         """Same as test_series_operator_add_align_index_str but with None values in string indexes"""
         def test_impl(A, B):
@@ -4864,7 +4793,8 @@ def test_impl(A, B):
         B = pd.Series(np.random.ranf(n), index=index2)
         pd.testing.assert_series_equal(hpat_func(A, B), test_impl(A, B), check_dtype=False, check_names=False)
 
-    @unittest.skip("TODO: support arithemetic operations on StringArrays and extend Series.operator.add overload")
+    @skip_numba_jit
+    @skip_sdc_jit("TODO: support arithemetic operations on StringArrays and extend Series.operator.add overload")
     def test_series_operator_add_str_same_index_default(self):
         """Verifies implementation of Series.operator.add between two string Series
         with default indexes and same size"""