From 87c6bf2bd271140d2c5d4523e81aae6fa5e20e8e Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 16:03:08 -0400 Subject: [PATCH 01/36] new series type independent of types.Array --- hpat/pd_series_ext.py | 114 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 16 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index ebb32f246..dcaa3e7c3 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -3,22 +3,32 @@ type_callable, infer) from numba.typing.templates import (infer_global, AbstractTemplate, signature, AttributeTemplate) +from numba.typing.arraydecl import get_array_index_type import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, str_arr_payload_type, StringArrayType) +from hpat.pd_timestamp_ext import pandas_timestamp_type # TODO: implement type inference instead of subtyping array since Pandas as of # 0.23 is deprecating things like itemsize etc. -class SeriesType(types.Array): +# class SeriesType(types.ArrayCompatible): +class SeriesType(types.IterableType): """Temporary type class for Series objects. """ - array_priority = 1000 + # array_priority = 1000 def __init__(self, dtype, ndim, layout, readonly=False, name=None, aligned=True): - # same as types.Array, except name is Series + # same as types.Array, except name is Series, and buffer attributes + # initialized here assert ndim == 1, "Series() should be one dimensional" assert name is None + self.mutable = True + self.aligned = True + self.dtype = dtype + self.ndim = ndim + self.layout = layout + if readonly: self.mutable = False if (not aligned or @@ -31,7 +41,15 @@ def __init__(self, dtype, ndim, layout, readonly=False, name=None, if not self.aligned: type_name = "unaligned " + type_name name = "%s(%s, %sd, %s)" % (type_name, dtype, ndim, layout) - super(SeriesType, self).__init__(dtype, ndim, layout, name=name) + super(SeriesType, self).__init__(name=name) + + @property + def mangling_args(self): + # same as types.Array + args = [self.dtype, self.ndim, self.layout, + 'mutable' if self.mutable else 'readonly', + 'aligned' if self.aligned else 'unaligned'] + return self.__class__.__name__, args def copy(self, dtype=None, ndim=None, layout=None, readonly=None): # same as types.Array, except Series return type @@ -46,6 +64,11 @@ def copy(self, dtype=None, ndim=None, layout=None, readonly=None): return SeriesType(dtype=dtype, ndim=ndim, layout=layout, readonly=readonly, aligned=self.aligned) + @property + def key(self): + # same as types.Array + return self.dtype, self.ndim, self.layout, self.mutable, self.aligned + def unify(self, typingctx, other): # same as types.Array, except returns Series for Series/Series # If other is array and the ndim matches @@ -64,6 +87,45 @@ def unify(self, typingctx, other): # XXX: unify Series/Array as Array return super(SeriesType, self).unify(typingctx, other) + # @property + # def as_array(self): + # return types.Array(self.dtype, self.ndim, self.layout) + + def can_convert_to(self, typingctx, other): + # same as types.Array, TODO: add Series? + if (isinstance(other, types.Array) and other.ndim == self.ndim + and other.dtype == self.dtype): + if (other.layout in ('A', self.layout) + and (self.mutable or not other.mutable) + and (self.aligned or not other.aligned)): + return types.Conversion.safe + + def is_precise(self): + # same as types.Array + return self.dtype.is_precise() + + @property + def iterator_type(self): + # same as Buffer + # TODO: fix timestamp + return types.iterators.ArrayIterator(self) + + @property + def is_c_contig(self): + # same as Buffer + return self.layout == 'C' or (self.ndim <= 1 and self.layout in 'CF') + + @property + def is_f_contig(self): + # same as Buffer + return self.layout == 'F' or (self.ndim <= 1 and self.layout in 'CF') + + @property + def is_contig(self): + # same as Buffer + return self.layout in 'CF' + + string_series_type = SeriesType(string_type, 1, 'C', True) # TODO: create a separate DatetimeIndex type from Series dt_index_series_type = SeriesType(types.NPDatetime('ns'), 1, 'C') @@ -153,25 +215,29 @@ def cast_string_series(context, builder, fromty, toty, val): def cast_series(context, builder, fromty, toty, val): return val +# --------------------------------------------------------------------------- # +# --- typing similar to arrays adopted from arraydecl.py, npydecl.py -------- # + + @infer_getattr -class ArrayAttribute(AttributeTemplate): +class SeriesAttribute(AttributeTemplate): key = SeriesType def resolve_values(self, ary): return series_to_array_type(ary, True) # TODO: use ops logic from pandas/core/ops.py -# called from numba/numpy_support.py:resolve_output_type -# similar to SmartArray (targets/smartarray.py) -@type_callable('__array_wrap__') -def type_series_array_wrap(context): - def typer(input_type, result): - if isinstance(input_type, SeriesType): - return input_type.copy(dtype=result.dtype, - ndim=result.ndim, - layout=result.layout) - - return typer +# # called from numba/numpy_support.py:resolve_output_type +# # similar to SmartArray (targets/smartarray.py) +# @type_callable('__array_wrap__') +# def type_series_array_wrap(context): +# def typer(input_type, result): +# if isinstance(input_type, SeriesType): +# return input_type.copy(dtype=result.dtype, +# ndim=result.ndim, +# layout=result.layout) + +# return typer @infer class SeriesCompEqual(AbstractTemplate): @@ -210,3 +276,19 @@ class CmpOpLESeries(SeriesCompEqual): @infer class CmpOpLTSeries(SeriesCompEqual): key = '<' + +# @infer +# class GetItemBuffer(AbstractTemplate): +# key = "getitem" + +# def generic(self, args, kws): +# assert not kws +# [ary, idx] = args +# import pdb; pdb.set_trace() +# if not isinstance(ary, SeriesType): +# return +# out = get_array_index_type(ary, idx) +# # check result to be dt64 since it might be sliced array +# # replace result with Timestamp +# if out is not None and out.result == types.NPDatetime('ns'): +# return signature(pandas_timestamp_type, ary, out.index) From 894903951c7b12867bd94462a4619384279231eb Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 16:31:19 -0400 Subject: [PATCH 02/36] set SeriesAttribute typing from ArrayAttribute if not overwritten --- hpat/pd_series_ext.py | 7 +++++++ hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index dcaa3e7c3..4509ecb00 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -1,3 +1,4 @@ +import numba from numba import types from numba.extending import (models, register_model, lower_cast, infer_getattr, type_callable, infer) @@ -292,3 +293,9 @@ class CmpOpLTSeries(SeriesCompEqual): # # replace result with Timestamp # if out is not None and out.result == types.NPDatetime('ns'): # return signature(pandas_timestamp_type, ary, out.index) + + +# use ArrayAttribute for attributes not defined in SeriesAttribute +for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): + if attr.startswith('resolve_') and attr not in SeriesAttribute.__dict__: + setattr(SeriesAttribute, attr, func) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index dfd7ac5cd..da65fb72a 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -83,6 +83,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_attr1(self): + def test_impl(A): + return A.size + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_values1(self): def test_impl(A): return (A == 2).values From 297d49ef0e4503236ef6ead39fb85ca9aee56233 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 16:38:29 -0400 Subject: [PATCH 03/36] avoid addition of array attrs that are not in series --- hpat/pd_series_ext.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 4509ecb00..7627ed951 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -294,8 +294,12 @@ class CmpOpLTSeries(SeriesCompEqual): # if out is not None and out.result == types.NPDatetime('ns'): # return signature(pandas_timestamp_type, ary, out.index) +# TODO: add itemsize, strides, etc. when removed from Pandas +_not_series_array_attrs = ['flat', 'ctypes'] # use ArrayAttribute for attributes not defined in SeriesAttribute for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): - if attr.startswith('resolve_') and attr not in SeriesAttribute.__dict__: + if (attr.startswith('resolve_') + and attr not in SeriesAttribute.__dict__ + and attr not in _not_series_array_attrs): setattr(SeriesAttribute, attr, func) From a95c67cbf86637ae64d809eedd145566945f47b8 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 19:18:55 -0400 Subject: [PATCH 04/36] avoid more array attrs that are not in series --- hpat/pd_series_ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 7627ed951..5ac8a5482 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -295,7 +295,7 @@ class CmpOpLTSeries(SeriesCompEqual): # return signature(pandas_timestamp_type, ary, out.index) # TODO: add itemsize, strides, etc. when removed from Pandas -_not_series_array_attrs = ['flat', 'ctypes'] +_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten'] # use ArrayAttribute for attributes not defined in SeriesAttribute for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): From 66574ebe8d89d16404f6e15987199743108f521f Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 19:19:15 -0400 Subject: [PATCH 05/36] test series copy --- hpat/tests/test_series.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index da65fb72a..1159a063d 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -92,6 +92,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_attr2(self): + def test_impl(A): + return A.copy() + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_values1(self): def test_impl(A): return (A == 2).values From 572b8fd7ba3b16509aef8f22513558f5f49f0888 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 19:52:49 -0400 Subject: [PATCH 06/36] fix series attr2 test --- hpat/tests/test_series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 1159a063d..a90083fca 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -94,12 +94,12 @@ def test_impl(A): def test_series_attr2(self): def test_impl(A): - return A.copy() + return A.copy().values n = 11 df = pd.DataFrame({'A': np.arange(n)}) hpat_func = hpat.jit(test_impl) - self.assertEqual(hpat_func(df.A), test_impl(df.A)) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) def test_series_values1(self): def test_impl(A): From 357c8c454195b1e43d30dec4b7a0928eff49fdc5 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 19:53:17 -0400 Subject: [PATCH 07/36] hiframes_typed replace BoundFunction types (array.call) for Series --- hpat/hiframes_typed.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index 12af8dba6..3823c9f5f 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -8,7 +8,8 @@ find_topo_order, gen_np_call, get_definition, guard, find_callname, mk_alloc, find_const, is_setitem, is_getitem) -from numba.typing.templates import Signature +from numba.typing.templates import Signature, bound_function +from numba.typing.arraydecl import ArrayAttribute import hpat from hpat.utils import get_definitions, debug_prints from hpat.hiframes import include_new_blocks, gen_empty_like @@ -61,6 +62,16 @@ def run(self): # print("replacing series type", vname) new_typ = series_to_array_type(typ) replace_series[vname] = new_typ + # replace array.call() variable types + if isinstance(typ, types.BoundFunction) and isinstance(typ.this, SeriesType): + this = series_to_array_type(typ.this) + # TODO: handle string arrays, etc. + assert typ.typing_key.startswith('array.') + attr = typ.typing_key[len('array.'):] + resolver = getattr(ArrayAttribute, 'resolve_'+attr).__wrapped__ + new_typ = bound_function(typ.typing_key)(resolver)( + ArrayAttribute(self.typingctx), this) + replace_series[vname] = new_typ for vname, typ in replace_series.items(): self.typemap.pop(vname) @@ -79,7 +90,14 @@ def run(self): # reusing sig.args since some types become Const in sig argtyps = sig.args[:len(call.args)] kwtyps = {name: self.typemap[v.name] for name, v in call.kws} - self.typemap[call.func.name].get_call_type(self.typingctx , argtyps, kwtyps) + + new_call_typ = self.typemap[call.func.name].get_call_type( + self.typingctx , argtyps, kwtyps) + # calltypes of things like BoundFunction (array.call) need to + # be update for lowering to work + if call in self.calltypes: + self.calltypes.pop(call) + self.calltypes[call] = new_call_typ self.func_ir._definitions = get_definitions(self.func_ir.blocks) return if_series_to_array_type(self.return_type) From b6cf81be252199b7b9796715d0c51a8e1ecaf590 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 20:23:04 -0400 Subject: [PATCH 08/36] move hiframes_typed pass before pre_parfor_pass --- hpat/compiler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hpat/compiler.py b/hpat/compiler.py index 2fc7eeeb0..172d26c0c 100644 --- a/hpat/compiler.py +++ b/hpat/compiler.py @@ -148,8 +148,11 @@ def define_pipelines(self, pm): # e.g. need to handle string array exprs before nopython rewrites # converts them to arrayexpr. # self.add_optimization_stage(pm) - pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors") + # hiframes typed pass should be before pre_parfor since variable types + # need updating, and A.call to np.call transformation is invalid for + # Series (e.g. S.var is not the same as np.var(S)) pm.add_stage(self.stage_df_typed_pass, "typed hiframes pass") + pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors") if not self.flags.no_rewrites: pm.add_stage(self.stage_nopython_rewrites, "nopython rewrites") if self.flags.auto_parallel.enabled: From d0a51b2cfbbfe91267d9cbcba920619dcbf33a19 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 20:23:17 -0400 Subject: [PATCH 09/36] test np.call(Series) --- hpat/tests/test_series.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index a90083fca..dd0abf006 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -101,6 +101,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_np_call_on_series1(self): + def test_impl(A): + return np.min(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_series_values1(self): def test_impl(A): return (A == 2).values From 41fc48b755d876e1e7e967fb69a6f71f2a136c0c Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 21:45:08 -0400 Subject: [PATCH 10/36] replace Series methods without bound_function --- hpat/hiframes_typed.py | 9 ++++++--- hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index 3823c9f5f..aedf25c34 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -68,9 +68,12 @@ def run(self): # TODO: handle string arrays, etc. assert typ.typing_key.startswith('array.') attr = typ.typing_key[len('array.'):] - resolver = getattr(ArrayAttribute, 'resolve_'+attr).__wrapped__ - new_typ = bound_function(typ.typing_key)(resolver)( - ArrayAttribute(self.typingctx), this) + resolver = getattr(ArrayAttribute, 'resolve_'+attr) + # methods are either installed with install_array_method or + # using @bound_function in arraydecl.py + if hasattr(resolver, '__wrapped__'): + resolver = bound_function(typ.typing_key)(resolver.__wrapped__) + new_typ = resolver(ArrayAttribute(self.typingctx), this) replace_series[vname] = new_typ for vname, typ in replace_series.items(): diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index dd0abf006..a43e16d7d 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -101,6 +101,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_series_attr3(self): + def test_impl(A): + return A.min() + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_np_call_on_series1(self): def test_impl(A): return np.min(A) From ef09d13deef72fa4af1fcf72b596e224b3861c59 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 21:47:52 -0400 Subject: [PATCH 11/36] Series cumsum/cumprod typing --- hpat/pd_series_ext.py | 26 +++++++++++++++++++++++++- hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 5ac8a5482..fe0af1190 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -4,7 +4,7 @@ type_callable, infer) from numba.typing.templates import (infer_global, AbstractTemplate, signature, AttributeTemplate) -from numba.typing.arraydecl import get_array_index_type +from numba.typing.arraydecl import get_array_index_type, _expand_integer import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, @@ -294,6 +294,30 @@ class CmpOpLTSeries(SeriesCompEqual): # if out is not None and out.result == types.NPDatetime('ns'): # return signature(pandas_timestamp_type, ary, out.index) +def install_array_method(name, generic, support_literals=False): + # taken from arraydecl.py, Series instead of Array + my_attr = {"key": "array." + name, "generic": generic} + temp_class = type("Series_" + name, (AbstractTemplate,), my_attr) + if support_literals: + temp_class.support_literals = support_literals + def array_attribute_attachment(self, ary): + return types.BoundFunction(temp_class, ary) + + setattr(SeriesAttribute, "resolve_" + name, array_attribute_attachment) + +def generic_expand_cumulative_series(self, args, kws): + # taken from arraydecl.py, replaced Array with Series + assert not args + assert not kws + assert isinstance(self.this, SeriesType) + return_type = SeriesType(dtype=_expand_integer(self.this.dtype), + ndim=1, layout='C') + return signature(return_type, recvr=self.this) + +# replacing cumsum/cumprod since arraydecl.py definition uses types.Array +for fname in ["cumsum", "cumprod"]: + install_array_method(fname, generic_expand_cumulative_series) + # TODO: add itemsize, strides, etc. when removed from Pandas _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten'] diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index a43e16d7d..d2989d101 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -110,6 +110,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_attr4(self): + def test_impl(A): + return A.cumsum().values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_np_call_on_series1(self): def test_impl(A): return np.min(A) From 76228829dc3bc6f1ec7d0777423f62f9b1df4bef Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Mon, 23 Jul 2018 22:08:23 -0400 Subject: [PATCH 12/36] Series typing for argsort --- hpat/pd_series_ext.py | 12 ++++++++++-- hpat/tests/test_series.py | 10 ++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index fe0af1190..42f8082d1 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -3,8 +3,9 @@ from numba.extending import (models, register_model, lower_cast, infer_getattr, type_callable, infer) from numba.typing.templates import (infer_global, AbstractTemplate, signature, - AttributeTemplate) -from numba.typing.arraydecl import get_array_index_type, _expand_integer + AttributeTemplate, bound_function) +from numba.typing.arraydecl import (get_array_index_type, _expand_integer, + ArrayAttribute) import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, @@ -227,6 +228,13 @@ class SeriesAttribute(AttributeTemplate): def resolve_values(self, ary): return series_to_array_type(ary, True) + @bound_function("array.argsort") + def resolve_argsort(self, ary, args, kws): + resolver = ArrayAttribute.resolve_argsort.__wrapped__ + sig = resolver(self, ary, args, kws) + sig.return_type = arr_to_series_type(sig.return_type) + return sig + # TODO: use ops logic from pandas/core/ops.py # # called from numba/numpy_support.py:resolve_output_type # # similar to SmartArray (targets/smartarray.py) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index d2989d101..c60e0913f 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -119,6 +119,16 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + @unittest.skip("needs argsort fix in canonicalize_array_math") + def test_series_attr5(self): + def test_impl(A): + return A.argsort().values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_np_call_on_series1(self): def test_impl(A): return np.min(A) From 8979d2cf926a8329273fbcfff599c8f6ca55a30b Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 07:29:07 -0400 Subject: [PATCH 13/36] typing for Series.take --- hpat/pd_series_ext.py | 7 +++++++ hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 42f8082d1..fc5130ba3 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -235,6 +235,13 @@ def resolve_argsort(self, ary, args, kws): sig.return_type = arr_to_series_type(sig.return_type) return sig + @bound_function("array.take") + def resolve_take(self, ary, args, kws): + resolver = ArrayAttribute.resolve_take.__wrapped__ + sig = resolver(self, ary, args, kws) + sig.return_type = arr_to_series_type(sig.return_type) + return sig + # TODO: use ops logic from pandas/core/ops.py # # called from numba/numpy_support.py:resolve_output_type # # similar to SmartArray (targets/smartarray.py) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index c60e0913f..f4b8c3947 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -129,6 +129,15 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_series_attr6(self): + def test_impl(A): + return A.take([2,3]).values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_np_call_on_series1(self): def test_impl(A): return np.min(A) From 9c737c45fd158fc9eb44290b1eeeef1fd3f10805 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 08:13:06 -0400 Subject: [PATCH 14/36] Series getitem typing --- hpat/pd_series_ext.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index fc5130ba3..c6a95e77f 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -342,3 +342,21 @@ def generic_expand_cumulative_series(self, args, kws): and attr not in SeriesAttribute.__dict__ and attr not in _not_series_array_attrs): setattr(SeriesAttribute, attr, func) + +@infer +class GetItemBuffer(AbstractTemplate): + key = "getitem" + + def generic(self, args, kws): + assert not kws + [series, idx] = args + if not isinstance(series, SeriesType): + return None + ary = series_to_array_type(series) + # TODO: string array, dt_index + out = get_array_index_type(ary, idx) + if out is not None: + ret_typ = arr_to_series_type(out.result) + if ret_typ is None: # not array output + ret_typ = out.result + return signature(ret_typ, ary, out.index) From 66e7a70eb4e41076d2d9984e9ca0375849204d42 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 08:26:53 -0400 Subject: [PATCH 15/36] Series setitem typing --- hpat/pd_series_ext.py | 21 +++++++++++++++++++-- hpat/tests/test_series.py | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index c6a95e77f..2142fde27 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -5,7 +5,7 @@ from numba.typing.templates import (infer_global, AbstractTemplate, signature, AttributeTemplate, bound_function) from numba.typing.arraydecl import (get_array_index_type, _expand_integer, - ArrayAttribute) + ArrayAttribute, SetItemBuffer) import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, @@ -344,7 +344,7 @@ def generic_expand_cumulative_series(self, args, kws): setattr(SeriesAttribute, attr, func) @infer -class GetItemBuffer(AbstractTemplate): +class GetItemSeries(AbstractTemplate): key = "getitem" def generic(self, args, kws): @@ -360,3 +360,20 @@ def generic(self, args, kws): if ret_typ is None: # not array output ret_typ = out.result return signature(ret_typ, ary, out.index) + +@infer +class SetItemSeries(SetItemBuffer): + key = "setitem" + + def generic(self, args, kws): + assert not kws + series, idx, val = args + if not isinstance(series, SeriesType): + return None + ary = series_to_array_type(series) + # TODO: strings, dt_index + res = super(SetItemSeries, self).generic((ary, idx, val), kws) + if res is not None: + new_series = arr_to_series_type(res.args[0]) + res.args = (new_series, res.args[1], res.args[2]) + return res diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index f4b8c3947..20b427624 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -176,6 +176,21 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_setitem_series2(self): + def test_impl(A, i): + A[i] = 100 + # TODO: remove return after aliasing fix + return A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + A1 = df.A.copy() + A2 = df.A + hpat_func = hpat.jit(test_impl) + hpat_func(A1, 0) + test_impl(A2, 0) + np.testing.assert_array_equal(A1.values, A2.values) + def test_static_getitem_series1(self): def test_impl(A): return A[0] From 1e90d9b1087c3a84ad5ebe9fd8ef9e7aae6f190d Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 09:35:51 -0400 Subject: [PATCH 16/36] Series operators --- hpat/hiframes_api.py | 17 +++----------- hpat/hiframes_typed.py | 4 ++-- hpat/pd_series_ext.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/hpat/hiframes_api.py b/hpat/hiframes_api.py index 100cb2ac5..90216bab9 100644 --- a/hpat/hiframes_api.py +++ b/hpat/hiframes_api.py @@ -22,7 +22,9 @@ import numpy as np from hpat.pd_timestamp_ext import timestamp_series_type, pandas_timestamp_type import hpat -from hpat.pd_series_ext import SeriesType, BoxedSeriesType, string_series_type, arr_to_series_type, arr_to_boxed_series_type, series_to_array_type +from hpat.pd_series_ext import (SeriesType, BoxedSeriesType, + string_series_type, arr_to_series_type, arr_to_boxed_series_type, + series_to_array_type, if_series_to_array_type) # from numba.typing.templates import infer_getattr, AttributeTemplate, bound_function # from numba import types @@ -664,19 +666,6 @@ def generic(self, args, kws): def to_series_dummy_impl(context, builder, sig, args): return impl_ret_borrowed(context, builder, sig.return_type, args[0]) -def if_series_to_array_type(typ, replace_boxed=False): - if isinstance(typ, SeriesType): - return series_to_array_type(typ, replace_boxed) - # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed - # it results in cast error for call dummy_unbox_series - if replace_boxed and isinstance(typ, BoxedSeriesType): - return series_to_array_type(typ, replace_boxed) - if isinstance(typ, (types.Tuple, types.UniTuple)): - return types.Tuple( - [if_series_to_array_type(t, replace_boxed) for t in typ.types]) - # TODO: other types than can have Series inside: list, set, etc. - return typ - # dummy func to convert input series to array type def dummy_unbox_series(arr): diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index aedf25c34..bfb42dbca 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -13,11 +13,11 @@ import hpat from hpat.utils import get_definitions, debug_prints from hpat.hiframes import include_new_blocks, gen_empty_like -from hpat.hiframes_api import if_series_to_array_type from hpat.str_ext import string_type from hpat.str_arr_ext import string_array_type, StringArrayType, is_str_arr_typ from hpat.pd_series_ext import (SeriesType, string_series_type, - series_to_array_type, BoxedSeriesType, dt_index_series_type) + series_to_array_type, BoxedSeriesType, dt_index_series_type, + if_series_to_array_type) class HiFramesTyped(object): diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 2142fde27..a574a54ae 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -6,6 +6,8 @@ AttributeTemplate, bound_function) from numba.typing.arraydecl import (get_array_index_type, _expand_integer, ArrayAttribute, SetItemBuffer) +from numba.typing.npydecl import (NumpyRulesArrayOperator, + NumpyRulesInplaceArrayOperator, NumpyRulesUnaryArrayOperator) import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, @@ -207,6 +209,27 @@ def arr_to_boxed_series_type(arr): return series_type +def if_series_to_array_type(typ, replace_boxed=False): + if isinstance(typ, SeriesType): + return series_to_array_type(typ, replace_boxed) + # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed + # it results in cast error for call dummy_unbox_series + if replace_boxed and isinstance(typ, BoxedSeriesType): + return series_to_array_type(typ, replace_boxed) + if isinstance(typ, (types.Tuple, types.UniTuple)): + return types.Tuple( + [if_series_to_array_type(t, replace_boxed) for t in typ.types]) + # TODO: other types than can have Series inside: list, set, etc. + return typ + +def if_arr_to_series_type(typ): + if isinstance(typ, types.Array): + return arr_to_series_type(typ) + if isinstance(typ, (types.Tuple, types.UniTuple)): + return types.Tuple([if_arr_to_series_type(t) for t in typ.types]) + # TODO: other types than can have Arrays inside: list, set, etc. + return typ + @lower_cast(string_series_type, string_array_type) @lower_cast(string_array_type, string_series_type) def cast_string_series(context, builder, fromty, toty, val): @@ -377,3 +400,30 @@ def generic(self, args, kws): new_series = arr_to_series_type(res.args[0]) res.args = (new_series, res.args[1], res.args[2]) return res + +def series_op_generic(self, args, kws): + # return if no Series + if not any(isinstance(arg, SeriesType) for arg in args): + return None + # convert args to array + new_args = tuple(if_series_to_array_type(arg) for arg in args) + sig = super(SeriesOpUfuncs, self).generic(new_args, kws) + # convert back to Series + if sig is not None: + sig.return_type = if_arr_to_series_type(sig.return_type) + sig.args = tuple(if_arr_to_series_type(a) for a in sig.args) + return sig + +class SeriesOpUfuncs(NumpyRulesArrayOperator): + generic = series_op_generic + +class SeriesInplaceOpUfuncs(NumpyRulesInplaceArrayOperator): + generic = series_op_generic + +class SeriesUnaryOpUfuncs(NumpyRulesUnaryArrayOperator): + generic = series_op_generic + +# TODO: change class name to Series in install_operations +SeriesOpUfuncs.install_operations() +SeriesInplaceOpUfuncs.install_operations() +SeriesUnaryOpUfuncs.install_operations() From 07050dce9314cde5e1b277fd236c3f8f901c96ba Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 09:38:11 -0400 Subject: [PATCH 17/36] test series op --- hpat/tests/test_series.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 20b427624..c7276fe4a 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -209,6 +209,24 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_series_op1(self): + def test_impl(A, i): + return A+A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 0), test_impl(df.A, 0)) + + def test_series_op2(self): + def test_impl(A, i): + return A+i + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 1), test_impl(df.A, 1)) + def test_list_convert(self): def test_impl(): df = pd.DataFrame({'one': np.array([-1, np.nan, 2.5]), From b6cf43c751fe905e0317d9998246e668b6549b80 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 09:56:29 -0400 Subject: [PATCH 18/36] Series op typing cls fix --- hpat/pd_series_ext.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index a574a54ae..a0d76610c 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -6,7 +6,7 @@ AttributeTemplate, bound_function) from numba.typing.arraydecl import (get_array_index_type, _expand_integer, ArrayAttribute, SetItemBuffer) -from numba.typing.npydecl import (NumpyRulesArrayOperator, +from numba.typing.npydecl import (Numpy_rules_ufunc, NumpyRulesArrayOperator, NumpyRulesInplaceArrayOperator, NumpyRulesUnaryArrayOperator) import hpat from hpat.str_ext import string_type @@ -401,13 +401,13 @@ def generic(self, args, kws): res.args = (new_series, res.args[1], res.args[2]) return res -def series_op_generic(self, args, kws): +def series_op_generic(cls, self, args, kws): # return if no Series if not any(isinstance(arg, SeriesType) for arg in args): return None # convert args to array new_args = tuple(if_series_to_array_type(arg) for arg in args) - sig = super(SeriesOpUfuncs, self).generic(new_args, kws) + sig = super(cls, self).generic(new_args, kws) # convert back to Series if sig is not None: sig.return_type = if_arr_to_series_type(sig.return_type) @@ -415,13 +415,16 @@ def series_op_generic(self, args, kws): return sig class SeriesOpUfuncs(NumpyRulesArrayOperator): - generic = series_op_generic + def generic(self, args, kws): + return series_op_generic(SeriesOpUfuncs, self, args, kws) class SeriesInplaceOpUfuncs(NumpyRulesInplaceArrayOperator): - generic = series_op_generic + def generic(self, args, kws): + return series_op_generic(SeriesInplaceOpUfuncs, self, args, kws) class SeriesUnaryOpUfuncs(NumpyRulesUnaryArrayOperator): - generic = series_op_generic + def generic(self, args, kws): + return series_op_generic(SeriesUnaryOpUfuncs, self, args, kws) # TODO: change class name to Series in install_operations SeriesOpUfuncs.install_operations() From aaeba6efeba85f9a03c9b41ff4da8107a236d4c6 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 09:57:30 -0400 Subject: [PATCH 19/36] inplace op Series test --- hpat/tests/test_series.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index c7276fe4a..ac79a4c07 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -227,6 +227,16 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A, 1), test_impl(df.A, 1)) + def test_series_op3(self): + def test_impl(A, i): + A += i + return A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A.copy(), 1), test_impl(df.A, 1)) + def test_list_convert(self): def test_impl(): df = pd.DataFrame({'one': np.array([-1, np.nan, 2.5]), From f600aa97c332adbc62d56679dd474492771ad367 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 10:01:57 -0400 Subject: [PATCH 20/36] Series ufunc install --- hpat/pd_series_ext.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index a0d76610c..5160d1cc2 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -1,3 +1,4 @@ +import numpy as np import numba from numba import types from numba.extending import (models, register_model, lower_cast, infer_getattr, @@ -430,3 +431,23 @@ def generic(self, args, kws): SeriesOpUfuncs.install_operations() SeriesInplaceOpUfuncs.install_operations() SeriesUnaryOpUfuncs.install_operations() + +class Series_Numpy_rules_ufunc(Numpy_rules_ufunc): + def generic(self, args, kws): + return series_op_generic(Series_Numpy_rules_ufunc, self, args, kws) + +# copied from npydecl.py since deleted +_aliases = set(["bitwise_not", "mod", "abs"]) +if np.divide == np.true_divide: + _aliases.add("divide") + +for func in numba.typing.npydecl.supported_ufuncs: + name = func.__name__ + #_numpy_ufunc(func) + class typing_class(Series_Numpy_rules_ufunc): + key = func + + typing_class.__name__ = "resolve_series_{0}".format(name) + + if not name in _aliases: + infer_global(func, types.Function(typing_class)) From 28c18bc8f6ebf1a9cb053d61d2c7388a1b19e3ed Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 10:32:40 -0400 Subject: [PATCH 21/36] test series ufunc --- hpat/tests/test_series.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index ac79a4c07..16ef809c5 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -237,6 +237,15 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A.copy(), 1), test_impl(df.A, 1)) + def test_series_ufunc1(self): + def test_impl(A, i): + return np.isinf(A).values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 1), test_impl(df.A, 1)) + def test_list_convert(self): def test_impl(): df = pd.DataFrame({'one': np.array([-1, np.nan, 2.5]), From bf5769065760ae5cdf642d749d357340ca098b20 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 10:53:07 -0400 Subject: [PATCH 22/36] add if to arr_to_series since Series is not Array subtype anymore --- hpat/hiframes_api.py | 4 ++-- hpat/pd_series_ext.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/hpat/hiframes_api.py b/hpat/hiframes_api.py index 90216bab9..6b1cd44f6 100644 --- a/hpat/hiframes_api.py +++ b/hpat/hiframes_api.py @@ -23,7 +23,7 @@ from hpat.pd_timestamp_ext import timestamp_series_type, pandas_timestamp_type import hpat from hpat.pd_series_ext import (SeriesType, BoxedSeriesType, - string_series_type, arr_to_series_type, arr_to_boxed_series_type, + string_series_type, if_arr_to_series_type, arr_to_boxed_series_type, series_to_array_type, if_series_to_array_type) # from numba.typing.templates import infer_getattr, AttributeTemplate, bound_function @@ -658,7 +658,7 @@ def generic(self, args, kws): if isinstance(arr, BoxedSeriesType): series_type = SeriesType(arr.dtype, 1, 'C') else: - series_type = arr_to_series_type(arr) + series_type = if_arr_to_series_type(arr) assert series_type is not None, "unknown type for pd.Series: {}".format(arr) return signature(series_type, arr) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 5160d1cc2..f3b9a0641 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -256,14 +256,14 @@ def resolve_values(self, ary): def resolve_argsort(self, ary, args, kws): resolver = ArrayAttribute.resolve_argsort.__wrapped__ sig = resolver(self, ary, args, kws) - sig.return_type = arr_to_series_type(sig.return_type) + sig.return_type = if_arr_to_series_type(sig.return_type) return sig @bound_function("array.take") def resolve_take(self, ary, args, kws): resolver = ArrayAttribute.resolve_take.__wrapped__ sig = resolver(self, ary, args, kws) - sig.return_type = arr_to_series_type(sig.return_type) + sig.return_type = if_arr_to_series_type(sig.return_type) return sig # TODO: use ops logic from pandas/core/ops.py @@ -380,9 +380,7 @@ def generic(self, args, kws): # TODO: string array, dt_index out = get_array_index_type(ary, idx) if out is not None: - ret_typ = arr_to_series_type(out.result) - if ret_typ is None: # not array output - ret_typ = out.result + ret_typ = if_arr_to_series_type(out.result) return signature(ret_typ, ary, out.index) @infer @@ -398,7 +396,7 @@ def generic(self, args, kws): # TODO: strings, dt_index res = super(SetItemSeries, self).generic((ary, idx, val), kws) if res is not None: - new_series = arr_to_series_type(res.args[0]) + new_series = if_arr_to_series_type(res.args[0]) res.args = (new_series, res.args[1], res.args[2]) return res From 9d959b7f9694ec79dc61e92bf88238f039c67700 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 10:59:13 -0400 Subject: [PATCH 23/36] fix if_arr_to_series_type for string array --- hpat/pd_series_ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index f3b9a0641..742afe5e2 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -224,7 +224,7 @@ def if_series_to_array_type(typ, replace_boxed=False): return typ def if_arr_to_series_type(typ): - if isinstance(typ, types.Array): + if isinstance(typ, types.Array) or typ == string_array_type: return arr_to_series_type(typ) if isinstance(typ, (types.Tuple, types.UniTuple)): return types.Tuple([if_arr_to_series_type(t) for t in typ.types]) From 9b555606ba255420e8e5f4fd970cb5d481ae4bcb Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 11:18:44 -0400 Subject: [PATCH 24/36] series len --- hpat/pd_series_ext.py | 6 ++++++ hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 742afe5e2..7178880a7 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -449,3 +449,9 @@ class typing_class(Series_Numpy_rules_ufunc): if not name in _aliases: infer_global(func, types.Function(typing_class)) + +@infer_global(len) +class LenSeriesType(AbstractTemplate): + def generic(self, args, kws): + if not kws and len(args) == 1 and isinstance(args[0], SeriesType): + return signature(types.intp, *args) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 16ef809c5..953719d63 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -237,6 +237,15 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) np.testing.assert_array_equal(hpat_func(df.A.copy(), 1), test_impl(df.A, 1)) + def test_series_len(self): + def test_impl(A, i): + return len(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_series_ufunc1(self): def test_impl(A, i): return np.isinf(A).values From 4b02d327eca8003e5a7f0dc6371401135f2bd298 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 12:06:20 -0400 Subject: [PATCH 25/36] string Series getitem typing --- hpat/pd_series_ext.py | 19 +++++++++++++------ hpat/tests/test_series.py | 8 ++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index 7178880a7..e814249c4 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -12,7 +12,7 @@ import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, - str_arr_payload_type, StringArrayType) + str_arr_payload_type, StringArrayType, GetItemStringArray) from hpat.pd_timestamp_ext import pandas_timestamp_type # TODO: implement type inference instead of subtyping array since Pandas as of @@ -377,11 +377,18 @@ def generic(self, args, kws): if not isinstance(series, SeriesType): return None ary = series_to_array_type(series) - # TODO: string array, dt_index - out = get_array_index_type(ary, idx) - if out is not None: - ret_typ = if_arr_to_series_type(out.result) - return signature(ret_typ, ary, out.index) + + # TODO: dt_index + if ary == string_array_type: + sig = GetItemStringArray.generic(self, (ary, idx), kws) + else: + out = get_array_index_type(ary, idx) + sig = signature(out.result, ary, out.index) + + if sig is not None: + sig.return_type = if_arr_to_series_type(sig.return_type) + sig.args = tuple(if_arr_to_series_type(a) for a in sig.args) + return sig @infer class SetItemSeries(SetItemBuffer): diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 953719d63..633ab8889 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -209,6 +209,14 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_getitem_series_str1(self): + def test_impl(A, i): + return A[i] + + df = pd.DataFrame({'A': ['aa', 'bb', 'cc']}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_series_op1(self): def test_impl(A, i): return A+A From 11b25bd1ba29b17681ff6201ba040bae4d3f2052 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 13:07:55 -0400 Subject: [PATCH 26/36] fix series calltype replacement for np.typ() calls --- hpat/hiframes_typed.py | 5 ++++- hpat/tests/test_series.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index bfb42dbca..dbb9fdf17 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -56,6 +56,8 @@ def run(self): if debug_prints(): # pragma: no cover print("types before Series replacement:", self.typemap) + print("calltypes: ", self.calltypes) + replace_series = {} for vname, typ in self.typemap.items(): if isinstance(typ, SeriesType): @@ -98,7 +100,8 @@ def run(self): self.typingctx , argtyps, kwtyps) # calltypes of things like BoundFunction (array.call) need to # be update for lowering to work - if call in self.calltypes: + # XXX: new_call_typ could be None for things like np.int32() + if call in self.calltypes and new_call_typ is not None: self.calltypes.pop(call) self.calltypes[call] = new_call_typ diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 633ab8889..c17503086 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -254,6 +254,15 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_np_typ_call_replace(self): + # calltype replacement is tricky for np.typ() calls since variable + # type can't provide calltype + def test_impl(i): + return np.int32(i) + + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(1), test_impl(1)) + def test_series_ufunc1(self): def test_impl(A, i): return np.isinf(A).values From c5475edcbc168607091c34c305011dcf11289275 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 14:11:18 -0400 Subject: [PATCH 27/36] fix Series call type replace for undefined types --- hpat/hiframes_typed.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index dbb9fdf17..b6d3ab258 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -8,7 +8,7 @@ find_topo_order, gen_np_call, get_definition, guard, find_callname, mk_alloc, find_const, is_setitem, is_getitem) -from numba.typing.templates import Signature, bound_function +from numba.typing.templates import Signature, bound_function, signature from numba.typing.arraydecl import ArrayAttribute import hpat from hpat.utils import get_definitions, debug_prints @@ -55,7 +55,7 @@ def run(self): blocks[label].body = new_body if debug_prints(): # pragma: no cover - print("types before Series replacement:", self.typemap) + print("--- types before Series replacement:", self.typemap) print("calltypes: ", self.calltypes) replace_series = {} @@ -96,14 +96,21 @@ def run(self): argtyps = sig.args[:len(call.args)] kwtyps = {name: self.typemap[v.name] for name, v in call.kws} - new_call_typ = self.typemap[call.func.name].get_call_type( + new_sig = self.typemap[call.func.name].get_call_type( self.typingctx , argtyps, kwtyps) # calltypes of things like BoundFunction (array.call) need to # be update for lowering to work - # XXX: new_call_typ could be None for things like np.int32() - if call in self.calltypes and new_call_typ is not None: - self.calltypes.pop(call) - self.calltypes[call] = new_call_typ + # XXX: new_sig could be None for things like np.int32() + if call in self.calltypes and new_sig is not None: + old_sig = self.calltypes.pop(call) + # fix types with undefined dtypes in empty_inferred, etc. + return_type = _fix_typ_undefs(new_sig.return_type, old_sig.return_type) + args = tuple(_fix_typ_undefs(a, b) for a,b in zip(new_sig.args, old_sig.args)) + self.calltypes[call] = signature(return_type, *args) + + if debug_prints(): # pragma: no cover + print("--- types after Series replacement:", self.typemap) + print("calltypes: ", self.calltypes) self.func_ir._definitions = get_definitions(self.func_ir.blocks) return if_series_to_array_type(self.return_type) @@ -457,6 +464,18 @@ def is_bool_arr(self, varname): typ = self.typemap[varname] return isinstance(typ, types.npytypes.Array) and typ.dtype == types.bool_ +def _fix_typ_undefs(new_typ, old_typ): + if isinstance(old_typ, (types.Array, SeriesType)): + assert isinstance(new_typ, (types.Array, SeriesType)) + if new_typ.dtype == types.undefined: + return new_typ.copy(old_typ.dtype) + if isinstance(old_typ, (types.Tuple, types.UniTuple)): + return types.Tuple([_fix_typ_undefs(t, u) + for t, u in zip(new_typ.types, old_typ.types)]) + # TODO: fix List, Set + return new_typ + + # float columns can have regular np.nan From 5a749d87458c65d453f07bfe0dbd8e72d3253713 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 14:42:51 -0400 Subject: [PATCH 28/36] hiframes_typed calltypes replace full signature to keep side-effects --- hpat/hiframes_typed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index b6d3ab258..443d66aaa 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -106,7 +106,7 @@ def run(self): # fix types with undefined dtypes in empty_inferred, etc. return_type = _fix_typ_undefs(new_sig.return_type, old_sig.return_type) args = tuple(_fix_typ_undefs(a, b) for a,b in zip(new_sig.args, old_sig.args)) - self.calltypes[call] = signature(return_type, *args) + self.calltypes[call] = Signature(return_type, args, new_sig.recvr, new_sig.pysig) if debug_prints(): # pragma: no cover print("--- types after Series replacement:", self.typemap) From b007d710014f32c0a8ff9c7811ffb5358c651185 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 16:16:20 -0400 Subject: [PATCH 29/36] empty_like Series typing commented out pending Numba fix --- hpat/pd_series_ext.py | 17 ++++++++++++++++- hpat/tests/test_series.py | 10 ++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index e814249c4..e98756be2 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -8,7 +8,8 @@ from numba.typing.arraydecl import (get_array_index_type, _expand_integer, ArrayAttribute, SetItemBuffer) from numba.typing.npydecl import (Numpy_rules_ufunc, NumpyRulesArrayOperator, - NumpyRulesInplaceArrayOperator, NumpyRulesUnaryArrayOperator) + NumpyRulesInplaceArrayOperator, NumpyRulesUnaryArrayOperator, + NdConstructorLike) import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, @@ -462,3 +463,17 @@ class LenSeriesType(AbstractTemplate): def generic(self, args, kws): if not kws and len(args) == 1 and isinstance(args[0], SeriesType): return signature(types.intp, *args) + +# @infer_global(np.empty_like) +# @infer_global(np.zeros_like) +# @infer_global(np.ones_like) +# class SeriesLikeTyper(NdConstructorLike): +# def generic(self): +# typer = super(SeriesLikeTyper, self).generic() +# def wrapper(*args, **kws): +# new_args = tuple(if_series_to_array_type(arg) for arg in args) +# new_kws = {n:if_series_to_array_type(t) for n,t in kws.items()} +# return typer(*new_args, **new_kws) +# return wrapper + +#@infer_global(np.full_like) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index c17503086..bcbe8b4d0 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -285,5 +285,15 @@ def test_impl(): self.assertTrue(isinstance(two, np.ndarray)) self.assertTrue(isinstance(three, np.ndarray)) + @unittest.skip("needs empty_like typing fix in npydecl.py") + def test_series_empty_like(self): + def test_impl(A): + return np.empty_like(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertTrue(isinstance(hpat_func(df.A), np.ndarray)) + if __name__ == "__main__": unittest.main() From 7da9f77befab10f04ec3ab69649f4ec19780a7d4 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 16:24:50 -0400 Subject: [PATCH 30/36] replace hiframes empty_like with empty() --- hpat/hiframes.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/hpat/hiframes.py b/hpat/hiframes.py index 6814397cb..1e27c72b0 100644 --- a/hpat/hiframes.py +++ b/hpat/hiframes.py @@ -1881,20 +1881,13 @@ def _get_renamed_df(self, df_var): def gen_empty_like(in_arr, out_arr): - scope = in_arr.scope - loc = in_arr.loc - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - g_np = ir.Global('np', np, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - # attr call: empty_attr = getattr(g_np_var, empty_like) - empty_attr_call = ir.Expr.getattr(g_np_var, "empty_like", loc) - attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc) - attr_assign = ir.Assign(empty_attr_call, attr_var, loc) - # alloc call: out_arr = empty_attr(in_arr) - alloc_call = ir.Expr.call(attr_var, [in_arr], (), loc) - alloc_assign = ir.Assign(alloc_call, out_arr, loc) - return [g_np_assign, attr_assign, alloc_assign] + def f(A): # pragma: no cover + B = np.empty(A.shape, A.dtype) + f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + replace_arg_nodes(f_block, [in_arr]) + nodes = f_block.body[:-3] # remove none return + nodes[-1].target = out_arr + return nodes def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals, From 316b0b9f93a59de264aed6660e34b69ddbfd630e Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 16:25:00 -0400 Subject: [PATCH 31/36] test series fillna --- hpat/tests/test_series.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index bcbe8b4d0..2f276fd60 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -295,5 +295,13 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) self.assertTrue(isinstance(hpat_func(df.A), np.ndarray)) + def test_series_fillna(self): + def test_impl(A): + return A.fillna(5.0) + + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + if __name__ == "__main__": unittest.main() From d082e448f6c28fd8bfdd47da082d7a66f39d11a5 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 16:54:48 -0400 Subject: [PATCH 32/36] hiframes_typed replace series type in compile_to_numba_ir --- hpat/hiframes_typed.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index 443d66aaa..90f351504 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -186,7 +186,7 @@ def f(column): # pragma: no cover a = column.astype(np.float64) f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr]) nodes = f_block.body[:-3] @@ -235,8 +235,8 @@ def _handle_dt_index_binop(self, lhs, rhs, assign): f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[arg1.name], - self.typemap[arg2.name]), + (if_series_to_array_type(self.typemap[arg1.name]), + if_series_to_array_type(self.typemap[arg2.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2]) # replace == expression with result of parfor (S) @@ -279,8 +279,8 @@ def _handle_string_array_expr(self, lhs, rhs, assign): f = loc_vars['f'] f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[arg1.name], - self.typemap[arg2.name]), + (if_series_to_array_type(self.typemap[arg1.name]), + if_series_to_array_type(self.typemap[arg2.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2]) # replace == expression with result of parfor (S) @@ -304,7 +304,7 @@ def f(_in_arr): # pragma: no cover _alloc_size = _in_arr.shape _out_arr = np.empty(_alloc_size, _in_arr.dtype) - f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (self.typemap[in_arr.name],), + f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr]) nodes = f_block.body[:-3] # remove none return @@ -333,8 +333,8 @@ def _handle_str_contains(self, assign, lhs, rhs, fname): f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[str_arr.name], - self.typemap[pat.name]), + (if_series_to_array_type(self.typemap[str_arr.name]), + if_series_to_array_type(self.typemap[pat.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [str_arr, pat]) # replace call with result of parfor (S) @@ -355,7 +355,7 @@ def _handle_df_col_filter(self, lhs_name, rhs, assign): index_var = rhs.index f_blocks = compile_to_numba_ir(_column_filter_impl_float, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[lhs.name], self.typemap[in_arr.name], + (if_series_to_array_type(self.typemap[lhs.name]), if_series_to_array_type(self.typemap[in_arr.name]), self.typemap[index_var.name]), self.typemap, self.calltypes).blocks first_block = min(f_blocks.keys()) @@ -378,7 +378,7 @@ def f(_in_arr, _ind): f_block = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name], types.intp), + (if_series_to_array_type(self.typemap[in_arr.name]), types.intp), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr, ind]) nodes = f_block.body[:-3] # remove none return @@ -390,7 +390,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_count_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -406,8 +406,8 @@ def f(_in_arr, _ind): val = rhs.args[2] f_blocks = compile_to_numba_ir(_column_fillna_impl, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[out_arr.name], self.typemap[in_arr.name], - self.typemap[val.name]), + (if_series_to_array_type(self.typemap[out_arr.name]), if_series_to_array_type(self.typemap[in_arr.name]), + if_series_to_array_type(self.typemap[val.name])), self.typemap, self.calltypes).blocks first_block = min(f_blocks.keys()) replace_arg_nodes(f_blocks[first_block], [out_arr, in_arr, val]) @@ -418,7 +418,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_sum_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -433,7 +433,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_mean_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -448,7 +448,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_var_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] From 02226e441a72cb4dfa62e0f708b259a94fbaebfa Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 24 Jul 2018 17:58:36 -0400 Subject: [PATCH 33/36] fix Series concatenate() type --- hpat/hiframes_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hpat/hiframes_api.py b/hpat/hiframes_api.py index 6b1cd44f6..b7a4f0a40 100644 --- a/hpat/hiframes_api.py +++ b/hpat/hiframes_api.py @@ -93,7 +93,8 @@ def generic(self, args, kws): ret_typ = string_array_type else: # use typer of np.concatenate - ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list) + arr_list_to_arr = if_series_to_array_type(arr_list) + ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list_to_arr) return signature(ret_typ, arr_list) From 036dcfd6459f5eb0d397e508cd6a239080c66bf1 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Wed, 25 Jul 2018 09:06:38 -0400 Subject: [PATCH 34/36] hiframes to_arr_from_series() in Stencil --- hpat/hiframes.py | 6 ++++-- hpat/hiframes_api.py | 15 +++++++++++++++ hpat/hiframes_typed.py | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/hpat/hiframes.py b/hpat/hiframes.py index 1e27c72b0..6d3c6ad18 100644 --- a/hpat/hiframes.py +++ b/hpat/hiframes.py @@ -1911,8 +1911,10 @@ def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals, stencil_nodes.append(ir.Assign(kernel_func, kernel_var, loc)) def f(A, B, f): # pragma: no cover - numba.stencil(f)(A, out=B) - f_block = compile_to_numba_ir(f, {'numba': numba}).blocks.popitem()[1] + in_arr = hpat.hiframes_api.to_arr_from_series(A) + numba.stencil(f)(in_arr, out=B) + f_block = compile_to_numba_ir(f, {'numba': numba, + 'hpat': hpat}).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr, out_arr, kernel_var]) stencil_nodes += f_block.body[:-3] # remove none return setup_call = stencil_nodes[-2].value diff --git a/hpat/hiframes_api.py b/hpat/hiframes_api.py index b7a4f0a40..6e5f73c5e 100644 --- a/hpat/hiframes_api.py +++ b/hpat/hiframes_api.py @@ -668,6 +668,21 @@ def to_series_dummy_impl(context, builder, sig, args): return impl_ret_borrowed(context, builder, sig.return_type, args[0]) +def to_arr_from_series(arr): + return arr + +@infer_global(to_arr_from_series) +class ToArrFromSeriesType(AbstractTemplate): + def generic(self, args, kws): + assert not kws + assert len(args) == 1 + arr = args[0] + return signature(if_series_to_array_type(arr), arr) + +@lower_builtin(to_arr_from_series, types.Any) +def to_arr_from_series_dummy_impl(context, builder, sig, args): + return impl_ret_borrowed(context, builder, sig.return_type, args[0]) + # dummy func to convert input series to array type def dummy_unbox_series(arr): return arr diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index 90f351504..2e793d945 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -161,7 +161,7 @@ def _run_assign(self, assign): return [assign] def _run_call_hiframes(self, assign, lhs, rhs, func_name): - if func_name == 'to_series_type': + if func_name in ('to_series_type', 'to_arr_from_series'): assign.value = rhs.args[0] return [assign] From 2de0c83810b2c8ea34784598a9e16cdb263f0a32 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Wed, 25 Jul 2018 09:26:12 -0400 Subject: [PATCH 35/36] update column test parfor counts (hiframes_typed before preparfor improves fusion) --- hpat/tests/test_hiframes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hpat/tests/test_hiframes.py b/hpat/tests/test_hiframes.py index 9360526bc..fb5ddd06a 100644 --- a/hpat/tests/test_hiframes.py +++ b/hpat/tests/test_hiframes.py @@ -37,7 +37,7 @@ def test_impl(n): self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - self.assertEqual(count_parfor_OneDs(), 2) + self.assertEqual(count_parfor_OneDs(), 1) def test_set_column2(self): # create new column @@ -51,7 +51,7 @@ def test_impl(n): self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - self.assertEqual(count_parfor_OneDs(), 2) + self.assertEqual(count_parfor_OneDs(), 1) def test_len_df(self): def test_impl(n): From 625a803a08f724884f82aa5b888d0f1392ae804f Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Wed, 25 Jul 2018 09:30:26 -0400 Subject: [PATCH 36/36] Series getitem typing consider Series index (intraday test) --- hpat/pd_series_ext.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index e98756be2..97ecbc636 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -374,21 +374,37 @@ class GetItemSeries(AbstractTemplate): def generic(self, args, kws): assert not kws - [series, idx] = args - if not isinstance(series, SeriesType): + [in_arr, in_idx] = args + is_arr_series = False + is_idx_series = False + + if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType): return None - ary = series_to_array_type(series) + + if isinstance(in_arr, SeriesType): + in_arr = series_to_array_type(in_arr) + is_arr_series = True + + if isinstance(in_idx, SeriesType): + in_idx = series_to_array_type(in_idx) + is_idx_series = True # TODO: dt_index - if ary == string_array_type: - sig = GetItemStringArray.generic(self, (ary, idx), kws) + if in_arr == string_array_type: + sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws) else: - out = get_array_index_type(ary, idx) - sig = signature(out.result, ary, out.index) + out = get_array_index_type(in_arr, in_idx) + sig = signature(out.result, in_arr, out.index) if sig is not None: - sig.return_type = if_arr_to_series_type(sig.return_type) - sig.args = tuple(if_arr_to_series_type(a) for a in sig.args) + arg1 = sig.args[0] + arg2 = sig.args[1] + if is_arr_series: + sig.return_type = if_arr_to_series_type(sig.return_type) + arg1 = if_arr_to_series_type(arg1) + if is_idx_series: + arg2 = if_arr_to_series_type(arg2) + sig.args = (arg1, arg2) return sig @infer