diff --git a/hpat/compiler.py b/hpat/compiler.py index 2fc7eeeb0..172d26c0c 100644 --- a/hpat/compiler.py +++ b/hpat/compiler.py @@ -148,8 +148,11 @@ def define_pipelines(self, pm): # e.g. need to handle string array exprs before nopython rewrites # converts them to arrayexpr. # self.add_optimization_stage(pm) - pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors") + # hiframes typed pass should be before pre_parfor since variable types + # need updating, and A.call to np.call transformation is invalid for + # Series (e.g. S.var is not the same as np.var(S)) pm.add_stage(self.stage_df_typed_pass, "typed hiframes pass") + pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors") if not self.flags.no_rewrites: pm.add_stage(self.stage_nopython_rewrites, "nopython rewrites") if self.flags.auto_parallel.enabled: diff --git a/hpat/hiframes.py b/hpat/hiframes.py index 6814397cb..6d3c6ad18 100644 --- a/hpat/hiframes.py +++ b/hpat/hiframes.py @@ -1881,20 +1881,13 @@ def _get_renamed_df(self, df_var): def gen_empty_like(in_arr, out_arr): - scope = in_arr.scope - loc = in_arr.loc - # g_np_var = Global(numpy) - g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) - g_np = ir.Global('np', np, loc) - g_np_assign = ir.Assign(g_np, g_np_var, loc) - # attr call: empty_attr = getattr(g_np_var, empty_like) - empty_attr_call = ir.Expr.getattr(g_np_var, "empty_like", loc) - attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc) - attr_assign = ir.Assign(empty_attr_call, attr_var, loc) - # alloc call: out_arr = empty_attr(in_arr) - alloc_call = ir.Expr.call(attr_var, [in_arr], (), loc) - alloc_assign = ir.Assign(alloc_call, out_arr, loc) - return [g_np_assign, attr_assign, alloc_assign] + def f(A): # pragma: no cover + B = np.empty(A.shape, A.dtype) + f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + replace_arg_nodes(f_block, [in_arr]) + nodes = f_block.body[:-3] # remove none return + nodes[-1].target = out_arr + return nodes def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals, @@ -1918,8 +1911,10 @@ def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals, stencil_nodes.append(ir.Assign(kernel_func, kernel_var, loc)) def f(A, B, f): # pragma: no cover - numba.stencil(f)(A, out=B) - f_block = compile_to_numba_ir(f, {'numba': numba}).blocks.popitem()[1] + in_arr = hpat.hiframes_api.to_arr_from_series(A) + numba.stencil(f)(in_arr, out=B) + f_block = compile_to_numba_ir(f, {'numba': numba, + 'hpat': hpat}).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr, out_arr, kernel_var]) stencil_nodes += f_block.body[:-3] # remove none return setup_call = stencil_nodes[-2].value diff --git a/hpat/hiframes_api.py b/hpat/hiframes_api.py index 100cb2ac5..6e5f73c5e 100644 --- a/hpat/hiframes_api.py +++ b/hpat/hiframes_api.py @@ -22,7 +22,9 @@ import numpy as np from hpat.pd_timestamp_ext import timestamp_series_type, pandas_timestamp_type import hpat -from hpat.pd_series_ext import SeriesType, BoxedSeriesType, string_series_type, arr_to_series_type, arr_to_boxed_series_type, series_to_array_type +from hpat.pd_series_ext import (SeriesType, BoxedSeriesType, + string_series_type, if_arr_to_series_type, arr_to_boxed_series_type, + series_to_array_type, if_series_to_array_type) # from numba.typing.templates import infer_getattr, AttributeTemplate, bound_function # from numba import types @@ -91,7 +93,8 @@ def generic(self, args, kws): ret_typ = string_array_type else: # use typer of np.concatenate - ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list) + arr_list_to_arr = if_series_to_array_type(arr_list) + ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list_to_arr) return signature(ret_typ, arr_list) @@ -656,7 +659,7 @@ def generic(self, args, kws): if isinstance(arr, BoxedSeriesType): series_type = SeriesType(arr.dtype, 1, 'C') else: - series_type = arr_to_series_type(arr) + series_type = if_arr_to_series_type(arr) assert series_type is not None, "unknown type for pd.Series: {}".format(arr) return signature(series_type, arr) @@ -664,19 +667,21 @@ def generic(self, args, kws): def to_series_dummy_impl(context, builder, sig, args): return impl_ret_borrowed(context, builder, sig.return_type, args[0]) -def if_series_to_array_type(typ, replace_boxed=False): - if isinstance(typ, SeriesType): - return series_to_array_type(typ, replace_boxed) - # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed - # it results in cast error for call dummy_unbox_series - if replace_boxed and isinstance(typ, BoxedSeriesType): - return series_to_array_type(typ, replace_boxed) - if isinstance(typ, (types.Tuple, types.UniTuple)): - return types.Tuple( - [if_series_to_array_type(t, replace_boxed) for t in typ.types]) - # TODO: other types than can have Series inside: list, set, etc. - return typ +def to_arr_from_series(arr): + return arr + +@infer_global(to_arr_from_series) +class ToArrFromSeriesType(AbstractTemplate): + def generic(self, args, kws): + assert not kws + assert len(args) == 1 + arr = args[0] + return signature(if_series_to_array_type(arr), arr) + +@lower_builtin(to_arr_from_series, types.Any) +def to_arr_from_series_dummy_impl(context, builder, sig, args): + return impl_ret_borrowed(context, builder, sig.return_type, args[0]) # dummy func to convert input series to array type def dummy_unbox_series(arr): diff --git a/hpat/hiframes_typed.py b/hpat/hiframes_typed.py index 12af8dba6..2e793d945 100644 --- a/hpat/hiframes_typed.py +++ b/hpat/hiframes_typed.py @@ -8,15 +8,16 @@ find_topo_order, gen_np_call, get_definition, guard, find_callname, mk_alloc, find_const, is_setitem, is_getitem) -from numba.typing.templates import Signature +from numba.typing.templates import Signature, bound_function, signature +from numba.typing.arraydecl import ArrayAttribute import hpat from hpat.utils import get_definitions, debug_prints from hpat.hiframes import include_new_blocks, gen_empty_like -from hpat.hiframes_api import if_series_to_array_type from hpat.str_ext import string_type from hpat.str_arr_ext import string_array_type, StringArrayType, is_str_arr_typ from hpat.pd_series_ext import (SeriesType, string_series_type, - series_to_array_type, BoxedSeriesType, dt_index_series_type) + series_to_array_type, BoxedSeriesType, dt_index_series_type, + if_series_to_array_type) class HiFramesTyped(object): @@ -54,13 +55,28 @@ def run(self): blocks[label].body = new_body if debug_prints(): # pragma: no cover - print("types before Series replacement:", self.typemap) + print("--- types before Series replacement:", self.typemap) + print("calltypes: ", self.calltypes) + replace_series = {} for vname, typ in self.typemap.items(): if isinstance(typ, SeriesType): # print("replacing series type", vname) new_typ = series_to_array_type(typ) replace_series[vname] = new_typ + # replace array.call() variable types + if isinstance(typ, types.BoundFunction) and isinstance(typ.this, SeriesType): + this = series_to_array_type(typ.this) + # TODO: handle string arrays, etc. + assert typ.typing_key.startswith('array.') + attr = typ.typing_key[len('array.'):] + resolver = getattr(ArrayAttribute, 'resolve_'+attr) + # methods are either installed with install_array_method or + # using @bound_function in arraydecl.py + if hasattr(resolver, '__wrapped__'): + resolver = bound_function(typ.typing_key)(resolver.__wrapped__) + new_typ = resolver(ArrayAttribute(self.typingctx), this) + replace_series[vname] = new_typ for vname, typ in replace_series.items(): self.typemap.pop(vname) @@ -79,7 +95,22 @@ def run(self): # reusing sig.args since some types become Const in sig argtyps = sig.args[:len(call.args)] kwtyps = {name: self.typemap[v.name] for name, v in call.kws} - self.typemap[call.func.name].get_call_type(self.typingctx , argtyps, kwtyps) + + new_sig = self.typemap[call.func.name].get_call_type( + self.typingctx , argtyps, kwtyps) + # calltypes of things like BoundFunction (array.call) need to + # be update for lowering to work + # XXX: new_sig could be None for things like np.int32() + if call in self.calltypes and new_sig is not None: + old_sig = self.calltypes.pop(call) + # fix types with undefined dtypes in empty_inferred, etc. + return_type = _fix_typ_undefs(new_sig.return_type, old_sig.return_type) + args = tuple(_fix_typ_undefs(a, b) for a,b in zip(new_sig.args, old_sig.args)) + self.calltypes[call] = Signature(return_type, args, new_sig.recvr, new_sig.pysig) + + if debug_prints(): # pragma: no cover + print("--- types after Series replacement:", self.typemap) + print("calltypes: ", self.calltypes) self.func_ir._definitions = get_definitions(self.func_ir.blocks) return if_series_to_array_type(self.return_type) @@ -130,7 +161,7 @@ def _run_assign(self, assign): return [assign] def _run_call_hiframes(self, assign, lhs, rhs, func_name): - if func_name == 'to_series_type': + if func_name in ('to_series_type', 'to_arr_from_series'): assign.value = rhs.args[0] return [assign] @@ -155,7 +186,7 @@ def f(column): # pragma: no cover a = column.astype(np.float64) f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr]) nodes = f_block.body[:-3] @@ -204,8 +235,8 @@ def _handle_dt_index_binop(self, lhs, rhs, assign): f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[arg1.name], - self.typemap[arg2.name]), + (if_series_to_array_type(self.typemap[arg1.name]), + if_series_to_array_type(self.typemap[arg2.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2]) # replace == expression with result of parfor (S) @@ -248,8 +279,8 @@ def _handle_string_array_expr(self, lhs, rhs, assign): f = loc_vars['f'] f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[arg1.name], - self.typemap[arg2.name]), + (if_series_to_array_type(self.typemap[arg1.name]), + if_series_to_array_type(self.typemap[arg2.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2]) # replace == expression with result of parfor (S) @@ -273,7 +304,7 @@ def f(_in_arr): # pragma: no cover _alloc_size = _in_arr.shape _out_arr = np.empty(_alloc_size, _in_arr.dtype) - f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (self.typemap[in_arr.name],), + f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr]) nodes = f_block.body[:-3] # remove none return @@ -302,8 +333,8 @@ def _handle_str_contains(self, assign, lhs, rhs, fname): f_blocks = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[str_arr.name], - self.typemap[pat.name]), + (if_series_to_array_type(self.typemap[str_arr.name]), + if_series_to_array_type(self.typemap[pat.name])), self.typemap, self.calltypes).blocks replace_arg_nodes(f_blocks[min(f_blocks.keys())], [str_arr, pat]) # replace call with result of parfor (S) @@ -324,7 +355,7 @@ def _handle_df_col_filter(self, lhs_name, rhs, assign): index_var = rhs.index f_blocks = compile_to_numba_ir(_column_filter_impl_float, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[lhs.name], self.typemap[in_arr.name], + (if_series_to_array_type(self.typemap[lhs.name]), if_series_to_array_type(self.typemap[in_arr.name]), self.typemap[index_var.name]), self.typemap, self.calltypes).blocks first_block = min(f_blocks.keys()) @@ -347,7 +378,7 @@ def f(_in_arr, _ind): f_block = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name], types.intp), + (if_series_to_array_type(self.typemap[in_arr.name]), types.intp), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr, ind]) nodes = f_block.body[:-3] # remove none return @@ -359,7 +390,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_count_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -375,8 +406,8 @@ def f(_in_arr, _ind): val = rhs.args[2] f_blocks = compile_to_numba_ir(_column_fillna_impl, {'numba': numba, 'np': np}, self.typingctx, - (self.typemap[out_arr.name], self.typemap[in_arr.name], - self.typemap[val.name]), + (if_series_to_array_type(self.typemap[out_arr.name]), if_series_to_array_type(self.typemap[in_arr.name]), + if_series_to_array_type(self.typemap[val.name])), self.typemap, self.calltypes).blocks first_block = min(f_blocks.keys()) replace_arg_nodes(f_blocks[first_block], [out_arr, in_arr, val]) @@ -387,7 +418,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_sum_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -402,7 +433,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_mean_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -417,7 +448,7 @@ def f(_in_arr, _ind): f_blocks = compile_to_numba_ir(_column_var_impl, {'numba': numba, 'np': np, 'hpat': hpat}, self.typingctx, - (self.typemap[in_arr.name],), + (if_series_to_array_type(self.typemap[in_arr.name]),), self.typemap, self.calltypes).blocks topo_order = find_topo_order(f_blocks) first_block = topo_order[0] @@ -433,6 +464,18 @@ def is_bool_arr(self, varname): typ = self.typemap[varname] return isinstance(typ, types.npytypes.Array) and typ.dtype == types.bool_ +def _fix_typ_undefs(new_typ, old_typ): + if isinstance(old_typ, (types.Array, SeriesType)): + assert isinstance(new_typ, (types.Array, SeriesType)) + if new_typ.dtype == types.undefined: + return new_typ.copy(old_typ.dtype) + if isinstance(old_typ, (types.Tuple, types.UniTuple)): + return types.Tuple([_fix_typ_undefs(t, u) + for t, u in zip(new_typ.types, old_typ.types)]) + # TODO: fix List, Set + return new_typ + + # float columns can have regular np.nan diff --git a/hpat/pd_series_ext.py b/hpat/pd_series_ext.py index ebb32f246..97ecbc636 100644 --- a/hpat/pd_series_ext.py +++ b/hpat/pd_series_ext.py @@ -1,24 +1,40 @@ +import numpy as np +import numba from numba import types from numba.extending import (models, register_model, lower_cast, infer_getattr, type_callable, infer) from numba.typing.templates import (infer_global, AbstractTemplate, signature, - AttributeTemplate) + AttributeTemplate, bound_function) +from numba.typing.arraydecl import (get_array_index_type, _expand_integer, + ArrayAttribute, SetItemBuffer) +from numba.typing.npydecl import (Numpy_rules_ufunc, NumpyRulesArrayOperator, + NumpyRulesInplaceArrayOperator, NumpyRulesUnaryArrayOperator, + NdConstructorLike) import hpat from hpat.str_ext import string_type from hpat.str_arr_ext import (string_array_type, offset_typ, char_typ, - str_arr_payload_type, StringArrayType) + str_arr_payload_type, StringArrayType, GetItemStringArray) +from hpat.pd_timestamp_ext import pandas_timestamp_type # TODO: implement type inference instead of subtyping array since Pandas as of # 0.23 is deprecating things like itemsize etc. -class SeriesType(types.Array): +# class SeriesType(types.ArrayCompatible): +class SeriesType(types.IterableType): """Temporary type class for Series objects. """ - array_priority = 1000 + # array_priority = 1000 def __init__(self, dtype, ndim, layout, readonly=False, name=None, aligned=True): - # same as types.Array, except name is Series + # same as types.Array, except name is Series, and buffer attributes + # initialized here assert ndim == 1, "Series() should be one dimensional" assert name is None + self.mutable = True + self.aligned = True + self.dtype = dtype + self.ndim = ndim + self.layout = layout + if readonly: self.mutable = False if (not aligned or @@ -31,7 +47,15 @@ def __init__(self, dtype, ndim, layout, readonly=False, name=None, if not self.aligned: type_name = "unaligned " + type_name name = "%s(%s, %sd, %s)" % (type_name, dtype, ndim, layout) - super(SeriesType, self).__init__(dtype, ndim, layout, name=name) + super(SeriesType, self).__init__(name=name) + + @property + def mangling_args(self): + # same as types.Array + args = [self.dtype, self.ndim, self.layout, + 'mutable' if self.mutable else 'readonly', + 'aligned' if self.aligned else 'unaligned'] + return self.__class__.__name__, args def copy(self, dtype=None, ndim=None, layout=None, readonly=None): # same as types.Array, except Series return type @@ -46,6 +70,11 @@ def copy(self, dtype=None, ndim=None, layout=None, readonly=None): return SeriesType(dtype=dtype, ndim=ndim, layout=layout, readonly=readonly, aligned=self.aligned) + @property + def key(self): + # same as types.Array + return self.dtype, self.ndim, self.layout, self.mutable, self.aligned + def unify(self, typingctx, other): # same as types.Array, except returns Series for Series/Series # If other is array and the ndim matches @@ -64,6 +93,45 @@ def unify(self, typingctx, other): # XXX: unify Series/Array as Array return super(SeriesType, self).unify(typingctx, other) + # @property + # def as_array(self): + # return types.Array(self.dtype, self.ndim, self.layout) + + def can_convert_to(self, typingctx, other): + # same as types.Array, TODO: add Series? + if (isinstance(other, types.Array) and other.ndim == self.ndim + and other.dtype == self.dtype): + if (other.layout in ('A', self.layout) + and (self.mutable or not other.mutable) + and (self.aligned or not other.aligned)): + return types.Conversion.safe + + def is_precise(self): + # same as types.Array + return self.dtype.is_precise() + + @property + def iterator_type(self): + # same as Buffer + # TODO: fix timestamp + return types.iterators.ArrayIterator(self) + + @property + def is_c_contig(self): + # same as Buffer + return self.layout == 'C' or (self.ndim <= 1 and self.layout in 'CF') + + @property + def is_f_contig(self): + # same as Buffer + return self.layout == 'F' or (self.ndim <= 1 and self.layout in 'CF') + + @property + def is_contig(self): + # same as Buffer + return self.layout in 'CF' + + string_series_type = SeriesType(string_type, 1, 'C', True) # TODO: create a separate DatetimeIndex type from Series dt_index_series_type = SeriesType(types.NPDatetime('ns'), 1, 'C') @@ -143,6 +211,27 @@ def arr_to_boxed_series_type(arr): return series_type +def if_series_to_array_type(typ, replace_boxed=False): + if isinstance(typ, SeriesType): + return series_to_array_type(typ, replace_boxed) + # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed + # it results in cast error for call dummy_unbox_series + if replace_boxed and isinstance(typ, BoxedSeriesType): + return series_to_array_type(typ, replace_boxed) + if isinstance(typ, (types.Tuple, types.UniTuple)): + return types.Tuple( + [if_series_to_array_type(t, replace_boxed) for t in typ.types]) + # TODO: other types than can have Series inside: list, set, etc. + return typ + +def if_arr_to_series_type(typ): + if isinstance(typ, types.Array) or typ == string_array_type: + return arr_to_series_type(typ) + if isinstance(typ, (types.Tuple, types.UniTuple)): + return types.Tuple([if_arr_to_series_type(t) for t in typ.types]) + # TODO: other types than can have Arrays inside: list, set, etc. + return typ + @lower_cast(string_series_type, string_array_type) @lower_cast(string_array_type, string_series_type) def cast_string_series(context, builder, fromty, toty, val): @@ -153,25 +242,43 @@ def cast_string_series(context, builder, fromty, toty, val): def cast_series(context, builder, fromty, toty, val): return val +# --------------------------------------------------------------------------- # +# --- typing similar to arrays adopted from arraydecl.py, npydecl.py -------- # + + @infer_getattr -class ArrayAttribute(AttributeTemplate): +class SeriesAttribute(AttributeTemplate): key = SeriesType def resolve_values(self, ary): return series_to_array_type(ary, True) + @bound_function("array.argsort") + def resolve_argsort(self, ary, args, kws): + resolver = ArrayAttribute.resolve_argsort.__wrapped__ + sig = resolver(self, ary, args, kws) + sig.return_type = if_arr_to_series_type(sig.return_type) + return sig + + @bound_function("array.take") + def resolve_take(self, ary, args, kws): + resolver = ArrayAttribute.resolve_take.__wrapped__ + sig = resolver(self, ary, args, kws) + sig.return_type = if_arr_to_series_type(sig.return_type) + return sig + # TODO: use ops logic from pandas/core/ops.py -# called from numba/numpy_support.py:resolve_output_type -# similar to SmartArray (targets/smartarray.py) -@type_callable('__array_wrap__') -def type_series_array_wrap(context): - def typer(input_type, result): - if isinstance(input_type, SeriesType): - return input_type.copy(dtype=result.dtype, - ndim=result.ndim, - layout=result.layout) - - return typer +# # called from numba/numpy_support.py:resolve_output_type +# # similar to SmartArray (targets/smartarray.py) +# @type_callable('__array_wrap__') +# def type_series_array_wrap(context): +# def typer(input_type, result): +# if isinstance(input_type, SeriesType): +# return input_type.copy(dtype=result.dtype, +# ndim=result.ndim, +# layout=result.layout) + +# return typer @infer class SeriesCompEqual(AbstractTemplate): @@ -210,3 +317,179 @@ class CmpOpLESeries(SeriesCompEqual): @infer class CmpOpLTSeries(SeriesCompEqual): key = '<' + +# @infer +# class GetItemBuffer(AbstractTemplate): +# key = "getitem" + +# def generic(self, args, kws): +# assert not kws +# [ary, idx] = args +# import pdb; pdb.set_trace() +# if not isinstance(ary, SeriesType): +# return +# out = get_array_index_type(ary, idx) +# # check result to be dt64 since it might be sliced array +# # replace result with Timestamp +# if out is not None and out.result == types.NPDatetime('ns'): +# return signature(pandas_timestamp_type, ary, out.index) + +def install_array_method(name, generic, support_literals=False): + # taken from arraydecl.py, Series instead of Array + my_attr = {"key": "array." + name, "generic": generic} + temp_class = type("Series_" + name, (AbstractTemplate,), my_attr) + if support_literals: + temp_class.support_literals = support_literals + def array_attribute_attachment(self, ary): + return types.BoundFunction(temp_class, ary) + + setattr(SeriesAttribute, "resolve_" + name, array_attribute_attachment) + +def generic_expand_cumulative_series(self, args, kws): + # taken from arraydecl.py, replaced Array with Series + assert not args + assert not kws + assert isinstance(self.this, SeriesType) + return_type = SeriesType(dtype=_expand_integer(self.this.dtype), + ndim=1, layout='C') + return signature(return_type, recvr=self.this) + +# replacing cumsum/cumprod since arraydecl.py definition uses types.Array +for fname in ["cumsum", "cumprod"]: + install_array_method(fname, generic_expand_cumulative_series) + +# TODO: add itemsize, strides, etc. when removed from Pandas +_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten'] + +# use ArrayAttribute for attributes not defined in SeriesAttribute +for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): + if (attr.startswith('resolve_') + and attr not in SeriesAttribute.__dict__ + and attr not in _not_series_array_attrs): + setattr(SeriesAttribute, attr, func) + +@infer +class GetItemSeries(AbstractTemplate): + key = "getitem" + + def generic(self, args, kws): + assert not kws + [in_arr, in_idx] = args + is_arr_series = False + is_idx_series = False + + if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType): + return None + + if isinstance(in_arr, SeriesType): + in_arr = series_to_array_type(in_arr) + is_arr_series = True + + if isinstance(in_idx, SeriesType): + in_idx = series_to_array_type(in_idx) + is_idx_series = True + + # TODO: dt_index + if in_arr == string_array_type: + sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws) + else: + out = get_array_index_type(in_arr, in_idx) + sig = signature(out.result, in_arr, out.index) + + if sig is not None: + arg1 = sig.args[0] + arg2 = sig.args[1] + if is_arr_series: + sig.return_type = if_arr_to_series_type(sig.return_type) + arg1 = if_arr_to_series_type(arg1) + if is_idx_series: + arg2 = if_arr_to_series_type(arg2) + sig.args = (arg1, arg2) + return sig + +@infer +class SetItemSeries(SetItemBuffer): + key = "setitem" + + def generic(self, args, kws): + assert not kws + series, idx, val = args + if not isinstance(series, SeriesType): + return None + ary = series_to_array_type(series) + # TODO: strings, dt_index + res = super(SetItemSeries, self).generic((ary, idx, val), kws) + if res is not None: + new_series = if_arr_to_series_type(res.args[0]) + res.args = (new_series, res.args[1], res.args[2]) + return res + +def series_op_generic(cls, self, args, kws): + # return if no Series + if not any(isinstance(arg, SeriesType) for arg in args): + return None + # convert args to array + new_args = tuple(if_series_to_array_type(arg) for arg in args) + sig = super(cls, self).generic(new_args, kws) + # convert back to Series + if sig is not None: + sig.return_type = if_arr_to_series_type(sig.return_type) + sig.args = tuple(if_arr_to_series_type(a) for a in sig.args) + return sig + +class SeriesOpUfuncs(NumpyRulesArrayOperator): + def generic(self, args, kws): + return series_op_generic(SeriesOpUfuncs, self, args, kws) + +class SeriesInplaceOpUfuncs(NumpyRulesInplaceArrayOperator): + def generic(self, args, kws): + return series_op_generic(SeriesInplaceOpUfuncs, self, args, kws) + +class SeriesUnaryOpUfuncs(NumpyRulesUnaryArrayOperator): + def generic(self, args, kws): + return series_op_generic(SeriesUnaryOpUfuncs, self, args, kws) + +# TODO: change class name to Series in install_operations +SeriesOpUfuncs.install_operations() +SeriesInplaceOpUfuncs.install_operations() +SeriesUnaryOpUfuncs.install_operations() + +class Series_Numpy_rules_ufunc(Numpy_rules_ufunc): + def generic(self, args, kws): + return series_op_generic(Series_Numpy_rules_ufunc, self, args, kws) + +# copied from npydecl.py since deleted +_aliases = set(["bitwise_not", "mod", "abs"]) +if np.divide == np.true_divide: + _aliases.add("divide") + +for func in numba.typing.npydecl.supported_ufuncs: + name = func.__name__ + #_numpy_ufunc(func) + class typing_class(Series_Numpy_rules_ufunc): + key = func + + typing_class.__name__ = "resolve_series_{0}".format(name) + + if not name in _aliases: + infer_global(func, types.Function(typing_class)) + +@infer_global(len) +class LenSeriesType(AbstractTemplate): + def generic(self, args, kws): + if not kws and len(args) == 1 and isinstance(args[0], SeriesType): + return signature(types.intp, *args) + +# @infer_global(np.empty_like) +# @infer_global(np.zeros_like) +# @infer_global(np.ones_like) +# class SeriesLikeTyper(NdConstructorLike): +# def generic(self): +# typer = super(SeriesLikeTyper, self).generic() +# def wrapper(*args, **kws): +# new_args = tuple(if_series_to_array_type(arg) for arg in args) +# new_kws = {n:if_series_to_array_type(t) for n,t in kws.items()} +# return typer(*new_args, **new_kws) +# return wrapper + +#@infer_global(np.full_like) diff --git a/hpat/tests/test_hiframes.py b/hpat/tests/test_hiframes.py index 9360526bc..fb5ddd06a 100644 --- a/hpat/tests/test_hiframes.py +++ b/hpat/tests/test_hiframes.py @@ -37,7 +37,7 @@ def test_impl(n): self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - self.assertEqual(count_parfor_OneDs(), 2) + self.assertEqual(count_parfor_OneDs(), 1) def test_set_column2(self): # create new column @@ -51,7 +51,7 @@ def test_impl(n): self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) - self.assertEqual(count_parfor_OneDs(), 2) + self.assertEqual(count_parfor_OneDs(), 1) def test_len_df(self): def test_impl(n): diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index dfd7ac5cd..2f276fd60 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -83,6 +83,70 @@ def test_impl(A): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A), test_impl(df.A)) + def test_series_attr1(self): + def test_impl(A): + return A.size + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A), test_impl(df.A)) + + def test_series_attr2(self): + def test_impl(A): + return A.copy().values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + + def test_series_attr3(self): + def test_impl(A): + return A.min() + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A), test_impl(df.A)) + + def test_series_attr4(self): + def test_impl(A): + return A.cumsum().values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + + @unittest.skip("needs argsort fix in canonicalize_array_math") + def test_series_attr5(self): + def test_impl(A): + return A.argsort().values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + + def test_series_attr6(self): + def test_impl(A): + return A.take([2,3]).values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + + def test_np_call_on_series1(self): + def test_impl(A): + return np.min(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + def test_series_values1(self): def test_impl(A): return (A == 2).values @@ -112,6 +176,21 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_setitem_series2(self): + def test_impl(A, i): + A[i] = 100 + # TODO: remove return after aliasing fix + return A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + A1 = df.A.copy() + A2 = df.A + hpat_func = hpat.jit(test_impl) + hpat_func(A1, 0) + test_impl(A2, 0) + np.testing.assert_array_equal(A1.values, A2.values) + def test_static_getitem_series1(self): def test_impl(A): return A[0] @@ -130,6 +209,69 @@ def test_impl(A, i): hpat_func = hpat.jit(test_impl) self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + def test_getitem_series_str1(self): + def test_impl(A, i): + return A[i] + + df = pd.DataFrame({'A': ['aa', 'bb', 'cc']}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + + def test_series_op1(self): + def test_impl(A, i): + return A+A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 0), test_impl(df.A, 0)) + + def test_series_op2(self): + def test_impl(A, i): + return A+i + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 1), test_impl(df.A, 1)) + + def test_series_op3(self): + def test_impl(A, i): + A += i + return A + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A.copy(), 1), test_impl(df.A, 1)) + + def test_series_len(self): + def test_impl(A, i): + return len(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(df.A, 0), test_impl(df.A, 0)) + + def test_np_typ_call_replace(self): + # calltype replacement is tricky for np.typ() calls since variable + # type can't provide calltype + def test_impl(i): + return np.int32(i) + + hpat_func = hpat.jit(test_impl) + self.assertEqual(hpat_func(1), test_impl(1)) + + def test_series_ufunc1(self): + def test_impl(A, i): + return np.isinf(A).values + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A, 1), test_impl(df.A, 1)) + def test_list_convert(self): def test_impl(): df = pd.DataFrame({'one': np.array([-1, np.nan, 2.5]), @@ -143,5 +285,23 @@ def test_impl(): self.assertTrue(isinstance(two, np.ndarray)) self.assertTrue(isinstance(three, np.ndarray)) + @unittest.skip("needs empty_like typing fix in npydecl.py") + def test_series_empty_like(self): + def test_impl(A): + return np.empty_like(A) + + n = 11 + df = pd.DataFrame({'A': np.arange(n)}) + hpat_func = hpat.jit(test_impl) + self.assertTrue(isinstance(hpat_func(df.A), np.ndarray)) + + def test_series_fillna(self): + def test_impl(A): + return A.fillna(5.0) + + df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) + hpat_func = hpat.jit(test_impl) + np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A)) + if __name__ == "__main__": unittest.main()