Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
87c6bf2
new series type independent of types.Array
Jul 23, 2018
8949039
set SeriesAttribute typing from ArrayAttribute if not overwritten
Jul 23, 2018
297d49e
avoid addition of array attrs that are not in series
Jul 23, 2018
a95c67c
avoid more array attrs that are not in series
Jul 23, 2018
66574eb
test series copy
Jul 23, 2018
572b8fd
fix series attr2 test
Jul 23, 2018
357c8c4
hiframes_typed replace BoundFunction types (array.call) for Series
Jul 23, 2018
b6cf81b
move hiframes_typed pass before pre_parfor_pass
Jul 24, 2018
d0a51b2
test np.call(Series)
Jul 24, 2018
41fc48b
replace Series methods without bound_function
Jul 24, 2018
ef09d13
Series cumsum/cumprod typing
Jul 24, 2018
7622882
Series typing for argsort
Jul 24, 2018
8979d2c
typing for Series.take
Jul 24, 2018
9c737c4
Series getitem typing
Jul 24, 2018
66e7a70
Series setitem typing
Jul 24, 2018
1e90d9b
Series operators
Jul 24, 2018
07050dc
test series op
Jul 24, 2018
b6cf43c
Series op typing cls fix
Jul 24, 2018
aaeba6e
inplace op Series test
Jul 24, 2018
f600aa9
Series ufunc install
Jul 24, 2018
28c18bc
test series ufunc
Jul 24, 2018
bf57690
add if to arr_to_series since Series is not Array subtype anymore
Jul 24, 2018
9d959b7
fix if_arr_to_series_type for string array
Jul 24, 2018
9b55560
series len
Jul 24, 2018
4b02d32
string Series getitem typing
Jul 24, 2018
11b25bd
fix series calltype replacement for np.typ() calls
Jul 24, 2018
c5475ed
fix Series call type replace for undefined types
Jul 24, 2018
5a749d8
hiframes_typed calltypes replace full signature to keep side-effects
Jul 24, 2018
b007d71
empty_like Series typing commented out pending Numba fix
Jul 24, 2018
7da9f77
replace hiframes empty_like with empty()
Jul 24, 2018
316b0b9
test series fillna
Jul 24, 2018
d082e44
hiframes_typed replace series type in compile_to_numba_ir
Jul 24, 2018
02226e4
fix Series concatenate() type
Jul 24, 2018
036dcfd
hiframes to_arr_from_series() in Stencil
Jul 25, 2018
2de0c83
update column test parfor counts (hiframes_typed before preparfor imp…
Jul 25, 2018
625a803
Series getitem typing consider Series index (intraday test)
Jul 25, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion hpat/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,11 @@ def define_pipelines(self, pm):
# e.g. need to handle string array exprs before nopython rewrites
# converts them to arrayexpr.
# self.add_optimization_stage(pm)
pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors")
# hiframes typed pass should be before pre_parfor since variable types
# need updating, and A.call to np.call transformation is invalid for
# Series (e.g. S.var is not the same as np.var(S))
pm.add_stage(self.stage_df_typed_pass, "typed hiframes pass")
pm.add_stage(self.stage_pre_parfor_pass, "Preprocessing for parfors")
if not self.flags.no_rewrites:
pm.add_stage(self.stage_nopython_rewrites, "nopython rewrites")
if self.flags.auto_parallel.enabled:
Expand Down
27 changes: 11 additions & 16 deletions hpat/hiframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,20 +1881,13 @@ def _get_renamed_df(self, df_var):


def gen_empty_like(in_arr, out_arr):
scope = in_arr.scope
loc = in_arr.loc
# g_np_var = Global(numpy)
g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
g_np = ir.Global('np', np, loc)
g_np_assign = ir.Assign(g_np, g_np_var, loc)
# attr call: empty_attr = getattr(g_np_var, empty_like)
empty_attr_call = ir.Expr.getattr(g_np_var, "empty_like", loc)
attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc)
attr_assign = ir.Assign(empty_attr_call, attr_var, loc)
# alloc call: out_arr = empty_attr(in_arr)
alloc_call = ir.Expr.call(attr_var, [in_arr], (), loc)
alloc_assign = ir.Assign(alloc_call, out_arr, loc)
return [g_np_assign, attr_assign, alloc_assign]
def f(A): # pragma: no cover
B = np.empty(A.shape, A.dtype)
f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}).blocks.popitem()[1]
replace_arg_nodes(f_block, [in_arr])
nodes = f_block.body[:-3] # remove none return
nodes[-1].target = out_arr
return nodes


def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals,
Expand All @@ -1918,8 +1911,10 @@ def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals,
stencil_nodes.append(ir.Assign(kernel_func, kernel_var, loc))

def f(A, B, f): # pragma: no cover
numba.stencil(f)(A, out=B)
f_block = compile_to_numba_ir(f, {'numba': numba}).blocks.popitem()[1]
in_arr = hpat.hiframes_api.to_arr_from_series(A)
numba.stencil(f)(in_arr, out=B)
f_block = compile_to_numba_ir(f, {'numba': numba,
'hpat': hpat}).blocks.popitem()[1]
replace_arg_nodes(f_block, [in_arr, out_arr, kernel_var])
stencil_nodes += f_block.body[:-3] # remove none return
setup_call = stencil_nodes[-2].value
Expand Down
35 changes: 20 additions & 15 deletions hpat/hiframes_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import numpy as np
from hpat.pd_timestamp_ext import timestamp_series_type, pandas_timestamp_type
import hpat
from hpat.pd_series_ext import SeriesType, BoxedSeriesType, string_series_type, arr_to_series_type, arr_to_boxed_series_type, series_to_array_type
from hpat.pd_series_ext import (SeriesType, BoxedSeriesType,
string_series_type, if_arr_to_series_type, arr_to_boxed_series_type,
series_to_array_type, if_series_to_array_type)

# from numba.typing.templates import infer_getattr, AttributeTemplate, bound_function
# from numba import types
Expand Down Expand Up @@ -91,7 +93,8 @@ def generic(self, args, kws):
ret_typ = string_array_type
else:
# use typer of np.concatenate
ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list)
arr_list_to_arr = if_series_to_array_type(arr_list)
ret_typ = numba.typing.npydecl.NdConcatenate(self.context).generic()(arr_list_to_arr)

return signature(ret_typ, arr_list)

Expand Down Expand Up @@ -656,27 +659,29 @@ def generic(self, args, kws):
if isinstance(arr, BoxedSeriesType):
series_type = SeriesType(arr.dtype, 1, 'C')
else:
series_type = arr_to_series_type(arr)
series_type = if_arr_to_series_type(arr)
assert series_type is not None, "unknown type for pd.Series: {}".format(arr)
return signature(series_type, arr)

@lower_builtin(to_series_type, types.Any)
def to_series_dummy_impl(context, builder, sig, args):
    """Lower ``to_series_type`` for an argument of any type.

    The single incoming value (``args[0]``) is handed to
    ``impl_ret_borrowed`` together with the signature's return type, and
    the result of that call is returned.
    """
    return impl_ret_borrowed(context, builder, sig.return_type, args[0])

def if_series_to_array_type(typ, replace_boxed=False):
    """Map Series types found in ``typ`` through ``series_to_array_type``.

    * A ``SeriesType`` is always converted.
    * A ``BoxedSeriesType`` is converted only when ``replace_boxed`` is true.
    * A ``types.Tuple`` / ``types.UniTuple`` is rebuilt as a ``types.Tuple``
      whose members have each been processed recursively.
    * Any other type is returned as is.
    """
    # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed
    # it results in cast error for call dummy_unbox_series
    if isinstance(typ, SeriesType) or (
            replace_boxed and isinstance(typ, BoxedSeriesType)):
        return series_to_array_type(typ, replace_boxed)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_series_to_array_type(member, replace_boxed)
                   for member in typ.types]
        return types.Tuple(members)

    # TODO: other types that can have Series inside: list, set, etc.
    return typ

def to_arr_from_series(arr):
    """Return ``arr`` unchanged.

    In plain Python this is an identity function; the code in this file
    also registers a typing template and a lowering for it.
    """
    return arr

@infer_global(to_arr_from_series)
class ToArrFromSeriesType(AbstractTemplate):
    """Typing template registered for ``to_arr_from_series``."""

    def generic(self, args, kws):
        """Type a single-argument, keyword-free call.

        The return type is ``if_series_to_array_type`` applied to the
        argument type; the argument type itself is kept in the signature.
        """
        assert not kws
        assert len(args) == 1
        in_typ = args[0]
        out_typ = if_series_to_array_type(in_typ)
        return signature(out_typ, in_typ)

@lower_builtin(to_arr_from_series, types.Any)
def to_arr_from_series_dummy_impl(context, builder, sig, args):
    """Lower ``to_arr_from_series`` for an argument of any type.

    The single incoming value (``args[0]``) is handed to
    ``impl_ret_borrowed`` together with the signature's return type, and
    the result of that call is returned.
    """
    return impl_ret_borrowed(context, builder, sig.return_type, args[0])

# dummy func to convert input series to array type
def dummy_unbox_series(arr):
Expand Down
87 changes: 65 additions & 22 deletions hpat/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
find_topo_order, gen_np_call, get_definition, guard,
find_callname, mk_alloc, find_const, is_setitem,
is_getitem)
from numba.typing.templates import Signature
from numba.typing.templates import Signature, bound_function, signature
from numba.typing.arraydecl import ArrayAttribute
import hpat
from hpat.utils import get_definitions, debug_prints
from hpat.hiframes import include_new_blocks, gen_empty_like
from hpat.hiframes_api import if_series_to_array_type
from hpat.str_ext import string_type
from hpat.str_arr_ext import string_array_type, StringArrayType, is_str_arr_typ
from hpat.pd_series_ext import (SeriesType, string_series_type,
series_to_array_type, BoxedSeriesType, dt_index_series_type)
series_to_array_type, BoxedSeriesType, dt_index_series_type,
if_series_to_array_type)


class HiFramesTyped(object):
Expand Down Expand Up @@ -54,13 +55,28 @@ def run(self):
blocks[label].body = new_body

if debug_prints(): # pragma: no cover
print("types before Series replacement:", self.typemap)
print("--- types before Series replacement:", self.typemap)
print("calltypes: ", self.calltypes)

replace_series = {}
for vname, typ in self.typemap.items():
if isinstance(typ, SeriesType):
# print("replacing series type", vname)
new_typ = series_to_array_type(typ)
replace_series[vname] = new_typ
# replace array.call() variable types
if isinstance(typ, types.BoundFunction) and isinstance(typ.this, SeriesType):
this = series_to_array_type(typ.this)
# TODO: handle string arrays, etc.
assert typ.typing_key.startswith('array.')
attr = typ.typing_key[len('array.'):]
resolver = getattr(ArrayAttribute, 'resolve_'+attr)
# methods are either installed with install_array_method or
# using @bound_function in arraydecl.py
if hasattr(resolver, '__wrapped__'):
resolver = bound_function(typ.typing_key)(resolver.__wrapped__)
new_typ = resolver(ArrayAttribute(self.typingctx), this)
replace_series[vname] = new_typ

for vname, typ in replace_series.items():
self.typemap.pop(vname)
Expand All @@ -79,7 +95,22 @@ def run(self):
# reusing sig.args since some types become Const in sig
argtyps = sig.args[:len(call.args)]
kwtyps = {name: self.typemap[v.name] for name, v in call.kws}
self.typemap[call.func.name].get_call_type(self.typingctx , argtyps, kwtyps)

new_sig = self.typemap[call.func.name].get_call_type(
self.typingctx , argtyps, kwtyps)
# calltypes of things like BoundFunction (array.call) need to
# be updated for lowering to work
# XXX: new_sig could be None for things like np.int32()
if call in self.calltypes and new_sig is not None:
old_sig = self.calltypes.pop(call)
# fix types with undefined dtypes in empty_inferred, etc.
return_type = _fix_typ_undefs(new_sig.return_type, old_sig.return_type)
args = tuple(_fix_typ_undefs(a, b) for a,b in zip(new_sig.args, old_sig.args))
self.calltypes[call] = Signature(return_type, args, new_sig.recvr, new_sig.pysig)

if debug_prints(): # pragma: no cover
print("--- types after Series replacement:", self.typemap)
print("calltypes: ", self.calltypes)

self.func_ir._definitions = get_definitions(self.func_ir.blocks)
return if_series_to_array_type(self.return_type)
Expand Down Expand Up @@ -130,7 +161,7 @@ def _run_assign(self, assign):
return [assign]

def _run_call_hiframes(self, assign, lhs, rhs, func_name):
if func_name == 'to_series_type':
if func_name in ('to_series_type', 'to_arr_from_series'):
assign.value = rhs.args[0]
return [assign]

Expand All @@ -155,7 +186,7 @@ def f(column): # pragma: no cover
a = column.astype(np.float64)
f_block = compile_to_numba_ir(f,
{'hpat': hpat, 'np': np}, self.typingctx,
(self.typemap[in_arr.name],),
(if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks.popitem()[1]
replace_arg_nodes(f_block, [in_arr])
nodes = f_block.body[:-3]
Expand Down Expand Up @@ -204,8 +235,8 @@ def _handle_dt_index_binop(self, lhs, rhs, assign):
f_blocks = compile_to_numba_ir(f,
{'numba': numba, 'np': np, 'hpat': hpat},
self.typingctx,
(self.typemap[arg1.name],
self.typemap[arg2.name]),
(if_series_to_array_type(self.typemap[arg1.name]),
if_series_to_array_type(self.typemap[arg2.name])),
self.typemap, self.calltypes).blocks
replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2])
# replace == expression with result of parfor (S)
Expand Down Expand Up @@ -248,8 +279,8 @@ def _handle_string_array_expr(self, lhs, rhs, assign):
f = loc_vars['f']
f_blocks = compile_to_numba_ir(f,
{'numba': numba, 'np': np}, self.typingctx,
(self.typemap[arg1.name],
self.typemap[arg2.name]),
(if_series_to_array_type(self.typemap[arg1.name]),
if_series_to_array_type(self.typemap[arg2.name])),
self.typemap, self.calltypes).blocks
replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2])
# replace == expression with result of parfor (S)
Expand All @@ -273,7 +304,7 @@ def f(_in_arr): # pragma: no cover
_alloc_size = _in_arr.shape
_out_arr = np.empty(_alloc_size, _in_arr.dtype)

f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (self.typemap[in_arr.name],),
f_block = compile_to_numba_ir(f, {'np': np}, self.typingctx, (if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks.popitem()[1]
replace_arg_nodes(f_block, [in_arr])
nodes = f_block.body[:-3] # remove none return
Expand Down Expand Up @@ -302,8 +333,8 @@ def _handle_str_contains(self, assign, lhs, rhs, fname):
f_blocks = compile_to_numba_ir(f,
{'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[str_arr.name],
self.typemap[pat.name]),
(if_series_to_array_type(self.typemap[str_arr.name]),
if_series_to_array_type(self.typemap[pat.name])),
self.typemap, self.calltypes).blocks
replace_arg_nodes(f_blocks[min(f_blocks.keys())], [str_arr, pat])
# replace call with result of parfor (S)
Expand All @@ -324,7 +355,7 @@ def _handle_df_col_filter(self, lhs_name, rhs, assign):
index_var = rhs.index
f_blocks = compile_to_numba_ir(_column_filter_impl_float,
{'numba': numba, 'np': np}, self.typingctx,
(self.typemap[lhs.name], self.typemap[in_arr.name],
(if_series_to_array_type(self.typemap[lhs.name]), if_series_to_array_type(self.typemap[in_arr.name]),
self.typemap[index_var.name]),
self.typemap, self.calltypes).blocks
first_block = min(f_blocks.keys())
Expand All @@ -347,7 +378,7 @@ def f(_in_arr, _ind):

f_block = compile_to_numba_ir(f, {'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[in_arr.name], types.intp),
(if_series_to_array_type(self.typemap[in_arr.name]), types.intp),
self.typemap, self.calltypes).blocks.popitem()[1]
replace_arg_nodes(f_block, [in_arr, ind])
nodes = f_block.body[:-3] # remove none return
Expand All @@ -359,7 +390,7 @@ def f(_in_arr, _ind):
f_blocks = compile_to_numba_ir(_column_count_impl,
{'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[in_arr.name],),
(if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks
topo_order = find_topo_order(f_blocks)
first_block = topo_order[0]
Expand All @@ -375,8 +406,8 @@ def f(_in_arr, _ind):
val = rhs.args[2]
f_blocks = compile_to_numba_ir(_column_fillna_impl,
{'numba': numba, 'np': np}, self.typingctx,
(self.typemap[out_arr.name], self.typemap[in_arr.name],
self.typemap[val.name]),
(if_series_to_array_type(self.typemap[out_arr.name]), if_series_to_array_type(self.typemap[in_arr.name]),
if_series_to_array_type(self.typemap[val.name])),
self.typemap, self.calltypes).blocks
first_block = min(f_blocks.keys())
replace_arg_nodes(f_blocks[first_block], [out_arr, in_arr, val])
Expand All @@ -387,7 +418,7 @@ def f(_in_arr, _ind):
f_blocks = compile_to_numba_ir(_column_sum_impl,
{'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[in_arr.name],),
(if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks
topo_order = find_topo_order(f_blocks)
first_block = topo_order[0]
Expand All @@ -402,7 +433,7 @@ def f(_in_arr, _ind):
f_blocks = compile_to_numba_ir(_column_mean_impl,
{'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[in_arr.name],),
(if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks
topo_order = find_topo_order(f_blocks)
first_block = topo_order[0]
Expand All @@ -417,7 +448,7 @@ def f(_in_arr, _ind):
f_blocks = compile_to_numba_ir(_column_var_impl,
{'numba': numba, 'np': np,
'hpat': hpat}, self.typingctx,
(self.typemap[in_arr.name],),
(if_series_to_array_type(self.typemap[in_arr.name]),),
self.typemap, self.calltypes).blocks
topo_order = find_topo_order(f_blocks)
first_block = topo_order[0]
Expand All @@ -433,6 +464,18 @@ def is_bool_arr(self, varname):
typ = self.typemap[varname]
return isinstance(typ, types.npytypes.Array) and typ.dtype == types.bool_

def _fix_typ_undefs(new_typ, old_typ):
    """Fill in an undefined dtype in ``new_typ`` using ``old_typ``.

    When ``old_typ`` is an array or Series type and ``new_typ`` has dtype
    ``types.undefined``, a copy of ``new_typ`` carrying ``old_typ``'s dtype
    is returned.  When ``old_typ`` is a tuple type, the members of both
    types are paired up and fixed recursively, and a new ``types.Tuple`` is
    returned.  In every other case ``new_typ`` is returned unchanged.
    """
    array_like = (types.Array, SeriesType)
    if isinstance(old_typ, array_like):
        assert isinstance(new_typ, array_like)
        if new_typ.dtype == types.undefined:
            return new_typ.copy(old_typ.dtype)

    if isinstance(old_typ, (types.Tuple, types.UniTuple)):
        fixed_members = []
        for new_member, old_member in zip(new_typ.types, old_typ.types):
            fixed_members.append(_fix_typ_undefs(new_member, old_member))
        return types.Tuple(fixed_members)

    # TODO: fix List, Set
    return new_typ


# float columns can have regular np.nan


Expand Down
Loading