7 changes: 4 additions & 3 deletions hpat/_hpat_common.h
@@ -17,9 +17,10 @@
// Float32 = 6
// Float64 = 7


struct HPAT_CTypes {
enum HPAT_CTypeEnum {
struct HPAT_CTypes
{
enum HPAT_CTypeEnum
{
INT8 = 0,
UINT8 = 1,
INT32 = 2,
152 changes: 75 additions & 77 deletions hpat/distributed.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions hpat/hiframes/pd_dataframe_ext.py
@@ -640,6 +640,7 @@ def merge_overload(left, right, how='inner', on=None, left_on=None,
# check if on's inferred type is NoneType and store the result,
# use it later to branch based on the value available at compile time
onHasNoneType = isinstance(numba.typeof(on), types.NoneType)

def _impl(left, right, how='inner', on=None, left_on=None,
right_on=None, left_index=False, right_index=False, sort=False,
suffixes=('_x', '_y'), copy=True, indicator=False, validate=None):
@@ -660,6 +661,7 @@ def merge_asof_overload(left, right, on=None, left_on=None, right_on=None,
# check if on's inferred type is NoneType and store the result,
# use it later to branch based on the value available at compile time
onHasNoneType = isinstance(numba.typeof(on), types.NoneType)

def _impl(left, right, on=None, left_on=None, right_on=None,
left_index=False, right_index=False, by=None, left_by=None,
right_by=None, suffixes=('_x', '_y'), tolerance=None,
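The onHasNoneType hoisting in the two hunks above works because Numba resolves overloads at compile time: the NoneType check runs once, when the call is typed, and the returned _impl is a fixed specialization rather than a runtime branch. A minimal sketch of the idiom (illustrative function, not HPAT code; assumes only Numba's public overload API):

import numba
from numba import types
from numba.extending import overload

def first_or_default(arr, default=None):
    pass  # stub; jitted callers get the overload below

@overload(first_or_default)
def first_or_default_overload(arr, default=None):
    # default arrives as a Numba *type* (or an Omitted marker), so this
    # check is evaluated once per compiled signature, not per call
    default_is_none = (default is None or
                       isinstance(default, (types.NoneType, types.Omitted)))

    if default_is_none:
        def _impl(arr, default=None):
            return arr[0]
    else:
        def _impl(arr, default=None):
            return arr[0] if len(arr) > 0 else default
    return _impl

@numba.njit
def demo(a):
    return first_or_default(a, 0)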
28 changes: 15 additions & 13 deletions hpat/io/parquet_pio.py
@@ -1,3 +1,9 @@
from hpat.config import _has_pyarrow
import llvmlite.binding as ll
from llvmlite import ir as lir
from numba.targets.arrayobj import make_array
from numba.targets.imputils import lower_builtin
from numba import cgutils
import numba
from numba import ir, config, ir_utils, types
from numba.ir_utils import (mk_unique_var, replace_vars_inner, find_topo_order,
@@ -26,7 +32,6 @@
repr(types.NPDatetime('ns')): 3, 'int8': 6}



def read_parquet():
return 0

@@ -104,13 +109,14 @@ def gen_parquet_read(self, file_name, lhs):

out_nodes = []
# get arrow readers once

def init_arrow_readers(fname):
arrow_readers = get_arrow_readers(unicode_to_char_ptr(fname))

f_block = compile_to_numba_ir(init_arrow_readers,
{'get_arrow_readers': _get_arrow_readers,
'unicode_to_char_ptr': unicode_to_char_ptr,
}).blocks.popitem()[1]
{'get_arrow_readers': _get_arrow_readers,
'unicode_to_char_ptr': unicode_to_char_ptr,
}).blocks.popitem()[1]

replace_arg_nodes(f_block, [file_name])
out_nodes += f_block.body[:-3]
@@ -136,8 +142,8 @@ def cleanup_arrow_readers(readers):
s = del_arrow_readers(readers)

f_block = compile_to_numba_ir(cleanup_arrow_readers,
{'del_arrow_readers': _del_arrow_readers,
}).blocks.popitem()[1]
{'del_arrow_readers': _del_arrow_readers,
}).blocks.popitem()[1]
replace_arg_nodes(f_block, [arrow_readers_var])
out_nodes += f_block.body[:-3]
return col_names, col_arrs, out_nodes
@@ -194,6 +200,7 @@ def get_element_type(dtype):
out = 'bool_'
return out


def _get_numba_typ_from_pa_typ(pa_typ):
import pyarrow as pa
_typ_map = {
@@ -227,6 +234,7 @@ def _get_numba_typ_from_pa_typ(pa_typ):
raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
return _typ_map[pa_typ]


def parquet_file_schema(file_name):
import pyarrow.parquet as pq
col_names = []
@@ -241,6 +249,7 @@ def parquet_file_schema(file_name):
# TODO: close file?
return col_names, col_types


def _rm_pd_index(col_names, col_types):
"""remove pandas index if found in columns
"""
@@ -300,13 +309,6 @@ def generic(self, args, kws):
return signature(types.int32, *unliteral_all(args))


from numba import cgutils
from numba.targets.imputils import lower_builtin
from numba.targets.arrayobj import make_array
from llvmlite import ir as lir
import llvmlite.binding as ll

from hpat.config import _has_pyarrow
if _has_pyarrow:
from .. import parquet_cpp
ll.add_symbol('get_arrow_readers', parquet_cpp.get_arrow_readers)
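The compile_to_numba_ir(...).blocks.popitem()[1] / replace_arg_nodes / body[:-3] sequence that recurs in this file compiles a small helper to Numba IR, rewires the helper's arguments to variables that already exist in the caller's IR, and splices in everything except the three trailing statements that form the implicit "return None". A rough sketch under the pre-0.47 Numba layout this codebase targets, where these helpers live in numba.ir_utils (the helper and variable names are illustrative):

from numba import ir
from numba.ir_utils import compile_to_numba_ir, replace_arg_nodes

def open_reader(fname):
    return 0  # stand-in for a real reader constructor

def _helper(fname):
    reader = open_reader(fname)

# compile the helper to untyped Numba IR; it consists of a single block
f_block = compile_to_numba_ir(_helper,
                              {'open_reader': open_reader}).blocks.popitem()[1]

# inside a real compiler pass this ir.Var would already exist in the caller's IR
file_name_var = ir.Var(f_block.scope, 'file_name', f_block.loc)

# replace the helper's ir.Arg node with that variable, then keep every
# statement except the implicit None return (const, cast, return)
replace_arg_nodes(f_block, [file_name_var])
out_nodes = f_block.body[:-3]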
4 changes: 1 addition & 3 deletions hpat/io/pio.py
@@ -18,7 +18,6 @@
from hpat.utils import find_str_const, debug_prints



def remove_h5(rhs, lives, call_list):
# the call is dead if the read array is dead
if call_list == ['h5read', 'io', pio_api] and rhs.args[6].name not in lives:
@@ -49,8 +48,7 @@ def handle_possible_h5_read(self, assign, lhs, rhs):
loc_vars = {}
exec(func_text, {}, loc_vars)
_h5_read_impl = loc_vars['_h5_read_impl']
f_block = compile_to_numba_ir(
_h5_read_impl, {'hpat': hpat}).blocks.popitem()[1]
f_block = compile_to_numba_ir(_h5_read_impl, {'hpat': hpat}).blocks.popitem()[1]
index_var = rhs.index if rhs.op == 'getitem' else rhs.index_var
replace_arg_nodes(f_block, [rhs.value, index_var])
nodes = f_block.body[:-3] # remove none return
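The exec(func_text, {}, loc_vars) step above is the usual idiom for generating a kernel whose source depends on compile-time information (here, the shape of the h5 read). Stripped of the HPAT specifics, the idiom looks like this (hypothetical kernel body):

import numpy as np

# build kernel source for a concrete number of dimensions
ndim = 2
index = ", ".join(":" for _ in range(ndim))
func_text = "def _read_impl(dset):\n"
func_text += "    return dset[{}]\n".format(index)

loc_vars = {}
exec(func_text, {}, loc_vars)
_read_impl = loc_vars['_read_impl']

print(_read_impl(np.ones((2, 3))).shape)  # (2, 3)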
34 changes: 30 additions & 4 deletions hpat/io/pio_api.py
@@ -24,27 +24,31 @@ class H5FileType(types.Opaque):
def __init__(self):
super(H5FileType, self).__init__(name='H5FileType')


h5file_type = H5FileType()


class H5DatasetType(types.Opaque):
def __init__(self):
super(H5DatasetType, self).__init__(name='H5DatasetType')


h5dataset_type = H5DatasetType()


class H5GroupType(types.Opaque):
def __init__(self):
super(H5GroupType, self).__init__(name='H5GroupType')


h5group_type = H5GroupType()


class H5DatasetOrGroupType(types.Opaque):
def __init__(self):
super(H5DatasetOrGroupType, self).__init__(name='H5DatasetOrGroupType')


h5dataset_or_group_type = H5DatasetOrGroupType()

h5file_data_type = types.int64
@@ -77,11 +81,13 @@ def _create_dataset_typer(args, kws):
name = args[0] if len(args) > 0 else types.unliteral(kwargs['name'])
shape = args[1] if len(args) > 1 else types.unliteral(kwargs['shape'])
dtype = args[2] if len(args) > 2 else types.unliteral(kwargs['dtype'])

def create_dset_stub(name, shape, dtype):
pass
pysig = numba.utils.pysignature(create_dset_stub)
return signature(h5dataset_type, name, shape, dtype).replace(pysig=pysig)


@infer_getattr
class FileAttribute(AttributeTemplate):
key = h5file_type
@@ -104,6 +110,7 @@ def resolve_create_dataset(self, f_id, args, kws):
def resolve_create_group(self, f_id, args, kws):
return signature(h5group_type, *unliteral_all(args))


@infer_getattr
class GroupOrDatasetAttribute(AttributeTemplate):
key = h5dataset_or_group_type
@@ -123,6 +130,7 @@ class GroupAttribute(AttributeTemplate):
def resolve_create_dataset(self, f_id, args, kws):
return _create_dataset_typer(unliteral_all(args), kws)


@infer_global(operator.getitem)
class GetItemH5File(AbstractTemplate):
key = operator.getitem
@@ -136,13 +144,15 @@ def generic(self, args, kws):
if in_f == h5dataset_or_group_type and in_idx == string_type:
return signature(h5dataset_or_group_type, in_f, in_idx)


@infer_global(operator.setitem)
class SetItemH5Dset(AbstractTemplate):
def generic(self, args, kws):
assert not kws
if args[0] == h5dataset_type:
return signature(types.none, *args)


def h5g_get_num_objs():
return

@@ -180,9 +190,11 @@ def h5write():
"""dummy function for C h5_write"""
return


def h5_read_dummy():
return


@infer_global(h5_read_dummy)
class H5ReadType(AbstractTemplate):
def generic(self, args, kws):
@@ -264,8 +276,10 @@ def generic(self, args, kws):
assert len(args) == 2
return signature(string_type, *args)


sum_op = hpat.distributed_api.Reduce_Type.Sum.value


@numba.njit
def get_filter_read_indices(bool_arr):
indices = bool_arr.nonzero()[0]
@@ -297,6 +311,7 @@ def get_filter_read_indices(bool_arr):
end = hpat.distributed_api.get_end(n, n_pes, rank)
return all_indices[start:end]


@intrinsic
def tuple_to_ptr(typingctx, tuple_tp=None):
def codegen(context, builder, sig, args):
@@ -305,14 +320,25 @@ def codegen(context, builder, sig, args):
return builder.bitcast(ptr, lir.IntType(8).as_pointer())
return signature(types.voidptr, tuple_tp), codegen

_h5read_filter = types.ExternalFunction("hpat_h5_read_filter",
types.int32(h5dataset_or_group_type, types.int32, types.voidptr,
types.voidptr, types.intp, types.voidptr, types.int32, types.voidptr, types.int32))

_h5read_filter = types.ExternalFunction(
"hpat_h5_read_filter",
types.int32(
h5dataset_or_group_type,
types.int32,
types.voidptr,
types.voidptr,
types.intp,
types.voidptr,
types.int32,
types.voidptr,
types.int32))


@numba.njit
def h5read_filter(dset_id, ndim, starts, counts, is_parallel, out_arr, read_indices):
starts_ptr = tuple_to_ptr(starts)
counts_ptr = tuple_to_ptr(counts)
type_enum = hpat.distributed_api.get_type_enum(out_arr)
return _h5read_filter(dset_id, ndim, starts_ptr, counts_ptr, is_parallel,
out_arr.ctypes, type_enum, read_indices.ctypes, len(read_indices))
out_arr.ctypes, type_enum, read_indices.ctypes, len(read_indices))
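types.ExternalFunction, as used for _h5read_filter above, lets njit code call a C symbol directly once the symbol's address is registered with the JIT, which is what the ll.add_symbol calls elsewhere in this PR do. A minimal runnable sketch against libc's labs instead of the HPAT C extension (Linux/macOS; the names are illustrative):

import ctypes
import llvmlite.binding as ll
import numba
from numba import types

# register the symbol's address with LLVM's JIT linker
libc = ctypes.CDLL(None)
ll.add_symbol('c_labs', ctypes.cast(libc.labs, ctypes.c_void_p).value)

# declare the C signature so Numba can type and lower calls to it
_c_labs = types.ExternalFunction('c_labs', types.int64(types.int64))

@numba.njit
def demo(x):
    return _c_labs(x)

print(demo(-7))  # 7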
14 changes: 6 additions & 8 deletions hpat/io/pio_lower.py
@@ -5,8 +5,7 @@
import hpat.io
from hpat.io import pio_api
from hpat.utils import _numba_to_c_type_map
from hpat.io.pio_api import (h5file_type, h5dataset_or_group_type, h5dataset_type,
h5group_type)
from hpat.io.pio_api import (h5file_type, h5dataset_or_group_type, h5dataset_type, h5group_type)
from hpat.str_ext import string_type, gen_get_unicode_chars, gen_std_str_to_unicode

from llvmlite import ir as lir
@@ -39,6 +38,7 @@

h5g_close = types.ExternalFunction("h5g_close", types.none(h5group_type))


@lower_builtin(operator.getitem, h5file_type, string_type)
@lower_builtin(operator.getitem, h5dataset_or_group_type, string_type)
def h5_open_dset_lower(context, builder, sig, args):
@@ -113,12 +113,9 @@ def h5_close(context, builder, sig, args):
builder.call(fn, args)
return context.get_dummy_value()

@lower_builtin("h5group.create_dataset", h5group_type, string_type,
types.UniTuple, string_type)
@lower_builtin("h5file.create_dataset", h5file_type, string_type,
types.UniTuple, string_type)
@lower_builtin(pio_api.h5create_dset, h5file_type, string_type,
types.UniTuple, string_type)
@lower_builtin("h5group.create_dataset", h5group_type, string_type, types.UniTuple, string_type)
@lower_builtin("h5file.create_dataset", h5file_type, string_type, types.UniTuple, string_type)
@lower_builtin(pio_api.h5create_dset, h5file_type, string_type, types.UniTuple, string_type)
def h5_create_dset(context, builder, sig, args):
fg_id, dset_name, counts, dtype_str = args

@@ -152,6 +149,7 @@ def h5_create_dset(context, builder, sig, args):

return builder.call(fn, call_args)


@lower_builtin("h5group.create_group", h5group_type, string_type)
@lower_builtin("h5file.create_group", h5file_type, string_type)
@lower_builtin(pio_api.h5create_group, h5file_type, string_type)
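The stacked @lower_builtin decorators above register a single LLVM-emitting implementation under several (function, argument types) keys. A compact, self-contained example of the same machinery, pairing type_callable for typing with lower_builtin for code generation (illustrative function, not part of HPAT):

import numba
from numba import types
from numba.extending import type_callable
from numba.targets.imputils import lower_builtin

def double_it(x):
    return x * 2  # pure-Python fallback

@type_callable(double_it)
def type_double_it(context):
    def typer(x):
        if isinstance(x, types.Integer):
            return x  # result has the same integer type as the input
    return typer

@lower_builtin(double_it, types.Integer)
def lower_double_it(context, builder, sig, args):
    # emit x + x directly as LLVM IR
    return builder.add(args[0], args[0])

@numba.njit
def demo(n):
    return double_it(n)

print(demo(21))  # 42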