From 45bbc801c5902188143ae21f53804910fc11ce1d Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Mon, 19 Oct 2020 03:34:17 +0300 Subject: [PATCH 1/4] Implements init_dataframe as multiple codegen functions Motivation: init_dataframe was implemented via a Numba intrinsic taking *args, which seems to generate redundant extractvalue/insertvalue LLVM instructions, producing quadratic IR when the number of DF columns grows and affecting the total compilation time of functions that create large DFs. This PR replaces the single init_dataframe with multiple functions, based on the number of columns in a DF, which are generated at compile time, thus avoiding the use of *args. --- .../hpat_pandas_dataframe_functions.py | 2 +- sdc/hiframes/api.py | 8 +- sdc/hiframes/pd_dataframe_ext.py | 60 --------- sdc/hiframes/pd_series_ext.py | 2 +- sdc/rewrites/dataframe_constructor.py | 124 ++++++++++++++++-- sdc/tests/test_dataframe.py | 9 ++ 6 files changed, 130 insertions(+), 75 deletions(-) diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py index 8b3cf79bc..a61c89862 100644 --- a/sdc/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -1462,7 +1462,7 @@ def df_getitem_slice_idx_main_codelines(self, idx): res_data = f'res_data_{i}' func_lines += [ f' data_{i} = self._data[{type_id}][{col_id}][idx]', - f' {res_data} = pandas.Series(data_{i}, index=res_index, name="{col}")' + f' {res_data} = data_{i}' ] results.append((col, res_data)) diff --git a/sdc/hiframes/api.py b/sdc/hiframes/api.py index b1e22f536..08d3fc64d 100644 --- a/sdc/hiframes/api.py +++ b/sdc/hiframes/api.py @@ -172,20 +172,20 @@ def fix_df_index(index, *columns): @overload(fix_df_index) -def fix_df_index_overload(index, *columns): +def fix_df_index_overload(index): # TO-DO: replace types.none index with separate type, e.g. 
DefaultIndex if (index is None or isinstance(index, types.NoneType)): - def fix_df_index_impl(index, *columns): + def fix_df_index_impl(index): return None elif isinstance(index, RangeIndexType): - def fix_df_index_impl(index, *columns): + def fix_df_index_impl(index): return index else: # default case, transform index the same as df data - def fix_df_index_impl(index, *columns): + def fix_df_index_impl(index): return fix_df_array(index) return fix_df_index_impl diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py index f863423a3..8efc55947 100644 --- a/sdc/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -85,66 +85,6 @@ def get_structure_maps(col_types, col_names): return column_loc, data_typs_map, types_order -@intrinsic -def init_dataframe(typingctx, *args): - """Create a DataFrame with provided data, index and columns values. - Used as a single constructor for DataFrame and assigning its data, so that - optimization passes can look for init_dataframe() to see if underlying - data has changed, and get the array variables from init_dataframe() args if - not changed. 
- """ - - n_cols = len(args) // 2 - data_typs = tuple(args[:n_cols]) - index_typ = args[n_cols] - column_names = tuple(a.literal_value for a in args[n_cols + 1:]) - - column_loc, data_typs_map, types_order = get_structure_maps(data_typs, column_names) - - def codegen(context, builder, signature, args): - in_tup = args[0] - data_arrs = [builder.extract_value(in_tup, i) for i in range(n_cols)] - index = builder.extract_value(in_tup, n_cols) - column_strs = [numba.cpython.unicode.make_string_from_constant( - context, builder, string_type, c) for c in column_names] - # create dataframe struct and store values - dataframe = cgutils.create_struct_proxy( - signature.return_type)(context, builder) - - data_list_type = [types.List(typ) for typ in types_order] - - data_lists = [] - for typ_id, typ in enumerate(types_order): - data_list_typ = context.build_list(builder, data_list_type[typ_id], - [data_arrs[data_id] for data_id in data_typs_map[typ][1]]) - data_lists.append(data_list_typ) - - data_tup = context.make_tuple( - builder, types.Tuple(data_list_type), data_lists) - - col_list_type = types.List(string_type) - column_list = context.build_list(builder, col_list_type, column_strs) - - dataframe.data = data_tup - dataframe.index = index - dataframe.columns = column_list - dataframe.parent = context.get_constant_null(types.pyobject) - - # increase refcount of stored values - if context.enable_nrt: - context.nrt.incref(builder, index_typ, index) - for var, typ in zip(data_arrs, data_typs): - context.nrt.incref(builder, typ, var) - for var in column_strs: - context.nrt.incref(builder, string_type, var) - - return dataframe._getvalue() - - ret_typ = DataFrameType(data_typs, index_typ, column_names, column_loc=column_loc) - sig = signature(ret_typ, types.Tuple(args)) - return sig, codegen - - # TODO: alias analysis # this function should be used for getting df._data for alias analysis to work # no_cpython_wrapper since Array(DatetimeDate) cannot be boxed diff --git 
a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py index 245426643..d48aaf0f1 100644 --- a/sdc/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -138,7 +138,7 @@ def pd_series_overload(data=None, index=None, dtype=None, name=None, copy=False, def hpat_pandas_series_ctor_impl(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): fix_data = sdc.hiframes.api.fix_df_array(data) - fix_index = sdc.hiframes.api.fix_df_index(index, fix_data) + fix_index = sdc.hiframes.api.fix_df_index(index) return sdc.hiframes.api.init_series(fix_data, fix_index, name) return hpat_pandas_series_ctor_impl diff --git a/sdc/rewrites/dataframe_constructor.py b/sdc/rewrites/dataframe_constructor.py index c9538759e..645b86a27 100644 --- a/sdc/rewrites/dataframe_constructor.py +++ b/sdc/rewrites/dataframe_constructor.py @@ -24,13 +24,18 @@ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** - +import numba +from numba.core import cgutils, types from numba.core.rewrites import (register_rewrite, Rewrite) from numba.core.ir_utils import (guard, find_callname) from numba.core.ir import (Expr) from numba.extending import overload +from numba.core.extending import intrinsic +from numba.core.typing import signature from pandas import DataFrame +from sys import modules +from textwrap import dedent from sdc.rewrites.ir_utils import (find_operations, is_dict, get_tuple_items, get_dict_items, remove_unused_recursively, @@ -38,9 +43,11 @@ declare_constant, import_function, make_call, insert_before) -from sdc.hiframes.pd_dataframe_ext import (init_dataframe, DataFrameType) - +from sdc.hiframes import pd_dataframe_ext as pd_dataframe_ext_module +from sdc.hiframes.pd_dataframe_type import DataFrameType +from sdc.hiframes.pd_dataframe_ext import get_structure_maps, ColumnLoc from sdc.hiframes.api import fix_df_array, fix_df_index +from sdc.str_ext import string_type 
@register_rewrite('before-inference') @@ -54,6 +61,7 @@ class RewriteDataFrame(Rewrite): _df_arg_list = ('data', 'index', 'columns', 'dtype', 'copy') def __init__(self, pipeline): + self._pipeline = pipeline super().__init__(pipeline) self._reset() @@ -79,18 +87,43 @@ def match(self, func_ir, block, typemap, calltypes): return len(self._calls_to_rewrite) > 0 def apply(self): - init_df_stmt = import_function(init_dataframe, self._block, self._func_ir) - for stmt in self._calls_to_rewrite: args = get_call_parameters(call=stmt.value, arg_names=self._df_arg_list) - old_data = args['data'] - args['data'], args['columns'] = self._extract_dict_args(args, self._func_ir) + args_len = len(args['data']) + func_name = f'init_dataframe_{args_len}' + + injected_module = modules[pd_dataframe_ext_module.__name__] + init_df = getattr(injected_module, func_name, None) + if init_df is None: + init_df_text = gen_init_dataframe_text(func_name, args_len) + init_df = gen_init_dataframe_func( + func_name, + init_df_text, + { + 'numba': numba, + 'cgutils': cgutils, + 'signature': signature, + 'types': types, + 'get_structure_maps': get_structure_maps, + 'intrinsic': intrinsic, + 'DataFrameType': DataFrameType, + 'ColumnLoc': ColumnLoc, + 'string_type': string_type, + 'intrinsic': intrinsic + }) + + setattr(pd_dataframe_ext_module, func_name, init_df) + init_df.__module__ = pd_dataframe_ext_module.__name__ + init_df._defn.__module__ = pd_dataframe_ext_module.__name__ + + init_df_stmt = import_function(init_df, self._block, self._func_ir) self._replace_call(stmt, init_df_stmt.target, args, self._block, self._func_ir) remove_unused_recursively(old_data, self._block, self._func_ir) + self._pipeline.typingctx.refresh() return self._block @@ -136,8 +169,7 @@ def _replace_call(stmt, new_call, args, block, func_ir): none_stmt = declare_constant(None, block, func_ir, stmt.loc) index_args = none_stmt.target - index_and_data_args = [index_args] + data_args - index_args = 
RewriteDataFrame._replace_index_with_arrays(index_and_data_args, stmt, block, func_ir) + index_args = RewriteDataFrame._replace_index_with_arrays([index_args], stmt, block, func_ir) all_args = data_args + index_args + columns_args call = Expr.call(new_call, all_args, {}, func.loc) @@ -168,6 +200,80 @@ def _replace_index_with_arrays(args, stmt, block, func_ir): return new_args +def gen_init_dataframe_text(func_name, n_cols): + args_col_data = ['c' + str(i) for i in range(n_cols)] + args_col_names = ['n' + str(i) for i in range(n_cols)] + params = ', '.join(args_col_data + ['index'] + args_col_names) + suffix = ('' if n_cols == 0 else ', ') + + func_text = dedent(f''' + @intrinsic + def {func_name}(typingctx, {params}): + """Create a DataFrame with provided data, index and columns values. + Used as a single constructor for DataFrame and assigning its data, so that + optimization passes can look for init_dataframe() to see if underlying + data has changed, and get the array variables from init_dataframe() args if + not changed. 
+ """ + + n_cols = {n_cols} + data_typs = ({', '.join(args_col_data) + suffix}) + index_typ = index + column_names = tuple(a.literal_value for a in ({', '.join(args_col_names) + suffix})) + column_loc, data_typs_map, types_order = get_structure_maps(data_typs, column_names) + + def codegen(context, builder, signature, args): + {params}, = args + data_arrs = [{', '.join(args_col_data) + suffix}] + column_strs = [numba.cpython.unicode.make_string_from_constant( + context, builder, string_type, c) for c in column_names] + # create dataframe struct and store values + dataframe = cgutils.create_struct_proxy( + signature.return_type)(context, builder) + + data_list_type = [types.List(typ) for typ in types_order] + + data_lists = [] + for typ_id, typ in enumerate(types_order): + data_list_typ = context.build_list(builder, data_list_type[typ_id], + [data_arrs[data_id] for data_id in data_typs_map[typ][1]]) + data_lists.append(data_list_typ) + + data_tup = context.make_tuple( + builder, types.Tuple(data_list_type), data_lists) + + col_list_type = types.List(string_type) + column_list = context.build_list(builder, col_list_type, column_strs) + + dataframe.data = data_tup + dataframe.index = index + dataframe.columns = column_list + dataframe.parent = context.get_constant_null(types.pyobject) + + # increase refcount of stored values + if context.enable_nrt: + context.nrt.incref(builder, index_typ, index) + for var, typ in zip(data_arrs, data_typs): + context.nrt.incref(builder, typ, var) + for var in column_strs: + context.nrt.incref(builder, string_type, var) + + return dataframe._getvalue() + + ret_typ = DataFrameType(data_typs, index_typ, column_names, column_loc=column_loc) + sig = signature(ret_typ, {params}) + return sig, codegen + ''') + + return func_text + + +def gen_init_dataframe_func(func_name, func_text, global_vars): + + loc_vars = {} + exec(func_text, global_vars, loc_vars) + return loc_vars[func_name] + @overload(DataFrame) def pd_dataframe_overload(data, 
index=None, columns=None, dtype=None, copy=False): """ diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 9102c916f..8f97de4fb 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -98,6 +98,15 @@ def test_impl(n): n = 11 self.assertEqual(hpat_func(n), test_impl(n)) + def test_create4(self): + """ Verifies empty DF can be created """ + def test_impl(): + df = pd.DataFrame({}) + return len(df) + hpat_func = self.jit(test_impl) + + self.assertEqual(hpat_func(), test_impl()) + def test_create_str(self): def test_impl(): df = pd.DataFrame({'A': ['a', 'b', 'c']}) From 1155b3e51f4aa6dd12726b3edb0caea9e0aa24f2 Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Wed, 11 Nov 2020 17:50:42 +0300 Subject: [PATCH 2/4] Manually inline fix_df_array/index calls into ctor --- sdc/hiframes/api.py | 2 +- sdc/rewrites/dataframe_constructor.py | 77 ++++++++++++++------------- sdc/tests/test_dataframe.py | 1 - 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/sdc/hiframes/api.py b/sdc/hiframes/api.py index 08d3fc64d..77436f49b 100644 --- a/sdc/hiframes/api.py +++ b/sdc/hiframes/api.py @@ -167,7 +167,7 @@ def fix_df_array_list_str_impl(column): # pragma: no cover return lambda column: column -def fix_df_index(index, *columns): +def fix_df_index(index): return index diff --git a/sdc/rewrites/dataframe_constructor.py b/sdc/rewrites/dataframe_constructor.py index 645b86a27..17c016078 100644 --- a/sdc/rewrites/dataframe_constructor.py +++ b/sdc/rewrites/dataframe_constructor.py @@ -44,8 +44,8 @@ import_function, make_call, insert_before) from sdc.hiframes import pd_dataframe_ext as pd_dataframe_ext_module -from sdc.hiframes.pd_dataframe_type import DataFrameType -from sdc.hiframes.pd_dataframe_ext import get_structure_maps, ColumnLoc +from sdc.hiframes.pd_dataframe_type import DataFrameType, ColumnLoc +from sdc.hiframes.pd_dataframe_ext import get_structure_maps from sdc.hiframes.api import fix_df_array, fix_df_index from 
sdc.str_ext import string_type @@ -112,7 +112,9 @@ def apply(self): 'DataFrameType': DataFrameType, 'ColumnLoc': ColumnLoc, 'string_type': string_type, - 'intrinsic': intrinsic + 'intrinsic': intrinsic, + 'fix_df_array': fix_df_array, + 'fix_df_index': fix_df_index }) setattr(pd_dataframe_ext_module, func_name, init_df) @@ -163,42 +165,17 @@ def _replace_call(stmt, new_call, args, block, func_ir): columns_args = args['columns'] index_args = args.get('index') - data_args = RewriteDataFrame._replace_data_with_arrays(data_args, stmt, block, func_ir) - if index_args is None: # index arg was omitted none_stmt = declare_constant(None, block, func_ir, stmt.loc) index_args = none_stmt.target - index_args = RewriteDataFrame._replace_index_with_arrays([index_args], stmt, block, func_ir) + index_args = [index_args] all_args = data_args + index_args + columns_args call = Expr.call(new_call, all_args, {}, func.loc) stmt.value = call - @staticmethod - def _replace_data_with_arrays(args, stmt, block, func_ir): - new_args = [] - - for var in args: - call_stmt = make_call(fix_df_array, [var], {}, block, func_ir, var.loc) - insert_before(block, call_stmt, stmt) - new_args.append(call_stmt.target) - - return new_args - - @staticmethod - def _replace_index_with_arrays(args, stmt, block, func_ir): - new_args = [] - - call_stmt = make_call(fix_df_index, args, {}, block, func_ir, args[0].loc) - insert_before(block, call_stmt, stmt) - new_args.append(call_stmt.target) - - return new_args - - return new_args - def gen_init_dataframe_text(func_name, n_cols): args_col_data = ['c' + str(i) for i in range(n_cols)] @@ -206,7 +183,8 @@ def gen_init_dataframe_text(func_name, n_cols): params = ', '.join(args_col_data + ['index'] + args_col_names) suffix = ('' if n_cols == 0 else ', ') - func_text = dedent(f''' + func_text = dedent( + f''' @intrinsic def {func_name}(typingctx, {params}): """Create a DataFrame with provided data, index and columns values. 
@@ -217,31 +195,57 @@ def {func_name}(typingctx, {params}): """ n_cols = {n_cols} - data_typs = ({', '.join(args_col_data) + suffix}) - index_typ = index + + input_data_typs = ({', '.join(args_col_data) + suffix}) + fnty = typingctx.resolve_value_type(fix_df_array) + fixed_col_sigs = [] + for i in range({n_cols}): + to_sig = fnty.get_call_type(typingctx, (input_data_typs[i],), {{}}) + fixed_col_sigs.append(to_sig) + data_typs = tuple(fixed_col_sigs[i].return_type for i in range({n_cols})) + need_fix_cols = tuple(data_typs[i] != input_data_typs[i] for i in range({n_cols})) + + input_index_typ = index + fnty = typingctx.resolve_value_type(fix_df_index) + fixed_index_sig = fnty.get_call_type(typingctx, (input_index_typ,), {{}}) + index_typ = fixed_index_sig.return_type + need_fix_index = index_typ != input_index_typ + column_names = tuple(a.literal_value for a in ({', '.join(args_col_names) + suffix})) column_loc, data_typs_map, types_order = get_structure_maps(data_typs, column_names) + col_needs_transform = tuple(not isinstance(data_typs[i], types.Array) for i in range(len(data_typs))) - def codegen(context, builder, signature, args): + def codegen(context, builder, sig, args): {params}, = args data_arrs = [{', '.join(args_col_data) + suffix}] + data_arrs_transformed = [] + for i, arr in enumerate(data_arrs): + if need_fix_cols[i] == False: + data_arrs_transformed.append(arr) + else: + res = context.compile_internal(builder, lambda a: fix_df_array(a), fixed_col_sigs[i], [arr]) + data_arrs_transformed.append(res) + column_strs = [numba.cpython.unicode.make_string_from_constant( context, builder, string_type, c) for c in column_names] # create dataframe struct and store values dataframe = cgutils.create_struct_proxy( - signature.return_type)(context, builder) + sig.return_type)(context, builder) data_list_type = [types.List(typ) for typ in types_order] data_lists = [] for typ_id, typ in enumerate(types_order): data_list_typ = context.build_list(builder, 
data_list_type[typ_id], - [data_arrs[data_id] for data_id in data_typs_map[typ][1]]) + [data_arrs_transformed[data_id] for data_id in data_typs_map[typ][1]]) data_lists.append(data_list_typ) data_tup = context.make_tuple( builder, types.Tuple(data_list_type), data_lists) + if need_fix_index == True: + index = context.compile_internal(builder, lambda a: fix_df_index(a), fixed_index_sig, [index]) + col_list_type = types.List(string_type) column_list = context.build_list(builder, col_list_type, column_strs) @@ -253,7 +257,7 @@ def codegen(context, builder, signature, args): # increase refcount of stored values if context.enable_nrt: context.nrt.incref(builder, index_typ, index) - for var, typ in zip(data_arrs, data_typs): + for var, typ in zip(data_arrs_transformed, data_typs): context.nrt.incref(builder, typ, var) for var in column_strs: context.nrt.incref(builder, string_type, var) @@ -274,6 +278,7 @@ def gen_init_dataframe_func(func_name, func_text, global_vars): exec(func_text, global_vars, loc_vars) return loc_vars[func_name] + @overload(DataFrame) def pd_dataframe_overload(data, index=None, columns=None, dtype=None, copy=False): """ diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 8f97de4fb..38f9d7a76 100644 --- a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -251,7 +251,6 @@ def test_impl(df): hpat_func = self.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) - @skip_numba_jit def test_box1(self): def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) From d5863f58ffec902d15ad5e110c8da944b147a200 Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Wed, 11 Nov 2020 19:23:27 +0300 Subject: [PATCH 3/4] Fixing PEP --- sdc/rewrites/dataframe_constructor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdc/rewrites/dataframe_constructor.py b/sdc/rewrites/dataframe_constructor.py index 17c016078..daec7e1ef 100644 --- 
a/sdc/rewrites/dataframe_constructor.py +++ b/sdc/rewrites/dataframe_constructor.py @@ -184,7 +184,7 @@ def gen_init_dataframe_text(func_name, n_cols): suffix = ('' if n_cols == 0 else ', ') func_text = dedent( - f''' + f''' @intrinsic def {func_name}(typingctx, {params}): """Create a DataFrame with provided data, index and columns values. @@ -236,8 +236,8 @@ def codegen(context, builder, sig, args): data_lists = [] for typ_id, typ in enumerate(types_order): - data_list_typ = context.build_list(builder, data_list_type[typ_id], - [data_arrs_transformed[data_id] for data_id in data_typs_map[typ][1]]) + data_arrs_of_typ = [data_arrs_transformed[data_id] for data_id in data_typs_map[typ][1]] + data_list_typ = context.build_list(builder, data_list_type[typ_id], data_arrs_of_typ) data_lists.append(data_list_typ) data_tup = context.make_tuple( From 473d77381c27d9572c7bd59a6857b00b977b838a Mon Sep 17 00:00:00 2001 From: "Kozlov, Alexey" Date: Fri, 13 Nov 2020 02:20:37 +0300 Subject: [PATCH 4/4] Applying remarks and minor updates to tests --- sdc/rewrites/dataframe_constructor.py | 4 ++-- sdc/tests/test_dataframe.py | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/sdc/rewrites/dataframe_constructor.py b/sdc/rewrites/dataframe_constructor.py index 595eed634..debf0b73c 100644 --- a/sdc/rewrites/dataframe_constructor.py +++ b/sdc/rewrites/dataframe_constructor.py @@ -95,8 +95,8 @@ def apply(self): args_len = len(args['data']) func_name = f'init_dataframe_{args_len}' - injected_module = modules[pd_dataframe_ext_module.__name__] - init_df = getattr(injected_module, func_name, None) + # injected_module = modules[pd_dataframe_ext_module.__name__] + init_df = getattr(pd_dataframe_ext_module, func_name, None) if init_df is None: init_df_text = gen_init_dataframe_text(func_name, args_len) init_df = gen_init_dataframe_func( diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py index 2cc718c4e..d2b2d3547 100644 --- 
a/sdc/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -98,7 +98,7 @@ def test_impl(n): n = 11 self.assertEqual(hpat_func(n), test_impl(n)) - def test_create4(self): + def test_create_empty_df(self): """ Verifies empty DF can be created """ def test_impl(): df = pd.DataFrame({}) @@ -107,6 +107,22 @@ def test_impl(): self.assertEqual(hpat_func(), test_impl()) + def test_create_multiple_dfs(self): + """ Verifies generated dataframe ctor is added to pd_dataframe_ext module + correctly (and numba global context is refreshed), so that subsequent + compilations are not broken. """ + def test_impl(a, b, c): + df1 = pd.DataFrame({'A': a, 'B': b}) + df2 = pd.DataFrame({'C': c}) + total_cols = len(df1.columns) + len(df2.columns) + return total_cols + hpat_func = self.jit(test_impl) + + a1 = np.array([1, 2, 3, 4.0, 5]) + a2 = [7, 6, 5, 4, 3] + a3 = ['a', 'b', 'c', 'd', 'e'] + self.assertEqual(hpat_func(a1, a2, a3), test_impl(a1, a2, a3)) + def test_create_str(self): def test_impl(): df = pd.DataFrame({'A': ['a', 'b', 'c']}) @@ -168,7 +184,7 @@ def test_impl(A, B, index): result_ref = test_impl(A, B, index) pd.testing.assert_frame_equal(result, result_ref) - def test_create_empty_df(self): + def test_unbox_empty_df(self): def test_impl(df): return df sdc_func = self.jit(test_impl)