diff --git a/README.md b/README.md index e0a3ff09..8b7a3ab5 100644 --- a/README.md +++ b/README.md @@ -131,3 +131,7 @@ A: If this error occurs while generating code using an ensemble model, try to re **Q: Generation fails with `ImportError: No module named ` error while transpiling model from a serialized model object.** A: This error indicates that pickle protocol cannot deserialize model object. For unpickling serialized model objects, it is required that their classes must be defined in the top level of an importable module in the unpickling environment. So installation of package which provided model's class definition should solve the problem. + +**Q: Code generated by m2cgen provides different results for some inputs compared to the original Python model from which the code was obtained.** + +A: Some models force input data to be of a particular type during the prediction phase in their native Python libraries. Currently, m2cgen works only with the ``float64`` (``double``) data type. You can try to cast your input data to another type manually and check the results again. Also, some small differences can occur due to the specific implementation of floating-point arithmetic in the target language. 
diff --git a/m2cgen/assemblers/boosting.py b/m2cgen/assemblers/boosting.py index 43893ac7..e9fd6039 100644 --- a/m2cgen/assemblers/boosting.py +++ b/m2cgen/assemblers/boosting.py @@ -151,7 +151,7 @@ def __init__(self, model): def _assemble_tree(self, tree): if "leaf" in tree: - return ast.NumVal(tree["leaf"]) + return ast.NumVal(tree["leaf"], dtype=np.float32) threshold = ast.NumVal(tree["split_condition"], dtype=np.float32) split = tree["split"] diff --git a/m2cgen/assemblers/tree.py b/m2cgen/assemblers/tree.py index f25afd1e..9a40a94d 100644 --- a/m2cgen/assemblers/tree.py +++ b/m2cgen/assemblers/tree.py @@ -1,5 +1,3 @@ -import numpy as np - from m2cgen import ast from m2cgen.assemblers import utils from m2cgen.assemblers.base import ModelAssembler @@ -49,11 +47,5 @@ def _assemble_leaf(self, node_id): def _assemble_cond(self, node_id): feature_idx = self._tree.feature[node_id] - threshold = self._tree.threshold[node_id] - - # sklearn's trees internally work with float32 numbers, so in order - # to have consistent results across all supported languages, we convert - # all thresholds into float32. 
- threshold_num_val = ast.NumVal(threshold, dtype=np.float32) - + threshold_num_val = ast.NumVal(self._tree.threshold[node_id]) return utils.lte(ast.FeatureRef(feature_idx), threshold_num_val) diff --git a/m2cgen/interpreters/code_generator.py b/m2cgen/interpreters/code_generator.py index e222d546..74e35ec8 100644 --- a/m2cgen/interpreters/code_generator.py +++ b/m2cgen/interpreters/code_generator.py @@ -1,6 +1,10 @@ from io import StringIO from weakref import finalize +import numpy as np + +from m2cgen.interpreters.utils import format_float + class CodeTemplate: @@ -11,12 +15,14 @@ def __str__(self): return self.str_template def __call__(self, *args, **kwargs): - # Force calling str() representation - # because without it numpy gives the same output - # for different float types + + def _is_float(value): + return isinstance(value, (float, np.floating)) + return self.str_template.format( - *[str(i) for i in args], - **{k: str(v) for k, v in kwargs.items()}) + *[format_float(i) if _is_float(i) else i for i in args], + **{k: format_float(v) if _is_float(v) else v + for k, v in kwargs.items()}) class BaseCodeGenerator: @@ -201,3 +207,54 @@ class CLikeCodeGenerator(ImperativeCodeGenerator): tpl_else_statement = CodeTemplate("}} else {{") tpl_block_termination = CodeTemplate("}}") tpl_var_assignment = CodeTemplate("{var_name} = {value};") + + +class FunctionalCodeGenerator(BaseCodeGenerator): + """ + This class provides basic functionality to generate code. It is + language-agnostic, but exposes set of attributes which subclasses should + use to define syntax specific for certain language(s). + + !!IMPORTANT!!: Code generators must know nothing about AST. 
+ """ + + tpl_function_signature = NotImplemented + tpl_if_statement = NotImplemented + tpl_else_statement = NotImplemented + tpl_block_termination = NotImplemented + + def reset_state(self): + super().reset_state() + self._func_idx = 0 + + def get_func_name(self): + func_name = f"func{self._func_idx}" + self._func_idx += 1 + return func_name + + # Following statements compute expressions using templates AND add + # it to the result. + + def add_function(self, function_name, function_body): + self.add_code_line(self.tpl_function_signature( + function_name=function_name)) + self.increase_indent() + self.add_code_lines(function_body) + self.decrease_indent() + + def function_invocation(self, function_name, *args): + function_args = " ".join(map(lambda x: f"({x})", args)) + return f"{function_name} {function_args}" + + def add_if_statement(self, if_def): + self.add_code_line(self.tpl_if_statement(if_def=if_def)) + self.increase_indent() + + def add_else_statement(self): + self.decrease_indent() + self.add_code_line(self.tpl_else_statement()) + self.increase_indent() + + def add_block_termination(self): + self.decrease_indent() + self.add_code_line(self.tpl_block_termination()) diff --git a/m2cgen/interpreters/f_sharp/code_generator.py b/m2cgen/interpreters/f_sharp/code_generator.py index f179313c..4cedc987 100644 --- a/m2cgen/interpreters/f_sharp/code_generator.py +++ b/m2cgen/interpreters/f_sharp/code_generator.py @@ -1,45 +1,21 @@ import contextlib from m2cgen.ast import CompOpType -from m2cgen.interpreters.code_generator import BaseCodeGenerator, CodeTemplate +from m2cgen.interpreters.code_generator \ + import FunctionalCodeGenerator, CodeTemplate -class FSharpCodeGenerator(BaseCodeGenerator): +class FSharpCodeGenerator(FunctionalCodeGenerator): + tpl_function_signature = CodeTemplate("let {function_name} =") + tpl_if_statement = CodeTemplate("if ({if_def}) then") + tpl_else_statement = CodeTemplate("else") tpl_num_value = CodeTemplate("{value}") 
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})") tpl_array_index_access = CodeTemplate("{array_name}.[{index}]") - def reset_state(self): - super().reset_state() - self._func_idx = 0 - - def add_if_statement(self, if_def): - self.add_code_line(f"if ({if_def}) then") - self.increase_indent() - - def add_else_statement(self): - self.decrease_indent() - self.add_code_line("else") - self.increase_indent() - def add_if_termination(self): self.decrease_indent() - def get_func_name(self): - func_name = f"func{self._func_idx}" - self._func_idx += 1 - return func_name - - def add_function(self, function_name, function_body): - self.add_code_line(f"let {function_name} =") - self.increase_indent() - self.add_code_lines(function_body) - self.decrease_indent() - - def function_invocation(self, function_name, *args): - function_args = " ".join(map(lambda x: f"({x})", args)) - return f"{function_name} {function_args}" - def add_function_def(self, name, args): func_args = " ".join( [f"({n} : double{' list' if is_vector else ''})" diff --git a/m2cgen/interpreters/f_sharp/interpreter.py b/m2cgen/interpreters/f_sharp/interpreter.py index 5ed17422..e35ae1b5 100644 --- a/m2cgen/interpreters/f_sharp/interpreter.py +++ b/m2cgen/interpreters/f_sharp/interpreter.py @@ -3,10 +3,10 @@ from m2cgen import ast from m2cgen.interpreters import mixins, utils from m2cgen.interpreters.f_sharp.code_generator import FSharpCodeGenerator -from m2cgen.interpreters.interpreter import ToCodeInterpreter +from m2cgen.interpreters.interpreter import FunctionalToCodeInterpreter -class FSharpInterpreter(ToCodeInterpreter, +class FSharpInterpreter(FunctionalToCodeInterpreter, mixins.LinearAlgebraMixin, mixins.BinExpressionDepthTrackingMixin): @@ -38,8 +38,7 @@ def __init__(self, indent=4, function_name="score", *args, **kwargs): self.indent = indent self.function_name = function_name - cg = FSharpCodeGenerator(indent=indent) - super().__init__(cg, *args, **kwargs) + 
super().__init__(self.create_code_generator(), *args, **kwargs) def interpret(self, expr): self._cg.reset_state() @@ -65,26 +64,8 @@ def interpret(self, expr): return self._cg.finalize_and_get_generated_code() - def interpret_if_expr(self, expr, if_code_gen=None, **kwargs): - if if_code_gen is None: - code_gen = FSharpCodeGenerator(indent=self.indent) - nested = False - else: - code_gen = if_code_gen - nested = True - - code_gen.add_if_statement(self._do_interpret( - expr.test, **kwargs)) - code_gen.add_code_line(self._do_interpret( - expr.body, if_code_gen=code_gen, **kwargs)) - code_gen.add_else_statement() - code_gen.add_code_line(self._do_interpret( - expr.orelse, if_code_gen=code_gen, **kwargs)) - code_gen.add_if_termination() - - if not nested: - return self._cache_reused_expr( - expr, code_gen.finalize_and_get_generated_code()) + def create_code_generator(self): + return FSharpCodeGenerator(indent=self.indent) def interpret_pow_expr(self, expr, **kwargs): base_result = self._do_interpret(expr.base_expr, **kwargs) @@ -96,17 +77,6 @@ def interpret_log1p_expr(self, expr, **kwargs): self.with_log1p_expr = True return super().interpret_log1p_expr(expr, **kwargs) - # Cached expressions become functions with no arguments, i.e. values - # which are CAFs. Therefore, they are computed only once. 
- def _cache_reused_expr(self, expr, expr_result): - if expr in self._cached_expr_results: - return self._cached_expr_results[expr].var_name - else: - func_name = self._cg.get_func_name() - self._cached_expr_results[expr] = utils.CachedResult( - var_name=func_name, expr_result=expr_result) - return func_name - def _dump_cache(self): if self._cached_expr_results: for func_name, expr_result in self._cached_expr_results.values(): diff --git a/m2cgen/interpreters/haskell/code_generator.py b/m2cgen/interpreters/haskell/code_generator.py index 4457c1a4..e4ba992c 100644 --- a/m2cgen/interpreters/haskell/code_generator.py +++ b/m2cgen/interpreters/haskell/code_generator.py @@ -1,51 +1,24 @@ import contextlib from m2cgen.ast import CompOpType -from m2cgen.interpreters.code_generator import BaseCodeGenerator, CodeTemplate +from m2cgen.interpreters.code_generator \ + import FunctionalCodeGenerator, CodeTemplate -class HaskellCodeGenerator(BaseCodeGenerator): +class HaskellCodeGenerator(FunctionalCodeGenerator): + tpl_function_signature = CodeTemplate("{function_name} =") + tpl_if_statement = CodeTemplate("if ({if_def}) then") + tpl_else_statement = CodeTemplate("else") tpl_num_value = CodeTemplate("{value}") tpl_infix_expression = CodeTemplate("({left}) {op} ({right})") tpl_module_definition = CodeTemplate("module {module_name} where") - def reset_state(self): - super().reset_state() - self._func_idx = 0 - def array_index_access(self, array_name, index): return self.tpl_infix_expression( left=array_name, op="!!", right=index) - def add_if_statement(self, if_def): - self.add_code_line(f"if ({if_def})") - self.increase_indent() - self.add_code_line("then") - self.increase_indent() - - def add_else_statement(self): - self.decrease_indent() - self.add_code_line("else") - self.increase_indent() - def add_if_termination(self): self.decrease_indent() - self.decrease_indent() - - def get_func_name(self): - func_name = f"func{self._func_idx}" - self._func_idx += 1 - return func_name - 
- def add_function(self, function_name, function_body): - self.add_code_line(f"{function_name} =") - self.increase_indent() - self.add_code_lines(function_body) - self.decrease_indent() - - def function_invocation(self, function_name, *args): - function_args = " ".join(map(lambda x: f"({x})", args)) - return f"{function_name} {function_args}" def add_function_def(self, name, args, is_scalar_output): types = " -> ".join( diff --git a/m2cgen/interpreters/haskell/interpreter.py b/m2cgen/interpreters/haskell/interpreter.py index 69d55ae2..1df162bf 100644 --- a/m2cgen/interpreters/haskell/interpreter.py +++ b/m2cgen/interpreters/haskell/interpreter.py @@ -3,10 +3,10 @@ from m2cgen import ast from m2cgen.interpreters import mixins, utils from m2cgen.interpreters.haskell.code_generator import HaskellCodeGenerator -from m2cgen.interpreters.interpreter import ToCodeInterpreter +from m2cgen.interpreters.interpreter import FunctionalToCodeInterpreter -class HaskellInterpreter(ToCodeInterpreter, +class HaskellInterpreter(FunctionalToCodeInterpreter, mixins.LinearAlgebraMixin): supported_bin_vector_ops = { ast.BinNumOpType.ADD: "addVectors", @@ -31,8 +31,7 @@ def __init__(self, module_name="Model", indent=4, function_name="score", self.indent = indent self.function_name = function_name - cg = HaskellCodeGenerator(indent=indent) - super().__init__(cg, *args, **kwargs) + super().__init__(self.create_code_generator(), *args, **kwargs) def interpret(self, expr): self._cg.reset_state() @@ -64,26 +63,8 @@ def interpret(self, expr): return self._cg.finalize_and_get_generated_code() - def interpret_if_expr(self, expr, if_code_gen=None, **kwargs): - if if_code_gen is None: - code_gen = HaskellCodeGenerator(indent=self.indent) - nested = False - else: - code_gen = if_code_gen - nested = True - - code_gen.add_if_statement(self._do_interpret( - expr.test, **kwargs)) - code_gen.add_code_line(self._do_interpret( - expr.body, if_code_gen=code_gen, **kwargs)) - code_gen.add_else_statement() - 
code_gen.add_code_line(self._do_interpret( - expr.orelse, if_code_gen=code_gen, **kwargs)) - code_gen.add_if_termination() - - if not nested: - return self._cache_reused_expr( - expr, code_gen.finalize_and_get_generated_code()) + def create_code_generator(self): + return HaskellCodeGenerator(indent=self.indent) def interpret_pow_expr(self, expr, **kwargs): base_result = self._do_interpret(expr.base_expr, **kwargs) @@ -95,17 +76,6 @@ def interpret_log1p_expr(self, expr, **kwargs): self.with_log1p_expr = True return super().interpret_log1p_expr(expr, **kwargs) - # Cached expressions become functions with no arguments, i.e. values - # which are CAFs. Therefore, they are computed only once. - def _cache_reused_expr(self, expr, expr_result): - if expr in self._cached_expr_results: - return self._cached_expr_results[expr].var_name - else: - func_name = self._cg.get_func_name() - self._cached_expr_results[expr] = utils.CachedResult( - var_name=func_name, expr_result=expr_result) - return func_name - def _dump_cache(self): if self._cached_expr_results: self._cg.add_code_line("where") diff --git a/m2cgen/interpreters/interpreter.py b/m2cgen/interpreters/interpreter.py index 666fb99a..51c5bc65 100644 --- a/m2cgen/interpreters/interpreter.py +++ b/m2cgen/interpreters/interpreter.py @@ -228,3 +228,51 @@ def _cache_reused_expr(self, expr, expr_result): self._cached_expr_results[expr] = CachedResult( var_name=var_name, expr_result=None) return var_name + + +class FunctionalToCodeInterpreter(ToCodeInterpreter): + """ + This interpreter provides default implementation for the methods + interpreting AST expression into code. + + It can be used for the most functional programming languages and requires + only language-specific instance of the CodeGenerator. + + !!IMPORTANT!!: Code generators used by this interpreter must know nothing + about AST. 
+ """ + + def interpret_if_expr(self, expr, if_code_gen=None, **kwargs): + if if_code_gen is None: + code_gen = self.create_code_generator() + nested = False + else: + code_gen = if_code_gen + nested = True + + code_gen.add_if_statement(self._do_interpret( + expr.test, **kwargs)) + code_gen.add_code_line(self._do_interpret( + expr.body, if_code_gen=code_gen, **kwargs)) + code_gen.add_else_statement() + code_gen.add_code_line(self._do_interpret( + expr.orelse, if_code_gen=code_gen, **kwargs)) + code_gen.add_if_termination() + + if not nested: + return self._cache_reused_expr( + expr, code_gen.finalize_and_get_generated_code()) + + # Cached expressions become functions with no arguments, i.e. values + # which are CAFs. Therefore, they are computed only once. + def _cache_reused_expr(self, expr, expr_result): + if expr in self._cached_expr_results: + return self._cached_expr_results[expr].var_name + else: + func_name = self._cg.get_func_name() + self._cached_expr_results[expr] = CachedResult( + var_name=func_name, expr_result=expr_result) + return func_name + + def create_code_generator(self): + raise NotImplementedError diff --git a/m2cgen/interpreters/java/linear_algebra.java b/m2cgen/interpreters/java/linear_algebra.java index bd455e4a..2ff1eb15 100644 --- a/m2cgen/interpreters/java/linear_algebra.java +++ b/m2cgen/interpreters/java/linear_algebra.java @@ -1,11 +1,11 @@ -public static double[] addVectors(double[] v1, double[] v2) { +private static double[] addVectors(double[] v1, double[] v2) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] + v2[i]; } return result; } -public static double[] mulVectorNumber(double[] v1, double num) { +private static double[] mulVectorNumber(double[] v1, double num) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] * num; diff --git a/m2cgen/interpreters/utils.py b/m2cgen/interpreters/utils.py index f32cd68c..8fbb6adb 100644 --- 
a/m2cgen/interpreters/utils.py +++ b/m2cgen/interpreters/utils.py @@ -1,5 +1,7 @@ import re +import numpy as np + from collections import namedtuple from functools import lru_cache from math import ceil, log @@ -22,3 +24,7 @@ def _get_handler_name(expr_tpe): def _normalize_expr_name(name): return re.sub("(?!^)([A-Z]+)", r"_\1", name).lower() + + +def format_float(value): + return np.format_float_positional(value, unique=True, trim="0") diff --git a/requirements-test.txt b/requirements-test.txt index 0c017fec..4a482cdf 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -7,7 +7,7 @@ git+git://github.com/scikit-learn-contrib/lightning.git@782c18c12961e509099ae84c # Testing tools flake8==3.8.3 -pytest==5.4.3 +pytest==6.0.1 pytest-mock==3.2.0 coveralls==2.1.1 pytest-cov==2.10.0 diff --git a/tests/e2e/executors/c.py b/tests/e2e/executors/c.py index 0bc656fd..de4611c0 100644 --- a/tests/e2e/executors/c.py +++ b/tests/e2e/executors/c.py @@ -54,7 +54,7 @@ def __init__(self, model): def predict(self, X): exec_args = [os.path.join(self._resource_tmp_dir, self.model_name)] - exec_args.extend(map(str, X)) + exec_args.extend(map(interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/c_sharp.py b/tests/e2e/executors/c_sharp.py index 0b5db8cd..80e8915f 100644 --- a/tests/e2e/executors/c_sharp.py +++ b/tests/e2e/executors/c_sharp.py @@ -49,7 +49,7 @@ def __init__(self, model): def predict(self, X): exec_args = [os.path.join(self.target_exec_dir, self.project_name)] - exec_args.extend(map(str, X)) + exec_args.extend(map(interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) @classmethod diff --git a/tests/e2e/executors/dart.py b/tests/e2e/executors/dart.py index 64c489da..d7baf9ba 100644 --- a/tests/e2e/executors/dart.py +++ b/tests/e2e/executors/dart.py @@ -42,7 +42,7 @@ def predict(self, X): f"{self.executor_name}.dart") exec_args = [self._dart, 
file_name, - *map(str, X)] + *map(interpreters.utils.format_float, X)] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/f_sharp.py b/tests/e2e/executors/f_sharp.py index 1089b33b..96f76c3a 100644 --- a/tests/e2e/executors/f_sharp.py +++ b/tests/e2e/executors/f_sharp.py @@ -35,7 +35,7 @@ def __init__(self, model): def predict(self, X): exec_args = [os.path.join(self.target_exec_dir, self.project_name)] - exec_args.extend(map(str, X)) + exec_args.extend(map(interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) @classmethod diff --git a/tests/e2e/executors/go.py b/tests/e2e/executors/go.py index 1d5b99c4..1eca5d1e 100644 --- a/tests/e2e/executors/go.py +++ b/tests/e2e/executors/go.py @@ -55,7 +55,7 @@ def __init__(self, model): def predict(self, X): exec_args = [os.path.join(self._resource_tmp_dir, self.model_name)] - exec_args.extend(map(str, X)) + exec_args.extend(map(interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/haskell.py b/tests/e2e/executors/haskell.py index 022901cf..ba28de0d 100644 --- a/tests/e2e/executors/haskell.py +++ b/tests/e2e/executors/haskell.py @@ -39,7 +39,8 @@ def __init__(self, model): def predict(self, X): app_name = os.path.join(self._resource_tmp_dir, self.executor_name) - exec_args = [app_name, *map(str, X)] + exec_args = [app_name, + *map(interpreters.utils.format_float, X)] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/java.py b/tests/e2e/executors/java.py index e1222599..2b735793 100644 --- a/tests/e2e/executors/java.py +++ b/tests/e2e/executors/java.py @@ -24,7 +24,7 @@ def predict(self, X): self._java_bin, "-cp", self._resource_tmp_dir, "Executor", "Model", "score" ] - exec_args.extend(map(str, X)) + exec_args.extend(map(m2c.interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) 
def prepare(self): diff --git a/tests/e2e/executors/javascript.py b/tests/e2e/executors/javascript.py index 5d387fc0..78b8b7c5 100644 --- a/tests/e2e/executors/javascript.py +++ b/tests/e2e/executors/javascript.py @@ -1,4 +1,5 @@ import os + from py_mini_racer import py_mini_racer import m2cgen as m2c @@ -16,10 +17,11 @@ def predict(self, X): with open(file_name, 'r') as myfile: code = myfile.read() - caller = f"score([{','.join(map(str, X))}]);\n" + args = ",".join(map(m2c.interpreters.utils.format_float, X)) + caller = f"score([{args}]);\n" ctx = py_mini_racer.MiniRacer() - result = ctx.eval(caller + code) + result = ctx.eval(f"{caller}{code}") return result diff --git a/tests/e2e/executors/php.py b/tests/e2e/executors/php.py index 9b56026e..655793f5 100644 --- a/tests/e2e/executors/php.py +++ b/tests/e2e/executors/php.py @@ -47,7 +47,7 @@ def predict(self, X): exec_args = [self._php, "-f", file_name, - *map(str, X)] + *map(interpreters.utils.format_float, X)] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/powershell.py b/tests/e2e/executors/powershell.py index a3eafca7..b80f59ac 100644 --- a/tests/e2e/executors/powershell.py +++ b/tests/e2e/executors/powershell.py @@ -40,7 +40,7 @@ def predict(self, X): "-File", file_name, "-InputArray", - ",".join(map(str, X))] + ",".join(map(interpreters.utils.format_float, X))] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/r.py b/tests/e2e/executors/r.py index f1494209..51fa8dd9 100644 --- a/tests/e2e/executors/r.py +++ b/tests/e2e/executors/r.py @@ -34,7 +34,7 @@ def predict(self, X): exec_args = [self._r, "--vanilla", file_name, - *map(str, X)] + *map(interpreters.utils.format_float, X)] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/ruby.py b/tests/e2e/executors/ruby.py index 3615e2af..71b8329b 100644 --- a/tests/e2e/executors/ruby.py +++ 
b/tests/e2e/executors/ruby.py @@ -38,7 +38,9 @@ def __init__(self, model): def predict(self, X): file_name = os.path.join(self._resource_tmp_dir, f"{self.model_name}.rb") - exec_args = [self._ruby, file_name, *map(str, X)] + exec_args = [self._ruby, + file_name, + *map(interpreters.utils.format_float, X)] return utils.predict_from_commandline(exec_args) def prepare(self): diff --git a/tests/e2e/executors/visual_basic.py b/tests/e2e/executors/visual_basic.py index 3209a9a4..b4189d02 100644 --- a/tests/e2e/executors/visual_basic.py +++ b/tests/e2e/executors/visual_basic.py @@ -51,7 +51,7 @@ def __init__(self, model): def predict(self, X): exec_args = [os.path.join(self.target_exec_dir, self.project_name)] - exec_args.extend(map(str, X)) + exec_args.extend(map(interpreters.utils.format_float, X)) return utils.predict_from_commandline(exec_args) @classmethod diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py index b2aacb5c..4dc5b595 100644 --- a/tests/e2e/test_e2e.py +++ b/tests/e2e/test_e2e.py @@ -568,6 +568,8 @@ def test_e2e(estimator, executor_cls, model_trainer, with executor.prepare_then_cleanup(): for idx in idxs_to_test: y_pred_executed = executor.predict(X_test[idx]) + y_pred_executed = np.array( + y_pred_executed, dtype=y_pred_true.dtype, copy=False) print(f"expected={y_pred_true[idx]}, actual={y_pred_executed}") res = np.isclose(y_pred_true[idx], y_pred_executed, atol=ATOL) assert res if isinstance(res, bool) else res.all() diff --git a/tests/interpreters/test_haskell.py b/tests/interpreters/test_haskell.py index c7877a1f..cb71f703 100644 --- a/tests/interpreters/test_haskell.py +++ b/tests/interpreters/test_haskell.py @@ -16,11 +16,10 @@ def test_if_expr(): func0 where func0 = - if ((1.0) == ((input) !! (0))) - then - 2.0 - else - 3.0 + if ((1.0) == ((input) !! 
(0))) then + 2.0 + else + 3.0 """ interpreter = HaskellInterpreter() @@ -68,17 +67,15 @@ def test_dependable_condition(): func1 where func0 = - if ((1.0) == (1.0)) - then - 1.0 - else - 2.0 + if ((1.0) == (1.0)) then + 1.0 + else + 2.0 func1 = - if (((func0) + (2.0)) >= ((1.0) / (2.0))) - then - 1.0 - else - (input) !! (0) + if (((func0) + (2.0)) >= ((1.0) / (2.0))) then + 1.0 + else + (input) !! (0) """ interpreter = HaskellInterpreter() @@ -110,21 +107,18 @@ def test_nested_condition(): func1 where func0 = - if ((1.0) == (1.0)) - then - 1.0 - else - 2.0 + if ((1.0) == (1.0)) then + 1.0 + else + 2.0 func1 = - if ((1.0) == ((func0) + (2.0))) - then - if ((1.0) == ((func0) + (2.0))) - then - (input) !! (2) - else - 2.0 + if ((1.0) == ((func0) + (2.0))) then + if ((1.0) == ((func0) + (2.0))) then + (input) !! (2) else 2.0 + else + 2.0 """ interpreter = HaskellInterpreter() @@ -162,11 +156,10 @@ def test_multi_output(): func0 where func0 = - if ((1.0) == (1.0)) - then - [1.0, 2.0] - else - [3.0, 4.0] + if ((1.0) == (1.0)) then + [1.0, 2.0] + else + [3.0, 4.0] """ interpreter = HaskellInterpreter() diff --git a/tests/interpreters/test_java.py b/tests/interpreters/test_java.py index a02e47e5..56e32a41 100644 --- a/tests/interpreters/test_java.py +++ b/tests/interpreters/test_java.py @@ -223,14 +223,14 @@ def test_bin_vector_expr(): public static double[] score(double[] input) { return addVectors(new double[] {1.0, 2.0}, new double[] {3.0, 4.0}); } - public static double[] addVectors(double[] v1, double[] v2) { + private static double[] addVectors(double[] v1, double[] v2) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] + v2[i]; } return result; } - public static double[] mulVectorNumber(double[] v1, double num) { + private static double[] mulVectorNumber(double[] v1, double num) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] * num; @@ -254,14 +254,14 @@ def 
test_bin_vector_num_expr(): public static double[] score(double[] input) { return mulVectorNumber(new double[] {1.0, 2.0}, 1.0); } - public static double[] addVectors(double[] v1, double[] v2) { + private static double[] addVectors(double[] v1, double[] v2) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] + v2[i]; } return result; } - public static double[] mulVectorNumber(double[] v1, double num) { + private static double[] mulVectorNumber(double[] v1, double num) { double[] result = new double[v1.length]; for (int i = 0; i < v1.length; i++) { result[i] = v1[i] * num; diff --git a/tests/utils.py b/tests/utils.py index 46bae2d0..2dfb97df 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,11 +13,13 @@ from lightning.impl.base import BaseClassifier as LightBaseClassifier from sklearn import datasets from sklearn.base import BaseEstimator, RegressorMixin, clone -from sklearn.ensemble._forest import ForestClassifier +from sklearn.ensemble._forest import ForestClassifier, BaseForest from sklearn.utils import shuffle from sklearn.linear_model._base import LinearClassifierMixin from sklearn.tree import DecisionTreeClassifier +from sklearn.tree._classes import BaseDecisionTree from sklearn.svm import SVC, NuSVC +from sklearn.svm._base import BaseLibSVM from xgboost import XGBClassifier from m2cgen import ast @@ -125,15 +127,22 @@ def __call__(self, estimator): if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC, LightBaseClassifier)): y_pred = estimator.decision_function(self.X_test) - elif isinstance(estimator, DecisionTreeClassifier): - y_pred = estimator.predict_proba(self.X_test.astype(np.float32)) elif isinstance( estimator, - (ForestClassifier, XGBClassifier, LGBMClassifier)): + (ForestClassifier, DecisionTreeClassifier, + XGBClassifier, LGBMClassifier)): y_pred = estimator.predict_proba(self.X_test) else: y_pred = estimator.predict(self.X_test) + # Some models force input data to be particular type + # 
during prediction phase in their native Python libraries. + # For correct comparison of testing results we mimic the same behavior + if isinstance(estimator, (BaseDecisionTree, BaseForest)): + self.X_test = self.X_test.astype(np.float32, copy=False) + elif isinstance(estimator, BaseLibSVM): + self.X_test = self.X_test.astype(np.float64, copy=False) + return self.X_test, y_pred, fitted_estimator @@ -238,9 +247,9 @@ def predict_from_commandline(exec_args): items = stdout.decode("utf-8").strip().split(" ") if len(items) == 1: - return float(items[0]) + return np.float64(items[0]) else: - return [float(i) for i in items] + return [np.float64(i) for i in items] def cartesian_e2e_params(executors_with_marks, models_with_trainers_with_marks, @@ -284,4 +293,4 @@ def inner(*args, **kwarg): def _is_float(value): - return isinstance(value, (float, np.float16, np.float32, np.float64)) + return isinstance(value, (float, np.floating))