From 73fb594f44e5482751aedb8d587cc918b530e075 Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Mon, 4 May 2020 06:18:01 +0300 Subject: [PATCH 1/2] added Ruby code generator --- .travis.yml | 2 +- .travis/setup.sh | 6 + Dockerfile | 3 +- README.md | 3 +- m2cgen/__init__.py | 6 +- m2cgen/cli.py | 1 + m2cgen/exporters.py | 24 ++ m2cgen/interpreters/__init__.py | 2 + m2cgen/interpreters/ruby/__init__.py | 0 m2cgen/interpreters/ruby/code_generator.py | 37 +++ m2cgen/interpreters/ruby/interpreter.py | 61 +++++ m2cgen/interpreters/ruby/linear_algebra.rb | 6 + tests/e2e/executors/__init__.py | 2 + tests/e2e/executors/haskell.py | 9 +- tests/e2e/executors/php.py | 8 +- tests/e2e/executors/ruby.py | 57 +++++ tests/e2e/test_e2e.py | 2 + tests/interpreters/test_ruby.py | 271 +++++++++++++++++++++ 18 files changed, 486 insertions(+), 14 deletions(-) create mode 100644 m2cgen/interpreters/ruby/__init__.py create mode 100644 m2cgen/interpreters/ruby/code_generator.py create mode 100644 m2cgen/interpreters/ruby/interpreter.py create mode 100644 m2cgen/interpreters/ruby/linear_algebra.rb create mode 100644 tests/e2e/executors/ruby.py create mode 100644 tests/interpreters/test_ruby.py diff --git a/.travis.yml b/.travis.yml index 6b30c23c..d84f27e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: env: - TEST=API - - TEST=E2E LANG="c_lang or python or java or go_lang or javascript or php or haskell" + - TEST=E2E LANG="c_lang or python or java or go_lang or javascript or php or haskell or ruby" - TEST=E2E LANG="c_sharp or visual_basic or powershell" - TEST=E2E LANG="r_lang or dart" diff --git a/.travis/setup.sh b/.travis/setup.sh index 41be7375..abd0ef9a 100644 --- a/.travis/setup.sh +++ b/.travis/setup.sh @@ -43,3 +43,9 @@ if [[ $LANG == *"haskell"* ]]; then sudo apt-get update sudo apt-get install --no-install-recommends -y haskell-platform fi + +# Install Ruby. +if [[ $LANG == *"ruby"* ]]; then + sudo apt-get update + sudo apt-get install --no-install-recommends -y ruby-full +fi diff --git a/Dockerfile b/Dockerfile index 185f56ae..616ae308 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,8 @@ RUN apt-get update && \ r-base \ php \ dart \ - haskell-platform && \ + haskell-platform \ + ruby-full && \ rm -rf /var/lib/apt/lists/* WORKDIR /m2cgen diff --git a/README.md b/README.md index 1ab3b619..749a990a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![PyPI Version](https://img.shields.io/pypi/v/m2cgen.svg?logo=pypi&logoColor=white)](https://pypi.org/project/m2cgen) [![Downloads](https://pepy.tech/badge/m2cgen)](https://pepy.tech/project/m2cgen) -**m2cgen** (Model 2 Code Generator) - is a lightweight library which provides an easy way to transpile trained statistical models into a native code (Python, C, Java, Go, JavaScript, Visual Basic, C#, PowerShell, R, PHP, Dart, Haskell). +**m2cgen** (Model 2 Code Generator) - is a lightweight library which provides an easy way to transpile trained statistical models into a native code (Python, C, Java, Go, JavaScript, Visual Basic, C#, PowerShell, R, PHP, Dart, Haskell, Ruby). * [Installation](#installation) * [Supported Languages](#supported-languages) @@ -37,6 +37,7 @@ pip install m2cgen - PowerShell - Python - R +- Ruby - Visual Basic ## Supported Models diff --git a/m2cgen/__init__.py b/m2cgen/__init__.py index d4834b39..91ba03e0 100644 --- a/m2cgen/__init__.py +++ b/m2cgen/__init__.py @@ -13,13 +13,14 @@ export_to_php, export_to_dart, export_to_haskell, + export_to_ruby, ) __all__ = [ - export_to_java, - export_to_python, export_to_c, export_to_go, + export_to_java, + export_to_python, export_to_javascript, export_to_visual_basic, export_to_c_sharp, @@ -28,6 +29,7 @@ export_to_php, export_to_dart, export_to_haskell, + export_to_ruby, ] with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), diff --git a/m2cgen/cli.py b/m2cgen/cli.py index 35a2bcae..2f5a0f63 100644 --- a/m2cgen/cli.py +++ b/m2cgen/cli.py @@ -34,6 +34,7 @@ "dart": (m2cgen.export_to_dart, ["indent", "function_name"]), "haskell": (m2cgen.export_to_haskell, ["module_name", "indent", "function_name"]), + "ruby": (m2cgen.export_to_ruby, ["indent", "function_name"]), } diff --git a/m2cgen/exporters.py b/m2cgen/exporters.py index ff145e11..7abf950d 100644 --- a/m2cgen/exporters.py +++ b/m2cgen/exporters.py @@ -354,6 +354,30 @@ def export_to_haskell(model, module_name="Model", indent=4, return _export(model, interpreter) +def export_to_ruby(model, indent=4, function_name="score"): + """ + Generates a Ruby code representation of the given model. + + Parameters + ---------- + model : object + The model object that should be transpiled into code. + indent : int, optional + The size of indents in the generated code. + function_name : string, optional + Name of the function in the generated code. + + Returns + ------- + code : string + """ + interpreter = interpreters.RubyInterpreter( + indent=indent, + function_name=function_name, + ) + return _export(model, interpreter) + + def _export(model, interpreter): assembler_cls = assemblers.get_assembler_cls(model) model_ast = assembler_cls(model).assemble() diff --git a/m2cgen/interpreters/__init__.py b/m2cgen/interpreters/__init__.py index 002cf55e..71852aee 100644 --- a/m2cgen/interpreters/__init__.py +++ b/m2cgen/interpreters/__init__.py @@ -10,6 +10,7 @@ from .php.interpreter import PhpInterpreter from .dart.interpreter import DartInterpreter from .haskell.interpreter import HaskellInterpreter +from .ruby.interpreter import RubyInterpreter __all__ = [ JavaInterpreter, @@ -24,4 +25,5 @@ PhpInterpreter, DartInterpreter, HaskellInterpreter, + RubyInterpreter, ] diff --git a/m2cgen/interpreters/ruby/__init__.py b/m2cgen/interpreters/ruby/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/m2cgen/interpreters/ruby/code_generator.py b/m2cgen/interpreters/ruby/code_generator.py new file mode 100644 index 00000000..1cc54db7 --- /dev/null +++ b/m2cgen/interpreters/ruby/code_generator.py @@ -0,0 +1,37 @@ +import contextlib + +from m2cgen.interpreters.code_generator import ImperativeCodeGenerator +from m2cgen.interpreters.code_generator import CodeTemplate as CT + + +class RubyCodeGenerator(ImperativeCodeGenerator): + + tpl_var_declaration = CT("") + tpl_num_value = CT("${value}") + tpl_infix_expression = CT("(${left}) ${op} (${right})") + tpl_return_statement = tpl_num_value + tpl_array_index_access = CT("${array_name}[${index}]") + tpl_if_statement = CT("if ${if_def}") + tpl_else_statement = CT("else") + tpl_block_termination = CT("end") + tpl_var_assignment = CT("${var_name} = ${value}") + + def add_function_def(self, name, args): + func_def = "def " + name + "(" + func_def += ", ".join(args) + func_def += ")" + self.add_code_line(func_def) + self.increase_indent() + + @contextlib.contextmanager + def function_definition(self, name, args): + self.add_function_def(name, args) + yield + self.add_block_termination() + + def method_invocation(self, method_name, obj, args): + return ("(" + str(obj) + ")." + method_name + + "(" + ", ".join(map(str, args)) + ")") + + def vector_init(self, values): + return "[" + ", ".join(values) + "]" diff --git a/m2cgen/interpreters/ruby/interpreter.py b/m2cgen/interpreters/ruby/interpreter.py new file mode 100644 index 00000000..92c402df --- /dev/null +++ b/m2cgen/interpreters/ruby/interpreter.py @@ -0,0 +1,61 @@ +import os + +from m2cgen import ast +from m2cgen.interpreters import utils, mixins +from m2cgen.interpreters.ruby.code_generator import RubyCodeGenerator +from m2cgen.interpreters.interpreter import ImperativeToCodeInterpreter + + +class RubyInterpreter(ImperativeToCodeInterpreter, + mixins.LinearAlgebraMixin): + + supported_bin_vector_ops = { + ast.BinNumOpType.ADD: "add_vectors", + } + + supported_bin_vector_num_ops = { + ast.BinNumOpType.MUL: "mul_vector_number", + } + + exponent_function_name = "Math.exp" + sqrt_function_name = "Math.sqrt" + tanh_function_name = "Math.tanh" + + def __init__(self, indent=4, function_name="score", *args, **kwargs): + self.function_name = function_name + + cg = RubyCodeGenerator(indent=indent) + super(RubyInterpreter, self).__init__(cg, *args, **kwargs) + + def interpret(self, expr): + self._cg.reset_state() + self._reset_reused_expr_cache() + + with self._cg.function_definition( + name=self.function_name, + args=[self._feature_array_name]): + last_result = self._do_interpret(expr) + self._cg.add_return_statement(last_result) + + if self.with_linear_algebra: + filename = os.path.join( + os.path.dirname(__file__), "linear_algebra.rb") + self._cg.prepend_code_lines(utils.get_file_content(filename)) + + return self._cg.code + + def interpret_bin_num_expr(self, expr, **kwargs): + if expr.op == ast.BinNumOpType.DIV: + # Always force float result + return self._cg.method_invocation( + method_name="fdiv", + obj=self._do_interpret(expr.left, **kwargs), + args=[self._do_interpret(expr.right, **kwargs)]) + else: + return super().interpret_bin_num_expr(expr, **kwargs) + + def interpret_pow_expr(self, expr, **kwargs): + base_result = self._do_interpret(expr.base_expr, **kwargs) + exp_result = self._do_interpret(expr.exp_expr, **kwargs) + return self._cg.infix_expression( + left=base_result, right=exp_result, op="**") diff --git a/m2cgen/interpreters/ruby/linear_algebra.rb b/m2cgen/interpreters/ruby/linear_algebra.rb new file mode 100644 index 00000000..b9efb038 --- /dev/null +++ b/m2cgen/interpreters/ruby/linear_algebra.rb @@ -0,0 +1,6 @@ +def add_vectors(v1, v2) + v1.zip(v2).map { |x, y| x + y } +end +def mul_vector_number(v1, num) + v1.map { |i| i * num } +end diff --git a/tests/e2e/executors/__init__.py b/tests/e2e/executors/__init__.py index 83b32b7f..e2317136 100644 --- a/tests/e2e/executors/__init__.py +++ b/tests/e2e/executors/__init__.py @@ -10,6 +10,7 @@ from tests.e2e.executors.php import PhpExecutor from tests.e2e.executors.dart import DartExecutor from tests.e2e.executors.haskell import HaskellExecutor +from tests.e2e.executors.ruby import RubyExecutor __all__ = [ JavaExecutor, @@ -24,4 +25,5 @@ PhpExecutor, DartExecutor, HaskellExecutor, + RubyExecutor, ] diff --git a/tests/e2e/executors/haskell.py b/tests/e2e/executors/haskell.py index 58e8dec4..8f465100 100644 --- a/tests/e2e/executors/haskell.py +++ b/tests/e2e/executors/haskell.py @@ -18,10 +18,9 @@ ${print_code} """ -EXECUTE_AND_PRINT_SCALAR = "print res" +PRINT_SCALAR = "print res" -EXECUTE_AND_PRINT_VECTOR = \ - r"""mapM_ (putStr . \x -> show x ++ " ") res""" +PRINT_VECTOR = r"""mapM_ (putStr . \x -> show x ++ " ") res""" class HaskellExecutor(base.BaseExecutor): @@ -46,9 +45,9 @@ def predict(self, X): def prepare(self): if self.model_ast.output_size > 1: - print_code = EXECUTE_AND_PRINT_VECTOR + print_code = PRINT_VECTOR else: - print_code = EXECUTE_AND_PRINT_SCALAR + print_code = PRINT_SCALAR executor_code = string.Template(EXECUTOR_CODE_TPL).substitute( executor_name=self.executor_name, model_name=self.model_name, diff --git a/tests/e2e/executors/php.py b/tests/e2e/executors/php.py index 5653e43c..742a430e 100644 --- a/tests/e2e/executors/php.py +++ b/tests/e2e/executors/php.py @@ -19,11 +19,11 @@ ${print_code} """ -EXECUTE_AND_PRINT_SCALAR = """ +PRINT_SCALAR = """ echo($res); """ -EXECUTE_AND_PRINT_VECTOR = """ +PRINT_VECTOR = """ echo(implode(" ", $res)); """ @@ -53,9 +53,9 @@ def predict(self, X): def prepare(self): if self.model_ast.output_size > 1: - print_code = EXECUTE_AND_PRINT_VECTOR + print_code = PRINT_VECTOR else: - print_code = EXECUTE_AND_PRINT_SCALAR + print_code = PRINT_SCALAR executor_code = string.Template(EXECUTOR_CODE_TPL).substitute( model_file=self.model_name, print_code=print_code) diff --git a/tests/e2e/executors/ruby.py b/tests/e2e/executors/ruby.py new file mode 100644 index 00000000..f0d3c571 --- /dev/null +++ b/tests/e2e/executors/ruby.py @@ -0,0 +1,57 @@ +import os +import string + +from m2cgen import assemblers, interpreters +from tests import utils +from tests.e2e.executors import base + +EXECUTOR_CODE_TPL = """ +input_array = ARGV.map(&:to_f) + +${model_code} + +res = score(input_array) + +${print_code} +""" + +PRINT_SCALAR = """ +puts res +""" + +PRINT_VECTOR = """ +puts res.join(" ") +""" + + +class RubyExecutor(base.BaseExecutor): + model_name = "score" + + def __init__(self, model): + self.model = model + self.interpreter = interpreters.RubyInterpreter() + + assembler_cls = assemblers.get_assembler_cls(model) + self.model_ast = assembler_cls(model).assemble() + + self._ruby = "ruby" + + def predict(self, X): + file_name = os.path.join(self._resource_tmp_dir, + "{}.rb".format(self.model_name)) + exec_args = [self._ruby, file_name, *map(str, X)] + return utils.predict_from_commandline(exec_args) + + def prepare(self): + if self.model_ast.output_size > 1: + print_code = PRINT_VECTOR + else: + print_code = PRINT_SCALAR + executor_code = string.Template(EXECUTOR_CODE_TPL).substitute( + model_code=self.interpreter.interpret(self.model_ast), + print_code=print_code) + + file_name = os.path.join( + self._resource_tmp_dir, "{}.rb".format(self.model_name)) + with open(file_name, "w") as f: + f.write(executor_code) diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py index f370a01c..098c524e 100644 --- a/tests/e2e/test_e2e.py +++ b/tests/e2e/test_e2e.py @@ -31,6 +31,7 @@ PHP = pytest.mark.php DART = pytest.mark.dart HASKELL = pytest.mark.haskell +RUBY = pytest.mark.ruby REGRESSION = pytest.mark.regr CLASSIFICATION = pytest.mark.clf @@ -135,6 +136,7 @@ def classification_binary_random(model, test_fraction=0.02): (executors.PhpExecutor, PHP), (executors.DartExecutor, DART), (executors.HaskellExecutor, HASKELL), + (executors.RubyExecutor, RUBY), ], # These models will be tested against each language specified in the diff --git a/tests/interpreters/test_ruby.py b/tests/interpreters/test_ruby.py new file mode 100644 index 00000000..834c9932 --- /dev/null +++ b/tests/interpreters/test_ruby.py @@ -0,0 +1,271 @@ +from m2cgen import ast +from m2cgen.interpreters import RubyInterpreter +from tests import utils + + +def test_if_expr(): + expr = ast.IfExpr( + ast.CompExpr(ast.NumVal(1), ast.FeatureRef(0), ast.CompOpType.EQ), + ast.NumVal(2), + ast.NumVal(3)) + + expected_code = """ +def score(input) + if (1) == (input[0]) + var0 = 2 + else + var0 = 3 + end + var0 +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_bin_num_expr(): + expr = ast.BinNumExpr( + ast.BinNumExpr( + ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV), + ast.NumVal(2), + ast.BinNumOpType.MUL) + + expected_code = """ +def score(input) + ((input[0]).fdiv(-2)) * (2) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_dependable_condition(): + left = ast.BinNumExpr( + ast.IfExpr( + ast.CompExpr(ast.NumVal(1), + ast.NumVal(1), + ast.CompOpType.EQ), + ast.NumVal(1), + ast.NumVal(2)), + ast.NumVal(2), + ast.BinNumOpType.ADD) + + right = ast.BinNumExpr(ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.DIV) + bool_test = ast.CompExpr(left, right, ast.CompOpType.GTE) + + expr = ast.IfExpr(bool_test, ast.NumVal(1), ast.FeatureRef(0)) + + expected_code = """ +def score(input) + if (1) == (1) + var1 = 1 + else + var1 = 2 + end + if ((var1) + (2)) >= ((1).fdiv(2)) + var0 = 1 + else + var0 = input[0] + end + var0 +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_nested_condition(): + left = ast.BinNumExpr( + ast.IfExpr( + ast.CompExpr(ast.NumVal(1), + ast.NumVal(1), + ast.CompOpType.EQ), + ast.NumVal(1), + ast.NumVal(2)), + ast.NumVal(2), + ast.BinNumOpType.ADD) + + bool_test = ast.CompExpr(ast.NumVal(1), left, ast.CompOpType.EQ) + + expr_nested = ast.IfExpr(bool_test, ast.FeatureRef(2), ast.NumVal(2)) + + expr = ast.IfExpr(bool_test, expr_nested, ast.NumVal(2)) + + expected_code = """ +def score(input) + if (1) == (1) + var1 = 1 + else + var1 = 2 + end + if (1) == ((var1) + (2)) + if (1) == (1) + var2 = 1 + else + var2 = 2 + end + if (1) == ((var2) + (2)) + var0 = input[2] + else + var0 = 2 + end + else + var0 = 2 + end + var0 +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_raw_array(): + expr = ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]) + + expected_code = """ +def score(input) + [3, 4] +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_multi_output(): + expr = ast.IfExpr( + ast.CompExpr( + ast.NumVal(1), + ast.NumVal(1), + ast.CompOpType.EQ), + ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), + ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])) + + expected_code = """ +def score(input) + if (1) == (1) + var0 = [1, 2] + else + var0 = [3, 4] + end + var0 +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_bin_vector_expr(): + expr = ast.BinVectorExpr( + ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), + ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]), + ast.BinNumOpType.ADD) + + expected_code = """ +def add_vectors(v1, v2) + v1.zip(v2).map { |x, y| x + y } +end +def mul_vector_number(v1, num) + v1.map { |i| i * num } +end +def score(input) + add_vectors([1, 2], [3, 4]) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_bin_vector_num_expr(): + expr = ast.BinVectorNumExpr( + ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), + ast.NumVal(1), + ast.BinNumOpType.MUL) + + expected_code = """ +def add_vectors(v1, v2) + v1.zip(v2).map { |x, y| x + y } +end +def mul_vector_number(v1, num) + v1.map { |i| i * num } +end +def score(input) + mul_vector_number([1, 2], 1) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_exp_expr(): + expr = ast.ExpExpr(ast.NumVal(1.0)) + + expected_code = """ +def score(input) + Math.exp(1.0) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_pow_expr(): + expr = ast.PowExpr(ast.NumVal(2.0), ast.NumVal(3.0)) + + expected_code = """ +def score(input) + (2.0) ** (3.0) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_sqrt_expr(): + expr = ast.SqrtExpr(ast.NumVal(2.0)) + + expected_code = """ +def score(input) + Math.sqrt(2.0) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_tanh_expr(): + expr = ast.TanhExpr(ast.NumVal(2.0)) + + expected_code = """ +def score(input) + Math.tanh(2.0) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) + + +def test_reused_expr(): + reused_expr = ast.ExpExpr(ast.NumVal(1.0), to_reuse=True) + expr = ast.BinNumExpr(reused_expr, reused_expr, ast.BinNumOpType.DIV) + + expected_code = """ +def score(input) + var0 = Math.exp(1.0) + (var0).fdiv(var0) +end +""" + + interpreter = RubyInterpreter() + utils.assert_code_equal(interpreter.interpret(expr), expected_code) From b39ee53081eaa0ad1fe8670239f75621f4ad1daf Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 8 May 2020 17:39:19 +0300 Subject: [PATCH 2/2] try to fix recursion error --- tests/e2e/test_e2e.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py index 098c524e..a3440f76 100644 --- a/tests/e2e/test_e2e.py +++ b/tests/e2e/test_e2e.py @@ -15,7 +15,7 @@ from tests.e2e import executors -RECURSION_LIMIT = 5000 +RECURSION_LIMIT = 5500 # pytest marks