Skip to content

Commit

Permalink
Merge branch 'master' into docker
Browse files Browse the repository at this point in the history
  • Loading branch information
StrikerRUS committed Aug 5, 2020
2 parents 268a697 + 4b3fb61 commit 0a09f5c
Show file tree
Hide file tree
Showing 29 changed files with 213 additions and 208 deletions.
4 changes: 4 additions & 0 deletions README.md
Expand Up @@ -131,3 +131,7 @@ A: If this error occurs while generating code using an ensemble model, try to re
**Q: Generation fails with `ImportError: No module named <module_name_here>` error while transpiling model from a serialized model object.**

A: This error indicates that the pickle protocol cannot deserialize the model object. To unpickle serialized model objects, their classes must be defined at the top level of an importable module in the unpickling environment. Installing the package that provides the model's class definition should therefore solve the problem.

**Q: Code generated by m2cgen produces different results for some inputs compared to the original Python model from which the code was obtained.**

A: Some models force input data to be of a particular type during the prediction phase in their native Python libraries. Currently, m2cgen works only with the ``float64`` (``double``) data type. You can try to cast your input data to another type manually and check the results again. Also, some small differences can occur due to the specific implementation of floating-point arithmetic in the target language.
2 changes: 1 addition & 1 deletion m2cgen/assemblers/boosting.py
Expand Up @@ -151,7 +151,7 @@ def __init__(self, model):

def _assemble_tree(self, tree):
if "leaf" in tree:
return ast.NumVal(tree["leaf"])
return ast.NumVal(tree["leaf"], dtype=np.float32)

threshold = ast.NumVal(tree["split_condition"], dtype=np.float32)
split = tree["split"]
Expand Down
10 changes: 1 addition & 9 deletions m2cgen/assemblers/tree.py
@@ -1,5 +1,3 @@
import numpy as np

from m2cgen import ast
from m2cgen.assemblers import utils
from m2cgen.assemblers.base import ModelAssembler
Expand Down Expand Up @@ -49,11 +47,5 @@ def _assemble_leaf(self, node_id):

def _assemble_cond(self, node_id):
feature_idx = self._tree.feature[node_id]
threshold = self._tree.threshold[node_id]

# sklearn's trees internally work with float32 numbers, so in order
# to have consistent results across all supported languages, we convert
# all thresholds into float32.
threshold_num_val = ast.NumVal(threshold, dtype=np.float32)

threshold_num_val = ast.NumVal(self._tree.threshold[node_id])
return utils.lte(ast.FeatureRef(feature_idx), threshold_num_val)
67 changes: 62 additions & 5 deletions m2cgen/interpreters/code_generator.py
@@ -1,6 +1,10 @@
from io import StringIO
from weakref import finalize

import numpy as np

from m2cgen.interpreters.utils import format_float


class CodeTemplate:

Expand All @@ -11,12 +15,14 @@ def __str__(self):
return self.str_template

def __call__(self, *args, **kwargs):
# Force calling str() representation
# because without it numpy gives the same output
# for different float types

def _is_float(value):
return isinstance(value, (float, np.floating))

return self.str_template.format(
*[str(i) for i in args],
**{k: str(v) for k, v in kwargs.items()})
*[format_float(i) if _is_float(i) else i for i in args],
**{k: format_float(v) if _is_float(v) else v
for k, v in kwargs.items()})


class BaseCodeGenerator:
Expand Down Expand Up @@ -201,3 +207,54 @@ class CLikeCodeGenerator(ImperativeCodeGenerator):
tpl_else_statement = CodeTemplate("}} else {{")
tpl_block_termination = CodeTemplate("}}")
tpl_var_assignment = CodeTemplate("{var_name} = {value};")


class FunctionalCodeGenerator(BaseCodeGenerator):
    """
    Shared code-generation helpers for functional target languages.

    The class itself is language-agnostic: concrete generators supply the
    syntax by overriding the ``tpl_*`` templates declared below.
    !!IMPORTANT!!: Code generators must know nothing about AST.
    """

    # Syntax templates; subclasses are expected to replace these
    # placeholders with callable templates.
    tpl_function_signature = NotImplemented
    tpl_if_statement = NotImplemented
    tpl_else_statement = NotImplemented
    tpl_block_termination = NotImplemented

    def reset_state(self):
        """Reset the parent state and restart the function-name counter."""
        super().reset_state()
        self._func_idx = 0

    def get_func_name(self):
        """Return the next name of the form ``func<N>`` and advance N."""
        current_idx = self._func_idx
        self._func_idx = current_idx + 1
        return f"func{current_idx}"

    # The ``add_*`` methods below render a template AND append the rendered
    # code to the generator's output.

    def add_function(self, function_name, function_body):
        """Emit the function signature followed by its body, indented."""
        signature = self.tpl_function_signature(function_name=function_name)
        self.add_code_line(signature)
        self.increase_indent()
        self.add_code_lines(function_body)
        self.decrease_indent()

    def function_invocation(self, function_name, *args):
        """
        Build a call expression: the function name followed by each
        argument wrapped in parentheses, separated by single spaces.
        Nothing is appended to the output; the string is returned.
        """
        wrapped_args = " ".join(f"({arg})" for arg in args)
        return f"{function_name} {wrapped_args}"

    def add_if_statement(self, if_def):
        """Emit the ``if`` line for ``if_def`` and indent what follows."""
        if_line = self.tpl_if_statement(if_def=if_def)
        self.add_code_line(if_line)
        self.increase_indent()

    def add_else_statement(self):
        """Dedent, emit the ``else`` line, then indent what follows."""
        self.decrease_indent()
        else_line = self.tpl_else_statement()
        self.add_code_line(else_line)
        self.increase_indent()

    def add_block_termination(self):
        """Dedent and emit the block-termination line."""
        self.decrease_indent()
        termination_line = self.tpl_block_termination()
        self.add_code_line(termination_line)
36 changes: 6 additions & 30 deletions m2cgen/interpreters/f_sharp/code_generator.py
@@ -1,45 +1,21 @@
import contextlib

from m2cgen.ast import CompOpType
from m2cgen.interpreters.code_generator import BaseCodeGenerator, CodeTemplate
from m2cgen.interpreters.code_generator \
import FunctionalCodeGenerator, CodeTemplate


class FSharpCodeGenerator(BaseCodeGenerator):
class FSharpCodeGenerator(FunctionalCodeGenerator):
tpl_function_signature = CodeTemplate("let {function_name} =")
tpl_if_statement = CodeTemplate("if ({if_def}) then")
tpl_else_statement = CodeTemplate("else")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_array_index_access = CodeTemplate("{array_name}.[{index}]")

def reset_state(self):
super().reset_state()
self._func_idx = 0

def add_if_statement(self, if_def):
self.add_code_line(f"if ({if_def}) then")
self.increase_indent()

def add_else_statement(self):
self.decrease_indent()
self.add_code_line("else")
self.increase_indent()

def add_if_termination(self):
self.decrease_indent()

def get_func_name(self):
func_name = f"func{self._func_idx}"
self._func_idx += 1
return func_name

def add_function(self, function_name, function_body):
self.add_code_line(f"let {function_name} =")
self.increase_indent()
self.add_code_lines(function_body)
self.decrease_indent()

def function_invocation(self, function_name, *args):
function_args = " ".join(map(lambda x: f"({x})", args))
return f"{function_name} {function_args}"

def add_function_def(self, name, args):
func_args = " ".join(
[f"({n} : double{' list' if is_vector else ''})"
Expand Down
40 changes: 5 additions & 35 deletions m2cgen/interpreters/f_sharp/interpreter.py
Expand Up @@ -3,10 +3,10 @@
from m2cgen import ast
from m2cgen.interpreters import mixins, utils
from m2cgen.interpreters.f_sharp.code_generator import FSharpCodeGenerator
from m2cgen.interpreters.interpreter import ToCodeInterpreter
from m2cgen.interpreters.interpreter import FunctionalToCodeInterpreter


class FSharpInterpreter(ToCodeInterpreter,
class FSharpInterpreter(FunctionalToCodeInterpreter,
mixins.LinearAlgebraMixin,
mixins.BinExpressionDepthTrackingMixin):

Expand Down Expand Up @@ -38,8 +38,7 @@ def __init__(self, indent=4, function_name="score", *args, **kwargs):
self.indent = indent
self.function_name = function_name

cg = FSharpCodeGenerator(indent=indent)
super().__init__(cg, *args, **kwargs)
super().__init__(self.create_code_generator(), *args, **kwargs)

def interpret(self, expr):
self._cg.reset_state()
Expand All @@ -65,26 +64,8 @@ def interpret(self, expr):

return self._cg.finalize_and_get_generated_code()

def interpret_if_expr(self, expr, if_code_gen=None, **kwargs):
if if_code_gen is None:
code_gen = FSharpCodeGenerator(indent=self.indent)
nested = False
else:
code_gen = if_code_gen
nested = True

code_gen.add_if_statement(self._do_interpret(
expr.test, **kwargs))
code_gen.add_code_line(self._do_interpret(
expr.body, if_code_gen=code_gen, **kwargs))
code_gen.add_else_statement()
code_gen.add_code_line(self._do_interpret(
expr.orelse, if_code_gen=code_gen, **kwargs))
code_gen.add_if_termination()

if not nested:
return self._cache_reused_expr(
expr, code_gen.finalize_and_get_generated_code())
def create_code_generator(self):
return FSharpCodeGenerator(indent=self.indent)

def interpret_pow_expr(self, expr, **kwargs):
base_result = self._do_interpret(expr.base_expr, **kwargs)
Expand All @@ -96,17 +77,6 @@ def interpret_log1p_expr(self, expr, **kwargs):
self.with_log1p_expr = True
return super().interpret_log1p_expr(expr, **kwargs)

# Cached expressions become functions with no arguments, i.e. values
# which are CAFs. Therefore, they are computed only once.
def _cache_reused_expr(self, expr, expr_result):
if expr in self._cached_expr_results:
return self._cached_expr_results[expr].var_name
else:
func_name = self._cg.get_func_name()
self._cached_expr_results[expr] = utils.CachedResult(
var_name=func_name, expr_result=expr_result)
return func_name

def _dump_cache(self):
if self._cached_expr_results:
for func_name, expr_result in self._cached_expr_results.values():
Expand Down
39 changes: 6 additions & 33 deletions m2cgen/interpreters/haskell/code_generator.py
@@ -1,51 +1,24 @@
import contextlib

from m2cgen.ast import CompOpType
from m2cgen.interpreters.code_generator import BaseCodeGenerator, CodeTemplate
from m2cgen.interpreters.code_generator \
import FunctionalCodeGenerator, CodeTemplate


class HaskellCodeGenerator(BaseCodeGenerator):
class HaskellCodeGenerator(FunctionalCodeGenerator):
tpl_function_signature = CodeTemplate("{function_name} =")
tpl_if_statement = CodeTemplate("if ({if_def}) then")
tpl_else_statement = CodeTemplate("else")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_module_definition = CodeTemplate("module {module_name} where")

def reset_state(self):
super().reset_state()
self._func_idx = 0

def array_index_access(self, array_name, index):
return self.tpl_infix_expression(
left=array_name, op="!!", right=index)

def add_if_statement(self, if_def):
self.add_code_line(f"if ({if_def})")
self.increase_indent()
self.add_code_line("then")
self.increase_indent()

def add_else_statement(self):
self.decrease_indent()
self.add_code_line("else")
self.increase_indent()

def add_if_termination(self):
self.decrease_indent()
self.decrease_indent()

def get_func_name(self):
func_name = f"func{self._func_idx}"
self._func_idx += 1
return func_name

def add_function(self, function_name, function_body):
self.add_code_line(f"{function_name} =")
self.increase_indent()
self.add_code_lines(function_body)
self.decrease_indent()

def function_invocation(self, function_name, *args):
function_args = " ".join(map(lambda x: f"({x})", args))
return f"{function_name} {function_args}"

def add_function_def(self, name, args, is_scalar_output):
types = " -> ".join(
Expand Down
40 changes: 5 additions & 35 deletions m2cgen/interpreters/haskell/interpreter.py
Expand Up @@ -3,10 +3,10 @@
from m2cgen import ast
from m2cgen.interpreters import mixins, utils
from m2cgen.interpreters.haskell.code_generator import HaskellCodeGenerator
from m2cgen.interpreters.interpreter import ToCodeInterpreter
from m2cgen.interpreters.interpreter import FunctionalToCodeInterpreter


class HaskellInterpreter(ToCodeInterpreter,
class HaskellInterpreter(FunctionalToCodeInterpreter,
mixins.LinearAlgebraMixin):
supported_bin_vector_ops = {
ast.BinNumOpType.ADD: "addVectors",
Expand All @@ -31,8 +31,7 @@ def __init__(self, module_name="Model", indent=4, function_name="score",
self.indent = indent
self.function_name = function_name

cg = HaskellCodeGenerator(indent=indent)
super().__init__(cg, *args, **kwargs)
super().__init__(self.create_code_generator(), *args, **kwargs)

def interpret(self, expr):
self._cg.reset_state()
Expand Down Expand Up @@ -64,26 +63,8 @@ def interpret(self, expr):

return self._cg.finalize_and_get_generated_code()

def interpret_if_expr(self, expr, if_code_gen=None, **kwargs):
if if_code_gen is None:
code_gen = HaskellCodeGenerator(indent=self.indent)
nested = False
else:
code_gen = if_code_gen
nested = True

code_gen.add_if_statement(self._do_interpret(
expr.test, **kwargs))
code_gen.add_code_line(self._do_interpret(
expr.body, if_code_gen=code_gen, **kwargs))
code_gen.add_else_statement()
code_gen.add_code_line(self._do_interpret(
expr.orelse, if_code_gen=code_gen, **kwargs))
code_gen.add_if_termination()

if not nested:
return self._cache_reused_expr(
expr, code_gen.finalize_and_get_generated_code())
def create_code_generator(self):
return HaskellCodeGenerator(indent=self.indent)

def interpret_pow_expr(self, expr, **kwargs):
base_result = self._do_interpret(expr.base_expr, **kwargs)
Expand All @@ -95,17 +76,6 @@ def interpret_log1p_expr(self, expr, **kwargs):
self.with_log1p_expr = True
return super().interpret_log1p_expr(expr, **kwargs)

# Cached expressions become functions with no arguments, i.e. values
# which are CAFs. Therefore, they are computed only once.
def _cache_reused_expr(self, expr, expr_result):
if expr in self._cached_expr_results:
return self._cached_expr_results[expr].var_name
else:
func_name = self._cg.get_func_name()
self._cached_expr_results[expr] = utils.CachedResult(
var_name=func_name, expr_result=expr_result)
return func_name

def _dump_cache(self):
if self._cached_expr_results:
self._cg.add_code_line("where")
Expand Down

0 comments on commit 0a09f5c

Please sign in to comment.