Skip to content

Commit

Permalink
Merge 207436f into 592ab8d
Browse files Browse the repository at this point in the history
  • Loading branch information
izeigerman committed Jan 29, 2019
2 parents 592ab8d + 207436f commit 2f93980
Show file tree
Hide file tree
Showing 18 changed files with 454 additions and 95 deletions.
4 changes: 2 additions & 2 deletions m2cgen/assemblers/__init__.py
@@ -1,9 +1,9 @@
from .linear import LinearRegressionAssembler
from .linear import LinearModelAssembler
from .tree import TreeModelAssembler
from .ensemble import RandomForestModelAssembler

__all__ = [
LinearRegressionAssembler,
LinearModelAssembler,
TreeModelAssembler,
RandomForestModelAssembler
]
35 changes: 26 additions & 9 deletions m2cgen/assemblers/linear.py
Expand Up @@ -3,16 +3,33 @@
from m2cgen.assemblers.base import ModelAssembler


class LinearRegressionAssembler(ModelAssembler):
class LinearModelAssembler(ModelAssembler):

def assemble(self):
feature_weight_mul_ops = []
return self._build_ast()

for (index, value) in enumerate(self.model.coef_):
feature_weight_mul_ops.append(
utils.mul(ast.FeatureRef(index), ast.NumVal(value)))
def _build_ast(self):
coef = utils.to_2d_array(self.model.coef_)
intercept = utils.to_1d_array(self.model.intercept_)

return utils.apply_op_to_expressions(
ast.BinNumOpType.ADD,
ast.NumVal(self.model.intercept_),
*feature_weight_mul_ops)
if coef.shape[0] == 1:
return _linear_to_ast(coef[0], intercept[0])

exprs = []
for idx in range(coef.shape[0]):
exprs.append(ast.SubroutineExpr(
_linear_to_ast(coef[idx], intercept[idx])))
return ast.VectorExpr(exprs)


def _linear_to_ast(coef, intercept):
    """Build the AST expression for one linear output.

    Each entry of ``coef`` is turned into a product of the feature at the
    same position and the coefficient value; those products are then passed,
    after the intercept, to ``utils.apply_op_to_expressions`` with the ADD
    operator.
    """
    weighted_features = [
        utils.mul(ast.FeatureRef(position), ast.NumVal(weight))
        for position, weight in enumerate(coef)
    ]

    bias = ast.NumVal(intercept)
    return utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD, bias, *weighted_features)
14 changes: 12 additions & 2 deletions m2cgen/assemblers/tree.py
Expand Up @@ -3,6 +3,7 @@
from m2cgen import ast
from m2cgen.assemblers import utils
from m2cgen.assemblers.base import ModelAssembler
from sklearn import tree
from sklearn.tree._tree import TREE_LEAF


Expand All @@ -11,6 +12,9 @@ class TreeModelAssembler(ModelAssembler):
def __init__(self, model):
    """Store the fitted tree and decide whether leaves are vectors."""
    super().__init__(model)
    self._tree = model.tree_
    # The output is a vector only for a DecisionTreeClassifier that
    # reports more than one class; every other model stays scalar.
    self._is_vector_output = (
        isinstance(self.model, tree.DecisionTreeClassifier)
        and self.model.n_classes_ > 1)

def assemble(self):
    """Assemble the expression for the tree, starting from node 0."""
    root_node_id = 0
    return self._assemble_node(root_node_id)
Expand All @@ -31,8 +35,14 @@ def _assemble_branch(self, node_id):

def _assemble_leaf(self, node_id):
scores = self._tree.value[node_id][0]
assert len(scores) == 1, "Only regression trees are supported"
return ast.NumVal(scores[0])
if self._is_vector_output:
outputs = []
for s in scores:
outputs.append(ast.NumVal(s))
return ast.VectorExpr(outputs)
else:
assert len(scores) == 1, "Unexpected number of outputs"
return ast.NumVal(scores[0])

def _assemble_cond(self, node_id):
feature_idx = self._tree.feature[node_id]
Expand Down
13 changes: 13 additions & 0 deletions m2cgen/assemblers/utils.py
@@ -1,3 +1,4 @@
import numpy as np
from m2cgen import ast


Expand All @@ -21,3 +22,15 @@ def _inner(current_expr, *rest_exprs):
*rest_exprs[1:])

return _inner(ast.BinNumExpr(exprs[0], exprs[1], op), *exprs[2:])


def to_1d_array(var):
    """Return ``var`` as a one-dimensional NumPy array.

    Scalars become a single-element array; nested input is flattened to a
    length equal to its total number of elements.
    """
    flat_source = np.asarray(var)
    return flat_source.reshape(flat_source.size)


def to_2d_array(var):
    """Return ``var`` as a two-dimensional NumPy array.

    Input that is already two-dimensional keeps its shape. Anything else
    (scalar, 1-D sequence, higher-rank array) becomes a single row holding
    all of its elements.

    The shape is read from the converted array rather than from ``var``
    itself, so nested lists and tuples, which have no ``.shape`` attribute,
    are accepted as well as ``ndarray`` input.
    """
    arr = np.asarray(var)
    if arr.ndim == 2:
        rows, cols = arr.shape
    else:
        rows, cols = 1, arr.size
    return arr.reshape((rows, cols))
29 changes: 28 additions & 1 deletion m2cgen/ast.py
Expand Up @@ -2,7 +2,7 @@


class Expr:
pass
is_vector_output = False


class FeatureRef(Expr):
Expand Down Expand Up @@ -36,6 +36,9 @@ class BinNumOpType(Enum):

class BinNumExpr(NumExpr):
def __init__(self, left, right, op):
assert not left.is_vector_output, "Only scalars are supported"
assert not right.is_vector_output, "Only scalars are supported"

self.left = left
self.right = right
self.op = op
Expand All @@ -45,6 +48,20 @@ def __str__(self):
return "BinNumExpr(" + args + ")"


class VectorExpr(NumExpr):
    """Numeric expression made of a sequence of scalar sub-expressions."""

    is_vector_output = True

    def __init__(self, exprs):
        # Nesting is rejected: no element may itself be a vector output.
        assert not any(item.is_vector_output for item in exprs), (
            "All expressions for VectorExpr must be scalar")

        self.exprs = exprs

    def __str__(self):
        rendered_items = ",".join(str(item) for item in self.exprs)
        return "VectorExpr([" + rendered_items + "])"


# Boolean Expressions.

class BoolExpr(Expr):
Expand All @@ -62,6 +79,9 @@ class CompOpType(Enum):

class CompExpr(BoolExpr):
def __init__(self, left, right, op):
assert not left.is_vector_output, "Only scalars are supported"
assert not right.is_vector_output, "Only scalars are supported"

self.left = left
self.right = right
self.op = op
Expand All @@ -79,10 +99,15 @@ class CtrlExpr(Expr):

class IfExpr(CtrlExpr):
    """Conditional expression holding a test and two branch expressions.

    Both branches must agree on ``is_vector_output``; the instance takes
    the flag from ``body``.
    """

    def __init__(self, test, body, orelse):
        # XOR is truthy exactly when one branch is a vector and the other
        # is a scalar, which is the combination that is not allowed.
        assert not (body.is_vector_output ^ orelse.is_vector_output), (
            "body and orelse expressions should have same is_vector_output")

        self.test = test
        self.body = body
        self.orelse = orelse

        self.is_vector_output = body.is_vector_output

    def __str__(self):
        parts = (self.test, self.body, self.orelse)
        return "IfExpr(" + ",".join(map(str, parts)) + ")"
Expand All @@ -91,8 +116,10 @@ def __str__(self):
class TransparentExpr(CtrlExpr):
    """Control expression that wraps exactly one inner expression."""

    def __init__(self, expr):
        # The wrapper reports the same scalar/vector kind as what it wraps.
        wrapped_is_vector = expr.is_vector_output
        self.expr = expr
        self.is_vector_output = wrapped_is_vector


class SubroutineExpr(TransparentExpr):
    """Wrapper expression rendered as ``SubroutineExpr(<inner>)``."""

    def __str__(self):
        inner_text = str(self.expr)
        return "".join(("SubroutineExpr(", inner_text, ")"))
4 changes: 3 additions & 1 deletion m2cgen/exporters.py
Expand Up @@ -7,8 +7,10 @@ class BaseExporter:
interpreter = None

models_to_assemblers = {
"LinearRegression": assemblers.LinearRegressionAssembler,
"LinearRegression": assemblers.LinearModelAssembler,
"LogisticRegression": assemblers.LinearModelAssembler,
"DecisionTreeRegressor": assemblers.TreeModelAssembler,
"DecisionTreeClassifier": assemblers.TreeModelAssembler,
"RandomForestRegressor": assemblers.RandomForestModelAssembler,
}

Expand Down
16 changes: 15 additions & 1 deletion m2cgen/interpreters/code_generator.py
Expand Up @@ -26,6 +26,9 @@ class BaseCodeGenerator:
tpl_block_termination = NotImplemented
tpl_var_assignment = NotImplemented

scalar_output_type = NotImplemented
vector_output_type = NotImplemented

def __init__(self, indent=4):
self._indent = indent
self.reset_state()
Expand Down Expand Up @@ -66,8 +69,9 @@ def add_code_lines(self, lines):
def add_return_statement(self, value):
    """Render the return-statement template for ``value`` and emit it."""
    statement = self.tpl_return_statement(value=value)
    self.add_code_line(statement)

def add_var_declaration(self, var_type="double"):
def add_var_declaration(self, is_vector_type=False):
var_name = self.get_var_name()
var_type = self._get_var_type(is_vector_type)
self.add_code_line(
self.tpl_var_declaration(
var_type=var_type, var_name=var_name))
Expand Down Expand Up @@ -103,6 +107,13 @@ def array_index_access(self, array_name, index):
return self.tpl_array_index_access(
array_name=array_name, index=index)

# Helpers

def _get_var_type(self, is_vector):
return (
self.vector_output_type if is_vector
else self.scalar_output_type)


class CLikeCodeGenerator(BaseCodeGenerator):
"""
Expand All @@ -119,3 +130,6 @@ class CLikeCodeGenerator(BaseCodeGenerator):
tpl_else_statement = CodeTemplate("} else {")
tpl_block_termination = CodeTemplate("}")
tpl_var_assignment = CodeTemplate("${var_name} = ${value};")

scalar_output_type = "double"
vector_output_type = "double[]"
19 changes: 9 additions & 10 deletions m2cgen/interpreters/interpreter.py
Expand Up @@ -18,41 +18,40 @@ def interpret_if_expr(self, expr, if_var_name=None, **kwargs):
if if_var_name is not None:
var_name = if_var_name
else:
var_name = self._cg.add_var_declaration()

if_def = self._do_interpret(expr.test, **kwargs)
self._cg.add_if_statement(if_def)
var_name = self._cg.add_var_declaration(
is_vector_type=expr.is_vector_output)

def handle_nested_expr(nested):
if isinstance(nested, ast.IfExpr):
self._do_interpret(nested, if_var_name=var_name, **kwargs)
else:
self._cg.add_var_assignment(
var_name, self._do_interpret(nested))
nested_result = self._do_interpret(nested)
self._cg.add_var_assignment(var_name, nested_result)

self._cg.add_if_statement(self._do_interpret(expr.test, **kwargs))
handle_nested_expr(expr.body)
self._cg.add_else_statement()
handle_nested_expr(expr.orelse)
self._cg.add_block_termination()

return var_name

def interpret_comp_expr(self, expr):
def interpret_comp_expr(self, expr, **kwargs):
return self._cg.infix_expression(
left=self._do_interpret(expr.left),
op=expr.op.value,
right=self._do_interpret(expr.right))

def interpret_bin_num_expr(self, expr):
def interpret_bin_num_expr(self, expr, **kwargs):
return self._cg.infix_expression(
left=self._do_interpret(expr.left),
op=expr.op.value,
right=self._do_interpret(expr.right))

def interpret_num_val(self, expr):
def interpret_num_val(self, expr, **kwargs):
return self._cg.num_value(value=expr.value)

def interpret_feature_ref(self, expr):
def interpret_feature_ref(self, expr, **kwargs):
return self._cg.array_index_access(
array_name=self._feature_array_name,
index=expr.index)
Expand Down
20 changes: 16 additions & 4 deletions m2cgen/interpreters/java/code_generator.py
Expand Up @@ -13,9 +13,13 @@ def add_class_def(self, class_name, modifier="public"):
self.add_code_line(class_def)
self.increase_indent()

def add_method_def(self, name, args, return_type, modifier="public"):
def add_method_def(self, name, args, is_vector_output, modifier="public"):
return_type = self._get_var_type(is_vector_output)

method_def = modifier + " static " + return_type + " " + name + "("
method_def += ",".join([t + " " + n for t, n in args])
method_def += ",".join([
self._get_var_type(is_vector) + " " + n
for is_vector, n in args])
method_def += ") {"
self.add_code_line(method_def)
self.increase_indent()
Expand All @@ -31,7 +35,15 @@ def class_definition(self, model_name):
self.add_block_termination()

@contextlib.contextmanager
def method_definition(self, name, args, return_type, modifier="public"):
self.add_method_def(name, args, return_type, modifier=modifier)
def method_definition(self, name, args, is_vector_output,
modifier="public"):
self.add_method_def(name, args, is_vector_output, modifier=modifier)
yield
self.add_block_termination()

def method_invocation(self, method_name, *args):
    """Return the source text of a call to ``method_name`` with ``args``.

    Arguments are already-rendered strings; they are separated by ", ".
    """
    rendered_args = ", ".join(args)
    return "".join([method_name, "(", rendered_args, ")"])

def array_init(self, values):
    """Return an array-initializer expression for the rendered ``values``.

    The result has the form ``new <vector_output_type> {v1, v2, ...}``.
    """
    elements = ", ".join(values)
    return "".join(["new ", self.vector_output_type, " {", elements, "}"])

0 comments on commit 2f93980

Please sign in to comment.