Skip to content

Commit

Permalink
Implement regression for C (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
krinart authored and izeigerman committed Feb 1, 2019
1 parent d536e0f commit 40baf43
Show file tree
Hide file tree
Showing 14 changed files with 322 additions and 21 deletions.
3 changes: 2 additions & 1 deletion m2cgen/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .exporters import export_to_java, export_to_python
from .exporters import export_to_java, export_to_python, export_to_c


# Public API of the package. ``__all__`` entries must be strings: listing the
# function objects themselves makes ``from m2cgen import *`` raise TypeError.
__all__ = [
    "export_to_java",
    "export_to_python",
    "export_to_c",
]
9 changes: 8 additions & 1 deletion m2cgen/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(self, exprs):
"All expressions for VectorVal must be scalar")

self.exprs = exprs
self.size = len(exprs)

def __str__(self):
args = ",".join([str(e) for e in self.exprs])
Expand All @@ -76,6 +77,7 @@ def __init__(self, left, right, op):
self.left = left
self.right = right
self.op = op
self.size = left.size

def __str__(self):
args = ",".join([str(self.left), str(self.right), self.op.name])
Expand All @@ -91,6 +93,7 @@ def __init__(self, left, right, op):
self.left = left
self.right = right
self.op = op
self.size = left.size

def __str__(self):
args = ",".join([str(self.left), str(self.right), self.op.name])
Expand Down Expand Up @@ -129,7 +132,7 @@ def __str__(self):
# Control Expressions.

class CtrlExpr(Expr):
pass
size = None


class IfExpr(CtrlExpr):
Expand All @@ -142,6 +145,8 @@ def __init__(self, test, body, orelse):
self.orelse = orelse

self.is_vector_output = body.is_vector_output
if self.is_vector_output:
self.size = body.size

def __str__(self):
args = ",".join([str(self.test), str(self.body), str(self.orelse)])
Expand All @@ -152,6 +157,8 @@ class TransparentExpr(CtrlExpr):
def __init__(self, expr):
self.expr = expr
self.is_vector_output = expr.is_vector_output
if self.is_vector_output:
self.size = expr.size


class SubroutineExpr(TransparentExpr):
Expand Down
5 changes: 5 additions & 0 deletions m2cgen/exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def export_to_python(model, indent=4):
return _export(model, interpreter)


def export_to_c(model, indent=4):
    """Export ``model`` through a ``CInterpreter`` and return the result.

    :param model: the model object handed to ``_export``.
    :param indent: indentation width forwarded to the C interpreter.
    :return: whatever ``_export`` returns for the C interpreter.
    """
    return _export(model, interpreters.CInterpreter(indent=indent))


def _export(model, interpreter):
assembler_cls = _get_assembler_cls(model)
model_ast = assembler_cls(model).assemble()
Expand Down
4 changes: 3 additions & 1 deletion m2cgen/interpreters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .java.interpreter import JavaInterpreter
from .python.interpreter import PythonInterpreter
from .c.interpreter import CInterpreter

# Public interpreters of this package. ``__all__`` entries must be strings:
# listing the classes themselves makes ``from ... import *`` raise TypeError.
__all__ = [
    "JavaInterpreter",
    "PythonInterpreter",
    "CInterpreter",
]
Empty file.
51 changes: 51 additions & 0 deletions m2cgen/interpreters/c/code_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import contextlib

from m2cgen.interpreters.code_generator import CLikeCodeGenerator
from m2cgen.interpreters.code_generator import CodeTemplate as CT


class CCodeGenerator(CLikeCodeGenerator):
    """Code generator that emits C source on top of the C-like base."""

    # Declaration templates for scalar and vector variables.
    tpl_scalar_var_declare = CT("double ${var_name};")
    # NOTE(review): in C this declares an array of ``size`` pointers to
    # double, not an array of doubles -- confirm this is intended.
    tpl_vector_var_declare = CT("static double *${var_name}[${size}];")

    # C type names used in function signatures.
    scalar_type = "double"
    vector_type = "double *"

    def __init__(self, *args, **kwargs):
        super(CCodeGenerator, self).__init__(*args, **kwargs)

    def add_function_def(self, name, args, is_vector_output):
        """Emit the opening line of a function and increase the indent.

        :param name: name of the function.
        :param args: iterable of ``(is_vector, arg_name)`` pairs.
        :param is_vector_output: selects the vector or scalar return type.
        """
        params = ",".join([
            self._get_var_type(arg_is_vector) + " " + arg_name
            for arg_is_vector, arg_name in args])
        signature = "".join([
            self._get_var_type(is_vector_output),
            " ", name, "(", params, ") {"])
        self.add_code_line(signature)
        self.increase_indent()

    @contextlib.contextmanager
    def function_definition(self, name, args, is_vector_output):
        """Context manager wrapping its body in a function definition.

        The function header is emitted on entry and the block termination
        on normal exit.
        """
        self.add_function_def(name, args, is_vector_output)
        yield
        self.add_block_termination()

    def add_var_declaration(self, expr):
        """Declare a fresh variable for ``expr`` and return its name.

        Vector expressions use the vector template together with
        ``expr.size``; everything else uses the scalar template.
        """
        var_name = self.get_var_name()

        if not expr.is_vector_output:
            declaration = self.tpl_scalar_var_declare(
                var_name=var_name, size=None)
        else:
            declaration = self.tpl_vector_var_declare(
                var_name=var_name, size=expr.size)

        self.add_code_line(declaration)
        return var_name

    def _get_var_type(self, is_vector):
        """Return the C type name for a vector or a scalar value."""
        if is_vector:
            return self.vector_type
        return self.scalar_type
21 changes: 21 additions & 0 deletions m2cgen/interpreters/c/interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from m2cgen.interpreters.interpreter import BaseInterpreter
from m2cgen.interpreters.c.code_generator import CCodeGenerator


class CInterpreter(BaseInterpreter):
    """Interpreter that renders an expression as a C ``score`` function."""

    def __init__(self, indent=4, *args, **kwargs):
        # The C code generator is created here and handed to the base class
        # as its first argument.
        super(CInterpreter, self).__init__(
            CCodeGenerator(indent=indent), *args, **kwargs)

    def interpret(self, expr):
        """Generate the code for ``expr`` and return it.

        The generator state is reset first, then the interpreted expression
        is wrapped in a function named ``score`` whose single argument is
        the feature array (flagged as a vector).
        """
        self._cg.reset_state()

        signature_args = [(True, self._feature_array_name)]
        with self._cg.function_definition(
                name="score",
                args=signature_args,
                is_vector_output=expr.is_vector_output):
            self._cg.add_return_statement(self._do_interpret(expr))

        return self._cg.code
18 changes: 4 additions & 14 deletions m2cgen/interpreters/code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ class BaseCodeGenerator:
tpl_block_termination = NotImplemented
tpl_var_assignment = NotImplemented

scalar_output_type = NotImplemented
vector_output_type = NotImplemented

def __init__(self, indent=4):
self._indent = indent
self.reset_state()
Expand Down Expand Up @@ -72,9 +69,9 @@ def prepend_code_line(self, line):
def add_return_statement(self, value):
self.add_code_line(self.tpl_return_statement(value=value))

def add_var_declaration(self, is_vector_type=False):
def add_var_declaration(self, expr):
var_name = self.get_var_name()
var_type = self._get_var_type(is_vector_type)
var_type = self._get_var_declare_type(expr.is_vector_output)
self.add_code_line(
self.tpl_var_declaration(
var_type=var_type, var_name=var_name))
Expand Down Expand Up @@ -110,12 +107,8 @@ def array_index_access(self, array_name, index):
return self.tpl_array_index_access(
array_name=array_name, index=index)

# Helpers

def _get_var_type(self, is_vector):
return (
self.vector_output_type if is_vector
else self.scalar_output_type)
def _get_var_declare_type(self, expr):
return NotImplemented


class CLikeCodeGenerator(BaseCodeGenerator):
Expand All @@ -133,6 +126,3 @@ class CLikeCodeGenerator(BaseCodeGenerator):
tpl_else_statement = CodeTemplate("} else {")
tpl_block_termination = CodeTemplate("}")
tpl_var_assignment = CodeTemplate("${var_name} = ${value};")

scalar_output_type = "double"
vector_output_type = "double[]"
3 changes: 1 addition & 2 deletions m2cgen/interpreters/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ def interpret_if_expr(self, expr, if_var_name=None, **kwargs):
if if_var_name is not None:
var_name = if_var_name
else:
var_name = self._cg.add_var_declaration(
is_vector_type=expr.is_vector_output)
var_name = self._cg.add_var_declaration(expr)

def handle_nested_expr(nested):
if isinstance(nested, ast.IfExpr):
Expand Down
12 changes: 10 additions & 2 deletions m2cgen/interpreters/java/code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

class JavaCodeGenerator(CLikeCodeGenerator):

scalar_output_type = "double"
vector_output_type = "double[]"

def __init__(self, *args, **kwargs):
super(JavaCodeGenerator, self).__init__(*args, **kwargs)

Expand All @@ -14,11 +17,11 @@ def add_class_def(self, class_name, modifier="public"):
self.increase_indent()

def add_method_def(self, name, args, is_vector_output, modifier="public"):
return_type = self._get_var_type(is_vector_output)
return_type = self._get_var_declare_type(is_vector_output)

method_def = modifier + " static " + return_type + " " + name + "("
method_def += ",".join([
self._get_var_type(is_vector) + " " + n
self._get_var_declare_type(is_vector) + " " + n
for is_vector, n in args])
method_def += ") {"
self.add_code_line(method_def)
Expand Down Expand Up @@ -47,3 +50,8 @@ def method_invocation(self, method_name, *args):
def vector_init(self, values):
return "new " + self.vector_output_type + (
" {" + ", ".join(values) + "}")

def _get_var_declare_type(self, is_vector):
return (
self.vector_output_type if is_vector
else self.scalar_output_type)
2 changes: 2 additions & 0 deletions tests/e2e/executors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from tests.e2e.executors.java import JavaExecutor
from tests.e2e.executors.python import PythonExecutor
from tests.e2e.executors.c import CExecutor

# Executors exposed by this package. ``__all__`` entries must be strings:
# listing the classes themselves makes ``from ... import *`` raise TypeError.
__all__ = [
    "JavaExecutor",
    "PythonExecutor",
    "CExecutor",
]
60 changes: 60 additions & 0 deletions tests/e2e/executors/c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
import string
import subprocess

from tests.e2e.executors import base

import m2cgen as m2c


# C harness wrapped around the exported model code (substituted for
# ``${code}``): ``main`` parses every command-line argument as a double into
# ``input``, passes the array to ``score`` and prints the returned value
# followed by a newline.
executor_code_tpl = """
#include <stdio.h>
${code}
int main(int argc, char *argv[])
{
double input [argc-1];
for (int i = 1; i < argc; ++i) {
sscanf(argv[i], "%lf", &input[i-1]);
}
printf("%f\\n", score(input));
return 0;
}
"""


class CExecutor(base.BaseExecutor):
    """Executor that compiles the exported C code and runs the binary.

    ``prepare`` writes the generated source next to the harness in
    ``executor_code_tpl`` and compiles it with gcc; ``predict`` runs the
    resulting binary with the feature values as command-line arguments.
    """

    model_name = "score"

    def __init__(self, model):
        self.model = model

        self._gcc = "gcc"

    def predict(self, X):
        """Run the compiled binary on ``X`` and parse its output.

        :param X: iterable of feature values; each is passed as ``str(x)``.
        :return: a float when the output holds a single item, otherwise a
            list of floats (items are separated by single spaces).
        :raises subprocess.CalledProcessError: if the binary exits with a
            non-zero status.
        """
        # ``_resource_tmp_dir`` is not set in this class -- presumably it is
        # provided by the base executor; verify against ``BaseExecutor``.
        exec_args = [os.path.join(self._resource_tmp_dir, self.model_name)]
        exec_args.extend(map(str, X))
        # check_output waits for the child process and closes its stdout
        # pipe, so neither the process nor the descriptor is leaked.
        output = subprocess.check_output(exec_args)
        items = output.decode("utf-8").split(" ")
        if len(items) == 1:
            return float(items[0])
        else:
            return [float(i) for i in items]

    def prepare(self):
        """Export the model to C, write the source file and compile it.

        :raises subprocess.CalledProcessError: if gcc fails, so a broken
            build is reported here instead of surfacing later in
            ``predict`` as a missing binary.
        """
        code = m2c.export_to_c(self.model)
        code = string.Template(executor_code_tpl).substitute(code=code)

        file_name = os.path.join(
            self._resource_tmp_dir, "{}.c".format(self.model_name))
        with open(file_name, "w") as f:
            f.write(code)

        target = os.path.join(self._resource_tmp_dir, self.model_name)
        exec_args = [self._gcc, file_name, "-o", target, "-std=c99"]
        subprocess.check_call(exec_args)
31 changes: 31 additions & 0 deletions tests/e2e/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# pytest marks
PYTHON = pytest.mark.python
JAVA = pytest.mark.java
C = pytest.mark.c
REGRESSION = pytest.mark.regr
CLASSIFICATION = pytest.mark.clf

Expand All @@ -33,6 +34,7 @@ def exec_e2e_test(estimator, executor_cls, model_trainer, is_fast):


@pytest.mark.parametrize("estimator,executor_cls,model_trainer", [
# Java
pytest.param(
linear_model.LinearRegression(),
executors.JavaExecutor,
Expand Down Expand Up @@ -76,6 +78,8 @@ def exec_e2e_test(estimator, executor_cls, model_trainer, is_fast):
utils.train_model_regression,
marks=[JAVA, REGRESSION],
),
# Python
pytest.param(
ensemble.RandomForestClassifier(n_estimators=10,
random_state=RANDOM_SEED),
Expand Down Expand Up @@ -147,6 +151,33 @@ def exec_e2e_test(estimator, executor_cls, model_trainer, is_fast):
utils.train_model_classification,
marks=[PYTHON, CLASSIFICATION],
),
# C
pytest.param(
linear_model.LinearRegression(),
executors.CExecutor,
utils.train_model_regression,
marks=[C, REGRESSION],
),
pytest.param(
tree.DecisionTreeRegressor(random_state=RANDOM_SEED),
executors.CExecutor,
utils.train_model_regression,
marks=[C, REGRESSION],
),
pytest.param(
ensemble.RandomForestRegressor(n_estimators=10,
random_state=RANDOM_SEED),
executors.CExecutor,
utils.train_model_regression,
marks=[C, REGRESSION],
),
pytest.param(
linear_model.LogisticRegression(),
executors.CExecutor,
utils.train_model_classification_binary,
marks=[C, CLASSIFICATION],
),
])
def test_e2e(estimator, executor_cls, model_trainer, is_fast):
exec_e2e_test(estimator, executor_cls, model_trainer, is_fast)

0 comments on commit 40baf43

Please sign in to comment.