Skip to content

Commit

Permalink
Merge branch 'master' into exprs
Browse files Browse the repository at this point in the history
  • Loading branch information
StrikerRUS committed May 20, 2020
2 parents 3edb263 + 4869e00 commit eb470a1
Show file tree
Hide file tree
Showing 13 changed files with 75 additions and 64 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -47,7 +47,7 @@ pip install m2cgen
| **Linear** | <ul><li>scikit-learn<ul><li>LogisticRegression</li><li>LogisticRegressionCV</li><li>PassiveAggressiveClassifier</li><li>Perceptron</li><li>RidgeClassifier</li><li>RidgeClassifierCV</li><li>SGDClassifier</li></ul></li><li>lightning<ul><li>AdaGradClassifier</li><li>CDClassifier</li><li>FistaClassifier</li><li>SAGAClassifier</li><li>SAGClassifier</li><li>SDCAClassifier</li><li>SGDClassifier</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>ARDRegression</li><li>BayesianRidge</li><li>ElasticNet</li><li>ElasticNetCV</li><li>HuberRegressor</li><li>Lars</li><li>LarsCV</li><li>Lasso</li><li>LassoCV</li><li>LassoLars</li><li>LassoLarsCV</li><li>LassoLarsIC</li><li>LinearRegression</li><li>OrthogonalMatchingPursuit</li><li>OrthogonalMatchingPursuitCV</li><li>PassiveAggressiveRegressor</li><li>RANSACRegressor(only supported regression estimators can be used as a base estimator)</li><li>Ridge</li><li>RidgeCV</li><li>SGDRegressor</li><li>TheilSenRegressor</li></ul><li>StatsModels<ul><li>Generalized Least Squares (GLS)</li><li>Generalized Least Squares with AR Errors (GLSAR)</li><li>Generalized Linear Models (GLM)</li><li>Ordinary Least Squares (OLS)</li><li>[Gaussian] Process Regression Using Maximum Likelihood-based Estimation (ProcessMLE)</li><li>Quantile Regression (QuantReg)</li><li>Weighted Least Squares (WLS)</li></ul><li>lightning<ul><li>AdaGradRegressor</li><li>CDRegressor</li><li>FistaRegressor</li><li>SAGARegressor</li><li>SAGRegressor</li><li>SDCARegressor</li></ul></li></ul> |
| **SVM** | <ul><li>scikit-learn<ul><li>LinearSVC</li><li>NuSVC</li><li>SVC</li></ul></li><li>lightning<ul><li>KernelSVC</li><li>LinearSVC</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>LinearSVR</li><li>NuSVR</li><li>SVR</li></ul></li><li>lightning<ul><li>LinearSVR</li></ul></li></ul> |
| **Tree** | <ul><li>DecisionTreeClassifier</li><li>ExtraTreeClassifier</li></ul> | <ul><li>DecisionTreeRegressor</li><li>ExtraTreeRegressor</li></ul> |
| **Random Forest** | <ul><li>ExtraTreesClassifier</li><li>LGBMClassifier(rf booster only)</li><li>RandomForestClassifier</li><li>XGBRFClassifier(binary only, multiclass is not supported yet)</li></ul> | <ul><li>ExtraTreesRegressor</li><li>LGBMRegressor(rf booster only)</li><li>RandomForestRegressor</li><li>XGBRFRegressor</li></ul> |
| **Random Forest** | <ul><li>ExtraTreesClassifier</li><li>LGBMClassifier(rf booster only)</li><li>RandomForestClassifier</li><li>XGBRFClassifier</li></ul> | <ul><li>ExtraTreesRegressor</li><li>LGBMRegressor(rf booster only)</li><li>RandomForestRegressor</li><li>XGBRFRegressor</li></ul> |
| **Boosting** | <ul><li>LGBMClassifier(gbdt/dart/goss booster only)</li><li>XGBClassifier(gbtree/gblinear booster only)</li></ul> | <ul><li>LGBMRegressor(gbdt/dart/goss booster only)</li><li>XGBRegressor(gbtree/gblinear booster only)</li></ul> |

You can find versions of packages with which compatibility is guaranteed by CI tests [here](https://github.com/BayesWitnesses/m2cgen/blob/master/requirements-test.txt#L1).
Expand Down
23 changes: 15 additions & 8 deletions m2cgen/assemblers/boosting.py
Expand Up @@ -10,6 +10,7 @@
class BaseBoostingAssembler(ModelAssembler):

classifier_names = {}
strided_layout_for_multiclass = True

def __init__(self, model, estimator_params, base_score=0):
super().__init__(model)
Expand Down Expand Up @@ -54,7 +55,8 @@ def _assemble_multi_class_output(self, estimator_params):
# Multi-class output is calculated based on discussion in
# https://github.com/dmlc/xgboost/issues/1746#issuecomment-295962863
splits = _split_estimator_params_by_classes(
estimator_params, self._output_size)
estimator_params, self._output_size,
self.strided_layout_for_multiclass)

base_score = self._base_score
exprs = [
Expand Down Expand Up @@ -112,9 +114,8 @@ class XGBoostTreeModelAssembler(BaseTreeBoostingAssembler):
classifier_names = {"XGBClassifier", "XGBRFClassifier"}

def __init__(self, model):
if type(model).__name__ == "XGBRFClassifier" and model.n_classes_ > 2:
raise RuntimeError(
"Multiclass XGBRFClassifier is not supported yet")
if type(model).__name__ == "XGBRFClassifier":
self.strided_layout_for_multiclass = False
feature_names = model.get_booster().feature_names
self._feature_name_to_idx = {
name: idx for idx, name in enumerate(feature_names or [])
Expand Down Expand Up @@ -243,7 +244,13 @@ def _assemble_tree(self, tree):
self._assemble_tree(false_child))


# Groups the given values into n_classes lists using an interleaved layout:
# class k receives values[k], values[k + n_classes], values[k + 2 * n_classes]
# and so on, so the result always contains exactly n_classes slices.
def _split_estimator_params_by_classes(values, n_classes):
# Splits are computed based on a comment
# https://github.com/dmlc/xgboost/issues/1746#issuecomment-267400592.
return [values[class_idx::n_classes] for class_idx in range(n_classes)]
def _split_estimator_params_by_classes(values, n_classes, strided):
if strided:
# Splits are computed based on a comment
# https://github.com/dmlc/xgboost/issues/1746#issuecomment-267400592.
return [values[class_idx::n_classes] for class_idx in range(n_classes)]
else:
values_len = len(values)
block_len = values_len // n_classes
return [values[start_block_idx:start_block_idx + block_len]
for start_block_idx in range(0, values_len, block_len)]
4 changes: 2 additions & 2 deletions m2cgen/interpreters/c/code_generator.py
Expand Up @@ -6,8 +6,8 @@

class CCodeGenerator(CLikeCodeGenerator):

tpl_scalar_var_declare = CT("double ${var_name};")
tpl_vector_var_declare = CT("double ${var_name}[${size}];")
tpl_scalar_var_declare = CT("double {var_name};")
tpl_vector_var_declare = CT("double {var_name}[{size}];")

scalar_type = "double"
vector_type = "double *"
Expand Down
27 changes: 15 additions & 12 deletions m2cgen/interpreters/code_generator.py
@@ -1,19 +1,22 @@
from io import StringIO
from string import Template
from weakref import finalize


class CodeTemplate:

def __init__(self, template):
self.template = Template(template)
self.str_template = template

def __str__(self):
return self.str_template

def __call__(self, *args, **kwargs):
return self.template.substitute(*args, **kwargs)
# Force calling str() representation
# because without it numpy gives the same output
# for different float types
return self.str_template.format(
*[str(i) for i in args],
**{k: str(v) for k, v in kwargs.items()})


class BaseCodeGenerator:
Expand Down Expand Up @@ -187,12 +190,12 @@ class CLikeCodeGenerator(ImperativeCodeGenerator):
have to provide logic for wrapping expressions into functions/classes/etc.
"""

tpl_num_value = CodeTemplate("${value}")
tpl_infix_expression = CodeTemplate("(${left}) ${op} (${right})")
tpl_var_declaration = CodeTemplate("${var_type} ${var_name};")
tpl_return_statement = CodeTemplate("return ${value};")
tpl_array_index_access = CodeTemplate("${array_name}[${index}]")
tpl_if_statement = CodeTemplate("if (${if_def}) {")
tpl_else_statement = CodeTemplate("} else {")
tpl_block_termination = CodeTemplate("}")
tpl_var_assignment = CodeTemplate("${var_name} = ${value};")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_var_declaration = CodeTemplate("{var_type} {var_name};")
tpl_return_statement = CodeTemplate("return {value};")
tpl_array_index_access = CodeTemplate("{array_name}[{index}]")
tpl_if_statement = CodeTemplate("if ({if_def}) {{")
tpl_else_statement = CodeTemplate("}} else {{")
tpl_block_termination = CodeTemplate("}}")
tpl_var_assignment = CodeTemplate("{var_name} = {value};")
18 changes: 9 additions & 9 deletions m2cgen/interpreters/go/code_generator.py
Expand Up @@ -5,15 +5,15 @@


class GoCodeGenerator(ImperativeCodeGenerator):
tpl_num_value = CodeTemplate("${value}")
tpl_infix_expression = CodeTemplate("(${left}) ${op} (${right})")
tpl_array_index_access = CodeTemplate("${array_name}[${index}]")
tpl_else_statement = CodeTemplate("} else {")
tpl_block_termination = CodeTemplate("}")
tpl_var_declaration = CodeTemplate("var ${var_name} ${var_type}")
tpl_return_statement = CodeTemplate("return ${value}")
tpl_if_statement = CodeTemplate("if ${if_def} {")
tpl_var_assignment = CodeTemplate("${var_name} = ${value}")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_array_index_access = CodeTemplate("{array_name}[{index}]")
tpl_else_statement = CodeTemplate("}} else {{")
tpl_block_termination = CodeTemplate("}}")
tpl_var_declaration = CodeTemplate("var {var_name} {var_type}")
tpl_return_statement = CodeTemplate("return {value}")
tpl_if_statement = CodeTemplate("if {if_def} {{")
tpl_var_assignment = CodeTemplate("{var_name} = {value}")

scalar_type = "float64"
vector_type = "[]float64"
Expand Down
6 changes: 3 additions & 3 deletions m2cgen/interpreters/haskell/code_generator.py
Expand Up @@ -5,9 +5,9 @@


class HaskellCodeGenerator(BaseCodeGenerator):
tpl_num_value = CodeTemplate("${value}")
tpl_infix_expression = CodeTemplate("(${left}) ${op} (${right})")
tpl_module_definition = CodeTemplate("module ${module_name} where")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_module_definition = CodeTemplate("module {module_name} where")

def __init__(self, *args, **kwargs):
self._func_idx = 0
Expand Down
2 changes: 1 addition & 1 deletion m2cgen/interpreters/php/code_generator.py
Expand Up @@ -7,7 +7,7 @@

class PhpCodeGenerator(CLikeCodeGenerator):

tpl_array_index_access = CodeTemplate("$$${array_name}[${index}]")
tpl_array_index_access = CodeTemplate("${array_name}[{index}]")

def __init__(self, *args, **kwargs):
super(PhpCodeGenerator, self).__init__(*args, **kwargs)
Expand Down
8 changes: 4 additions & 4 deletions m2cgen/interpreters/powershell/code_generator.py
Expand Up @@ -7,10 +7,10 @@

class PowershellCodeGenerator(CLikeCodeGenerator):

tpl_var_declare = CT("${var_type}${var_name} = ${init_val}")
tpl_var_assignment = CT("${var_name} = ${value}")
tpl_array_index_access = CT("$$${array_name}[${index}]")
tpl_return_statement = CT("return ${value}")
tpl_var_declare = CT("{var_type}{var_name} = {init_val}")
tpl_var_assignment = CT("{var_name} = {value}")
tpl_array_index_access = CT("${array_name}[{index}]")
tpl_return_statement = CT("return {value}")

scalar_type = "[double]"
vector_type = "[double[]]"
Expand Down
14 changes: 7 additions & 7 deletions m2cgen/interpreters/python/code_generator.py
Expand Up @@ -6,14 +6,14 @@

class PythonCodeGenerator(ImperativeCodeGenerator):

tpl_num_value = CT("${value}")
tpl_infix_expression = CT("(${left}) ${op} (${right})")
tpl_return_statement = CT("return ${value}")
tpl_array_index_access = CT("${array_name}[${index}]")
tpl_array_convert_to_numpy = CT("np.asarray(${value})")
tpl_if_statement = CT("if ${if_def}:")
tpl_num_value = CT("{value}")
tpl_infix_expression = CT("({left}) {op} ({right})")
tpl_return_statement = CT("return {value}")
tpl_array_index_access = CT("{array_name}[{index}]")
tpl_array_convert_to_numpy = CT("np.asarray({value})")
tpl_if_statement = CT("if {if_def}:")
tpl_else_statement = CT("else:")
tpl_var_assignment = CT("${var_name} = ${value}")
tpl_var_assignment = CT("{var_name} = {value}")

tpl_var_declaration = CT("")
tpl_block_termination = CT("")
Expand Down
4 changes: 2 additions & 2 deletions m2cgen/interpreters/r/code_generator.py
Expand Up @@ -6,8 +6,8 @@

class RCodeGenerator(CLikeCodeGenerator):

tpl_return_statement = CodeTemplate("return(${value})")
tpl_var_assignment = CodeTemplate("${var_name} <- ${value}")
tpl_return_statement = CodeTemplate("return({value})")
tpl_var_assignment = CodeTemplate("{var_name} <- {value}")

def __init__(self, *args, **kwargs):
super(RCodeGenerator, self).__init__(*args, **kwargs)
Expand Down
10 changes: 5 additions & 5 deletions m2cgen/interpreters/ruby/code_generator.py
Expand Up @@ -7,14 +7,14 @@
class RubyCodeGenerator(ImperativeCodeGenerator):

tpl_var_declaration = CT("")
tpl_num_value = CT("${value}")
tpl_infix_expression = CT("(${left}) ${op} (${right})")
tpl_num_value = CT("{value}")
tpl_infix_expression = CT("({left}) {op} ({right})")
tpl_return_statement = tpl_num_value
tpl_array_index_access = CT("${array_name}[${index}]")
tpl_if_statement = CT("if ${if_def}")
tpl_array_index_access = CT("{array_name}[{index}]")
tpl_if_statement = CT("if {if_def}")
tpl_else_statement = CT("else")
tpl_block_termination = CT("end")
tpl_var_assignment = CT("${var_name} = ${value}")
tpl_var_assignment = CT("{var_name} = {value}")

def add_function_def(self, name, args):
func_def = "def " + name + "("
Expand Down
20 changes: 10 additions & 10 deletions m2cgen/interpreters/visual_basic/code_generator.py
Expand Up @@ -6,18 +6,18 @@


class VisualBasicCodeGenerator(ImperativeCodeGenerator):
tpl_num_value = CodeTemplate("${value}")
tpl_infix_expression = CodeTemplate("(${left}) ${op} (${right})")
tpl_num_value = CodeTemplate("{value}")
tpl_infix_expression = CodeTemplate("({left}) {op} ({right})")
tpl_var_declaration = \
CodeTemplate("Dim ${var_name}${type_modifier} As ${var_type}")
tpl_return_statement = CodeTemplate("${func_name} = ${value}")
tpl_if_statement = CodeTemplate("If ${if_def} Then")
CodeTemplate("Dim {var_name}{type_modifier} As {var_type}")
tpl_return_statement = CodeTemplate("{func_name} = {value}")
tpl_if_statement = CodeTemplate("If {if_def} Then")
tpl_else_statement = CodeTemplate("Else")
tpl_block_termination = CodeTemplate("End ${block_name}")
tpl_array_index_access = CodeTemplate("${array_name}(${index})")
tpl_array_set_by_index = CodeTemplate("${array_name}(${index}) = ${value}")
tpl_var_assignment = CodeTemplate("${var_name} = ${value}")
tpl_module_definition = CodeTemplate("Module ${module_name}")
tpl_block_termination = CodeTemplate("End {block_name}")
tpl_array_index_access = CodeTemplate("{array_name}({index})")
tpl_array_set_by_index = CodeTemplate("{array_name}({index}) = {value}")
tpl_var_assignment = CodeTemplate("{var_name} = {value}")
tpl_module_definition = CodeTemplate("Module {module_name}")

scalar_type = "Double"

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/test_e2e.py
Expand Up @@ -200,6 +200,7 @@ def regression_bounded(model, test_fraction=0.02):
# XGBoost (RF)
regression(xgboost.XGBRFRegressor(**XGBOOST_PARAMS_RF)),
classification(xgboost.XGBRFClassifier(**XGBOOST_PARAMS_RF)),
classification_binary(xgboost.XGBRFClassifier(**XGBOOST_PARAMS_RF)),
# XGBoost (Large Trees)
Expand Down

0 comments on commit eb470a1

Please sign in to comment.