From add18905a816229b3829d0cf3637919d15bc6ffa Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Tue, 14 Apr 2020 04:43:20 +0300 Subject: [PATCH 1/3] added statsmodels GLM --- README.md | 2 +- m2cgen/assemblers/__init__.py | 11 +- m2cgen/assemblers/linear.py | 114 +++++++++- m2cgen/assemblers/svm.py | 3 +- m2cgen/interpreters/mixins.py | 4 +- tests/assemblers/test_linear.py | 354 ++++++++++++++++++++++++++++---- tests/assemblers/test_meta.py | 40 ++++ tests/e2e/test_e2e.py | 71 +++++++ tests/utils.py | 12 ++ 9 files changed, 567 insertions(+), 44 deletions(-) create mode 100644 tests/assemblers/test_meta.py diff --git a/README.md b/README.md index 54c8bb84..0eb15dd8 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ pip install m2cgen | | Classification | Regression | | --- | --- | --- | -| **Linear** | | | +| **Linear** | | | | **SVM** | | | | **Tree** | | | | **Random Forest** | | | diff --git a/m2cgen/assemblers/__init__.py b/m2cgen/assemblers/__init__.py index 708257f3..a3782fd4 100644 --- a/m2cgen/assemblers/__init__.py +++ b/m2cgen/assemblers/__init__.py @@ -1,6 +1,8 @@ from .linear import (SklearnLinearModelAssembler, StatsmodelsLinearModelAssembler, - ProcessMLEModelAssembler) + ProcessMLEModelAssembler, + StatsmodelsGLMModelAssembler, + StatsmodelsModelAssemblerSelector) from .tree import TreeModelAssembler from .ensemble import RandomForestModelAssembler from .boosting import (XGBoostModelAssemblerSelector, @@ -23,6 +25,8 @@ LightGBMModelAssembler, SklearnSVMModelAssembler, LightningSVMModelAssembler, + StatsmodelsGLMModelAssembler, + StatsmodelsModelAssemblerSelector, ] @@ -74,9 +78,10 @@ "sklearn_TheilSenRegressor": SklearnLinearModelAssembler, # Statsmodels Linear Regressors + "statsmodels_GLMResultsWrapper": StatsmodelsGLMModelAssembler, "statsmodels_ProcessMLEResults": ProcessMLEModelAssembler, "statsmodels_RegressionResultsWrapper": StatsmodelsLinearModelAssembler, - "statsmodels_RegularizedResultsWrapper": StatsmodelsLinearModelAssembler, + "statsmodels_RegularizedResultsWrapper": StatsmodelsModelAssemblerSelector, # Lightning Linear Regressors "lightning_AdaGradRegressor": SklearnLinearModelAssembler, @@ -130,6 +135,6 @@ def get_assembler_cls(model): if not assembler_cls: raise NotImplementedError( - "Model {} is not supported".format(model_name)) + "Model '{}' is not supported".format(model_name)) return assembler_cls diff --git a/m2cgen/assemblers/linear.py b/m2cgen/assemblers/linear.py index c50d58c7..5ded696b 100644 --- a/m2cgen/assemblers/linear.py +++ b/m2cgen/assemblers/linear.py @@ -15,13 +15,18 @@ def _build_ast(self): intercept = utils.to_1d_array(self._get_intercept()) if coef.shape[0] == 1: - return _linear_to_ast(coef[0], intercept[0]) + return self._final_transform( + _linear_to_ast(coef[0], intercept[0])) exprs = [] for idx in range(coef.shape[0]): - exprs.append(_linear_to_ast(coef[idx], intercept[idx])) + exprs.append(self._final_transform( + _linear_to_ast(coef[idx], intercept[idx]))) return ast.VectorVal(exprs) + def _final_transform(self, ast_to_transform): + return ast_to_transform + def _get_intercept(self): raise NotImplementedError @@ -70,6 +75,111 @@ def _get_coef(self): return self.model.params[:self.model.k_exog] +class StatsmodelsGLMModelAssembler(StatsmodelsLinearModelAssembler): + + def _final_transform(self, ast_to_transform): + link_function = type(self.model.model.family.link).__name__ + link_function_lower = link_function.lower() + supported_functions = { + "logit": self._logit, + "power": self._power, + "inverse_power": self._inverse_power, + "sqrt": self._sqrt, + "inverse_squared": self._inverse_squared, + "identity": self._identity, + "log": self._log, + "cloglog": self._cloglog, + "negativebinomial": self._negativebinomial, + "nbinom": self._negativebinomial + } + if link_function_lower not in supported_functions: + raise ValueError( + "Unsupported link function '{}'".format(link_function)) + link_fun = supported_functions[link_function_lower] + return link_fun(ast_to_transform) + + def _logit(self, ast_to_transform): + return utils.div( + ast.NumVal(1.0), + utils.add( + ast.NumVal(1.0), + ast.ExpExpr( + utils.sub( + ast.NumVal(0.0), + ast_to_transform)))) + + def _power(self, ast_to_transform): + power = self.model.model.family.link.power + if power == 1: + return self._identity(ast_to_transform) + elif power == -1: + return self._inverse_power(ast_to_transform) + elif power == 2: + return ast.SqrtExpr(ast_to_transform) + elif power == -2: + return self._inverse_squared(ast_to_transform) + elif power < 0: # some languages may not support negative exponent + return utils.div( + ast.NumVal(1.0), + ast.PowExpr(ast_to_transform, ast.NumVal(1 / -power))) + else: + return ast.PowExpr(ast_to_transform, ast.NumVal(1 / power)) + + def _inverse_power(self, ast_to_transform): + return utils.div(ast.NumVal(1.0), ast_to_transform) + + def _sqrt(self, ast_to_transform): + return ast.PowExpr(ast_to_transform, ast.NumVal(2)) + + def _inverse_squared(self, ast_to_transform): + return utils.div(ast.NumVal(1.0), ast.SqrtExpr(ast_to_transform)) + + def _identity(self, ast_to_transform): + return ast_to_transform + + def _log(self, ast_to_transform): + return ast.ExpExpr(ast_to_transform) + + def _cloglog(self, ast_to_transform): + return utils.sub( + ast.NumVal(1.0), + ast.ExpExpr( + utils.sub( + ast.NumVal(0.0), + ast.ExpExpr(ast_to_transform)))) + + def _negativebinomial(self, ast_to_transform): + return utils.div( + ast.NumVal(-1.0), + utils.mul( + ast.NumVal(self.model.model.family.link.alpha), + utils.sub( + ast.NumVal(1.0), + ast.ExpExpr( + utils.sub( + ast.NumVal(0.0), + ast_to_transform))))) + + +class StatsmodelsModelAssemblerSelector(ModelAssembler): + + def __init__(self, model): + underlying_model = type(model.model).__name__ + if underlying_model == "GLM": + self.assembler = StatsmodelsGLMModelAssembler(model) + elif underlying_model in {"GLS", + "GLSAR", + "OLS", + "WLS"}: + self.assembler = StatsmodelsLinearModelAssembler(model) + else: + raise NotImplementedError( + "Model '{}' is not supported".format(underlying_model)) + + def assemble(self): + return self.assembler.assemble() + + def _linear_to_ast(coef, intercept): feature_weight_mul_ops = [] diff --git a/m2cgen/assemblers/svm.py b/m2cgen/assemblers/svm.py index a3387a89..14f66788 100644 --- a/m2cgen/assemblers/svm.py +++ b/m2cgen/assemblers/svm.py @@ -13,7 +13,8 @@ def __init__(self, model): kernel_type = model.kernel supported_kernels = self._get_supported_kernels() if kernel_type not in supported_kernels: - raise ValueError("Unsupported kernel type {}".format(kernel_type)) + raise ValueError( + "Unsupported kernel type '{}'".format(kernel_type)) self._kernel_fun = supported_kernels[kernel_type] gamma = self._get_gamma() diff --git a/m2cgen/interpreters/mixins.py b/m2cgen/interpreters/mixins.py index d49a92d6..199633bb 100644 --- a/m2cgen/interpreters/mixins.py +++ b/m2cgen/interpreters/mixins.py @@ -61,7 +61,7 @@ class LinearAlgebraMixin(BaseToCodeInterpreter): def interpret_bin_vector_expr(self, expr, extra_func_args=(), **kwargs): if expr.op not in self.supported_bin_vector_ops: raise NotImplementedError( - "Op {} is unsupported".format(expr.op.name)) + "Op '{}' is unsupported".format(expr.op.name)) self.with_linear_algebra = True @@ -77,7 +77,7 @@ def interpret_bin_vector_num_expr(self, expr, extra_func_args=(), **kwargs): if expr.op not in self.supported_bin_vector_num_ops: raise NotImplementedError( - "Op {} is unsupported".format(expr.op.name)) + "Op '{}' is unsupported".format(expr.op.name)) self.with_linear_algebra = True diff --git a/tests/assemblers/test_linear.py b/tests/assemblers/test_linear.py index 82c3ec99..8fb01044 100644 --- a/tests/assemblers/test_linear.py +++ b/tests/assemblers/test_linear.py @@ -5,8 +5,6 @@ from lightning.regression import AdaGradRegressor from lightning.classification import AdaGradClassifier from sklearn import linear_model -from sklearn.dummy import DummyRegressor -from sklearn.tree import DecisionTreeRegressor from m2cgen import assemblers, ast from tests import utils @@ -133,39 +131,6 @@ def test_binary_class(): assert utils.cmp_exprs(actual, expected) -def test_ransac_custom_base_estimator(): - base_estimator = DecisionTreeRegressor() - estimator = linear_model.RANSACRegressor( - base_estimator=base_estimator, - random_state=1) - estimator.fit([[1], [2], [3]], [1, 2, 3]) - - assembler = assemblers.RANSACModelAssembler(estimator) - actual = assembler.assemble() - - expected = ast.IfExpr( - ast.CompExpr( - ast.FeatureRef(0), - ast.NumVal(2.5), - ast.CompOpType.LTE), - ast.NumVal(2.0), - ast.NumVal(3.0)) - - assert utils.cmp_exprs(actual, expected) - - -@pytest.mark.xfail(raises=NotImplementedError, strict=True) -def test_ransac_unknown_base_estimator(): - base_estimator = DummyRegressor() - estimator = linear_model.RANSACRegressor( - base_estimator=base_estimator, - random_state=1) - estimator.fit([[1], [2], [3]], [1, 2, 3]) - - assembler = assemblers.RANSACModelAssembler(estimator) - assembler.assemble() - - def test_statsmodels_wo_const(): estimator = utils.StatsmodelsSklearnLikeWrapper(sm.GLS, {}) _, __, estimator = utils.get_regression_model_trainer()(estimator) @@ -403,6 +368,325 @@ def test_statsmodels_processmle(): assert utils.cmp_exprs(actual, expected) +def test_statsmodels_glm_logit_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Binomial( + sm.families.links.logit())), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(1.0), + ast.BinNumExpr( + ast.NumVal(1.0), + ast.ExpExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(-0.8567815987), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.BinNumOpType.SUB)), + ast.BinNumOpType.ADD), + ast.BinNumOpType.DIV) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_power_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(3))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.PowExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(0.0020808009), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.NumVal(0.3333333333)) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_negative_power_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(-3))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(1.0), + ast.PowExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(71.0542398846), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.NumVal(0.3333333333)), + ast.BinNumOpType.DIV) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_inverse_power_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(-1))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(1.0), + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(3.0460921844), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.BinNumOpType.DIV) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_inverse_squared_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(-2))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(1.0), + ast.SqrtExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(15.1237331741), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD)), + ast.BinNumOpType.DIV) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_sqr_power_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(2))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.SqrtExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(0.0154915480), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD)) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_identity_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(1))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2], [3]], [0.1, 0.2, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(0.0791304348), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_sqrt_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Poisson( + sm.families.links.sqrt())), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.PowExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(0.2429239017), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.NumVal(2)) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_log_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Poisson( + sm.families.links.log())), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.ExpExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(-1.0242053933), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD)) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_cloglog_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Binomial( + sm.families.links.cloglog())), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(1.0), + ast.ExpExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.ExpExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(-0.8914468745), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD)), + ast.BinNumOpType.SUB)), + ast.BinNumOpType.SUB) + + assert utils.cmp_exprs(actual, expected) + + +def test_statsmodels_glm_negativebinomial_link_func(): + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.NegativeBinomial( + sm.families.links.nbinom())), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.BinNumExpr( + ast.NumVal(-1.0), + ast.BinNumExpr( + ast.NumVal(1.0), + ast.BinNumExpr( + ast.NumVal(1.0), + ast.ExpExpr( + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.NumVal(0.0), + ast.BinNumExpr( + ast.FeatureRef(0), + ast.NumVal(-1.1079583217), + ast.BinNumOpType.MUL), + ast.BinNumOpType.ADD), + ast.BinNumOpType.SUB)), + ast.BinNumOpType.SUB), + ast.BinNumOpType.MUL), + ast.BinNumOpType.DIV) + + assert utils.cmp_exprs(actual, expected) + + +@pytest.mark.xfail(raises=ValueError, strict=True) +def test_statsmodels_glm_unknown_link_func(): + + class ValidPowerLink(sm.families.links.Power): + pass + + estimator = utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie(ValidPowerLink(2))), + fit=dict(maxiter=1))) + estimator = estimator.fit([[1], [2]], [0.1, 0.2]) + + assembler = assemblers.StatsmodelsGLMModelAssembler(estimator) + assembler.assemble() + + def test_lightning_regression(): estimator = AdaGradRegressor(random_state=1) utils.get_regression_model_trainer()(estimator) diff --git a/tests/assemblers/test_meta.py b/tests/assemblers/test_meta.py new file mode 100644 index 00000000..5c217aba --- /dev/null +++ b/tests/assemblers/test_meta.py @@ -0,0 +1,40 @@ +import pytest +from sklearn import linear_model +from sklearn.dummy import DummyRegressor +from sklearn.tree import DecisionTreeRegressor + +from m2cgen import assemblers, ast +from tests import utils + + +def test_ransac_custom_base_estimator(): + base_estimator = DecisionTreeRegressor() + estimator = linear_model.RANSACRegressor( + base_estimator=base_estimator, + random_state=1) + estimator.fit([[1], [2], [3]], [1, 2, 3]) + + assembler = assemblers.RANSACModelAssembler(estimator) + actual = assembler.assemble() + + expected = ast.IfExpr( + ast.CompExpr( + ast.FeatureRef(0), + ast.NumVal(2.5), + ast.CompOpType.LTE), + ast.NumVal(2.0), + ast.NumVal(3.0)) + + assert utils.cmp_exprs(actual, expected) + + +@pytest.mark.xfail(raises=NotImplementedError, strict=True) +def test_ransac_unknown_base_estimator(): + base_estimator = DummyRegressor() + estimator = linear_model.RANSACRegressor( + base_estimator=base_estimator, + random_state=1) + estimator.fit([[1], [2], [3]], [1, 2, 3]) + + assembler = assemblers.RANSACModelAssembler(estimator) + assembler.assemble() diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py index 8b6e8baa..80ccd777 100644 --- a/tests/e2e/test_e2e.py +++ b/tests/e2e/test_e2e.py @@ -84,6 +84,14 @@ def classification_binary_random(model, test_fraction=0.02): ) +def regression_bounded(model, test_fraction=0.02): + return ( + model, + utils.get_bounded_regression_model_trainer(test_fraction), + REGRESSION, + ) + + # Absolute tolerance. Used in np.isclose to compare 2 values. # We compare 6 decimal digits. ATOL = 1.e-6 @@ -272,6 +280,69 @@ def classification_binary_random(model, test_fraction=0.02): regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), # Statsmodels Linear Regression + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(fit_constrained=dict(constraints=(np.eye( + utils.get_binary_classification_model_trainer() + .X_train.shape[-1])[0], [1]))))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(fit_regularized=STATSMODELS_LINEAR_REGULARIZED_PARAMS))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Binomial( + sm.families.links.cloglog())), + fit=dict(maxiter=2)))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Binomial( + sm.families.links.logit())), + fit=dict(maxiter=2)))), + regression(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.Gaussian( + sm.families.links.identity()))))), + regression(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.Gaussian( + sm.families.links.inverse_power()))))), + regression_bounded(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.InverseGaussian( + sm.families.links.inverse_squared()))))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.NegativeBinomial( + sm.families.links.nbinom())), + fit=dict(maxiter=2)))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.Poisson( + sm.families.links.log())), + fit=dict(maxiter=2)))), + classification_binary(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.Poisson( + sm.families.links.sqrt())), + fit=dict(maxiter=2)))), + regression_bounded(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + family=sm.families.Tweedie( + sm.families.links.Power(-3)))))), + regression_bounded(utils.StatsmodelsSklearnLikeWrapper( + sm.GLM, + dict(init=dict( + fit_intercept=True, family=sm.families.Tweedie( + sm.families.links.Power(2)))))), regression(utils.StatsmodelsSklearnLikeWrapper( sm.GLS, dict(init=dict(sigma=np.eye( diff --git a/tests/utils.py b/tests/utils.py index af926a1e..ad814fcf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -44,6 +44,9 @@ def fit(self, X, y): elif "iterative_fit" in self.params: self.fitted_model_ = est.iterative_fit( **self.params["iterative_fit"]) + elif "fit_constrained" in self.params: + self.fitted_model_ = est.fit_constrained( + **self.params["fit_constrained"]) else: self.fitted_model_ = est.fit(**self.params.get("fit", {})) # mock class module and name to show appropriate model name in tests @@ -70,6 +73,12 @@ def __init__(self, dataset_name, test_fraction): dataset = datasets.load_boston() self.X, self.y = shuffle( dataset.data, dataset.target, random_state=13) + elif dataset_name == "boston_y_bounded": + self.name = "train_model_regression_bounded" + dataset = datasets.load_boston() + self.X, self.y = shuffle( + dataset.data, dataset.target, random_state=13) + self.y = np.arctan(self.y) / np.pi + 0.5 # (0; 1) elif dataset_name == "iris": self.name = "train_model_classification" dataset = datasets.load_iris() @@ -192,6 +201,9 @@ def assert_code_equal(actual, expected): get_classification_binary_random_data_model_trainer = functools.partial( ModelTrainer.get_instance, "classification_binary_rnd") +get_bounded_regression_model_trainer = functools.partial( + ModelTrainer.get_instance, "boston_y_bounded") + @contextlib.contextmanager def tmp_dir(): From 9f86a3edc2a666de544bdaf32f3caded3ba75ea8 Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Sun, 3 May 2020 02:54:52 +0300 Subject: [PATCH 2/3] fixed inverse link function names --- m2cgen/assemblers/linear.py | 52 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/m2cgen/assemblers/linear.py b/m2cgen/assemblers/linear.py index 5ded696b..659b2002 100644 --- a/m2cgen/assemblers/linear.py +++ b/m2cgen/assemblers/linear.py @@ -80,25 +80,25 @@ class StatsmodelsGLMModelAssembler(StatsmodelsLinearModelAssembler): def _final_transform(self, ast_to_transform): link_function = type(self.model.model.family.link).__name__ link_function_lower = link_function.lower() - supported_functions = { - "logit": self._logit, - "power": self._power, - "inverse_power": self._inverse_power, - "sqrt": self._sqrt, - "inverse_squared": self._inverse_squared, - "identity": self._identity, - "log": self._log, - "cloglog": self._cloglog, - "negativebinomial": self._negativebinomial, - "nbinom": self._negativebinomial + supported_inversed_functions = { + "logit": self._logit_inversed, + "power": self._power_inversed, + "inverse_power": self._inverse_power_inversed, + "sqrt": self._sqrt_inversed, + "inverse_squared": self._inverse_squared_inversed, + "identity": self._identity_inversed, + "log": self._log_inversed, + "cloglog": self._cloglog_inversed, + "negativebinomial": self._negativebinomial_inversed, + "nbinom": self._negativebinomial_inversed } - if link_function_lower not in supported_functions: + if link_function_lower not in supported_inversed_functions: raise ValueError( "Unsupported link function '{}'".format(link_function)) - link_fun = supported_functions[link_function_lower] - return link_fun(ast_to_transform) + fun = supported_inversed_functions[link_function_lower] + return fun(ast_to_transform) - def _logit(self, ast_to_transform): + def _logit_inversed(self, ast_to_transform): return utils.div( ast.NumVal(1.0), utils.add( @@ -108,16 +108,16 @@ def _logit(self, ast_to_transform): ast.NumVal(0.0), ast_to_transform)))) - def _power(self, ast_to_transform): + def _power_inversed(self, ast_to_transform): power = self.model.model.family.link.power if power == 1: - return self._identity(ast_to_transform) + return self._identity_inversed(ast_to_transform) elif power == -1: - return self._inverse_power(ast_to_transform) + return self._inverse_power_inversed(ast_to_transform) elif power == 2: return ast.SqrtExpr(ast_to_transform) elif power == -2: - return self._inverse_squared(ast_to_transform) + return self._inverse_squared_inversed(ast_to_transform) elif power < 0: # some languages may not support negative exponent return utils.div( ast.NumVal(1.0), @@ -125,22 +125,22 @@ def _power(self, ast_to_transform): else: return ast.PowExpr(ast_to_transform, ast.NumVal(1 / power)) - def _inverse_power(self, ast_to_transform): + def _inverse_power_inversed(self, ast_to_transform): return utils.div(ast.NumVal(1.0), ast_to_transform) - def _sqrt(self, ast_to_transform): + def _sqrt_inversed(self, ast_to_transform): return ast.PowExpr(ast_to_transform, ast.NumVal(2)) - def _inverse_squared(self, ast_to_transform): + def _inverse_squared_inversed(self, ast_to_transform): return utils.div(ast.NumVal(1.0), ast.SqrtExpr(ast_to_transform)) - def _identity(self, ast_to_transform): + def _identity_inversed(self, ast_to_transform): return ast_to_transform - def _log(self, ast_to_transform): + def _log_inversed(self, ast_to_transform): return ast.ExpExpr(ast_to_transform) - def _cloglog(self, ast_to_transform): + def _cloglog_inversed(self, ast_to_transform): return utils.sub( ast.NumVal(1.0), ast.ExpExpr( @@ -148,7 +148,7 @@ def _cloglog(self, ast_to_transform): ast.NumVal(0.0), ast.ExpExpr(ast_to_transform)))) - def _negativebinomial(self, ast_to_transform): + def _negativebinomial_inversed(self, ast_to_transform): return utils.div( ast.NumVal(-1.0), utils.mul( From 9889e8e3848c61f702bd215936f0ff7d4645f6b7 Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Sun, 3 May 2020 22:08:28 +0300 Subject: [PATCH 3/3] use quotes for more names --- m2cgen/assemblers/boosting.py | 2 +- m2cgen/ast.py | 2 +- m2cgen/interpreters/interpreter.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/m2cgen/assemblers/boosting.py b/m2cgen/assemblers/boosting.py index 714da1b6..1a23045f 100644 --- a/m2cgen/assemblers/boosting.py +++ b/m2cgen/assemblers/boosting.py @@ -160,7 +160,7 @@ def _assemble_child_tree(self, tree, child_id): for child in tree["children"]: if child["nodeid"] == child_id: return self._assemble_tree(child) - assert False, "Unexpected child ID {}".format(child_id) + assert False, "Unexpected child ID: {}".format(child_id) class XGBoostLinearModelAssembler(BaseBoostingAssembler): diff --git a/m2cgen/ast.py b/m2cgen/ast.py index f4195af7..a7eea37c 100644 --- a/m2cgen/ast.py +++ b/m2cgen/ast.py @@ -257,4 +257,4 @@ def count_exprs(expr, exclude_list=None): nested_f(expr))) expr_type_name = expr_type.__name__ - raise ValueError("Unexpected expression type {}".format(expr_type_name)) + raise ValueError("Unexpected expression type '{}'".format(expr_type_name)) diff --git a/m2cgen/interpreters/interpreter.py b/m2cgen/interpreters/interpreter.py index a8d427af..4e08f74a 100644 --- a/m2cgen/interpreters/interpreter.py +++ b/m2cgen/interpreters/interpreter.py @@ -59,7 +59,7 @@ def _select_handler(self, expr): if hasattr(self, handler_name): return getattr(self, handler_name) raise NotImplementedError( - "No handler found for {}".format(type(expr).__name__)) + "No handler found for '{}'".format(type(expr).__name__)) @staticmethod def _handler_name(expr_tpe):