Skip to content

Commit

Permalink
Merge 3d69ea4 into 4869e00
Browse files Browse the repository at this point in the history
  • Loading branch information
StrikerRUS committed May 20, 2020
2 parents 4869e00 + 3d69ea4 commit 4a8294d
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 22 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -44,7 +44,7 @@ pip install m2cgen

| | Classification | Regression |
| --- | --- | --- |
| **Linear** | <ul><li>scikit-learn<ul><li>LogisticRegression</li><li>LogisticRegressionCV</li><li>PassiveAggressiveClassifier</li><li>Perceptron</li><li>RidgeClassifier</li><li>RidgeClassifierCV</li><li>SGDClassifier</li></ul></li><li>lightning<ul><li>AdaGradClassifier</li><li>CDClassifier</li><li>FistaClassifier</li><li>SAGAClassifier</li><li>SAGClassifier</li><li>SDCAClassifier</li><li>SGDClassifier</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>ARDRegression</li><li>BayesianRidge</li><li>ElasticNet</li><li>ElasticNetCV</li><li>HuberRegressor</li><li>Lars</li><li>LarsCV</li><li>Lasso</li><li>LassoCV</li><li>LassoLars</li><li>LassoLarsCV</li><li>LassoLarsIC</li><li>LinearRegression</li><li>OrthogonalMatchingPursuit</li><li>OrthogonalMatchingPursuitCV</li><li>PassiveAggressiveRegressor</li><li>RANSACRegressor(only supported regression estimators can be used as a base estimator)</li><li>Ridge</li><li>RidgeCV</li><li>SGDRegressor</li><li>TheilSenRegressor</li></ul><li>StatsModels<ul><li>Generalized Least Squares (GLS)</li><li>Generalized Least Squares with AR Errors (GLSAR)</li><li>Generalized Linear Models (GLM)</li><li>Ordinary Least Squares (OLS)</li><li>[Gaussian] Process Regression Using Maximum Likelihood-based Estimation (ProcessMLE)</li><li>Quantile Regression (QuantReg)</li><li>Weighted Least Squares (WLS)</li></ul><li>lightning<ul><li>AdaGradRegressor</li><li>CDRegressor</li><li>FistaRegressor</li><li>SAGARegressor</li><li>SAGRegressor</li><li>SDCARegressor</li></ul></li></ul> |
| **Linear** | <ul><li>scikit-learn<ul><li>LogisticRegression</li><li>LogisticRegressionCV</li><li>PassiveAggressiveClassifier</li><li>Perceptron</li><li>RidgeClassifier</li><li>RidgeClassifierCV</li><li>SGDClassifier</li></ul></li><li>lightning<ul><li>AdaGradClassifier</li><li>CDClassifier</li><li>FistaClassifier</li><li>SAGAClassifier</li><li>SAGClassifier</li><li>SDCAClassifier</li><li>SGDClassifier</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>ARDRegression</li><li>BayesianRidge</li><li>ElasticNet</li><li>ElasticNetCV</li><li>GammaRegressor</li><li>HuberRegressor</li><li>Lars</li><li>LarsCV</li><li>Lasso</li><li>LassoCV</li><li>LassoLars</li><li>LassoLarsCV</li><li>LassoLarsIC</li><li>LinearRegression</li><li>OrthogonalMatchingPursuit</li><li>OrthogonalMatchingPursuitCV</li><li>PassiveAggressiveRegressor</li><li>PoissonRegressor</li><li>RANSACRegressor(only supported regression estimators can be used as a base estimator)</li><li>Ridge</li><li>RidgeCV</li><li>SGDRegressor</li><li>TheilSenRegressor</li><li>TweedieRegressor</li></ul></li><li>StatsModels<ul><li>Generalized Least Squares (GLS)</li><li>Generalized Least Squares with AR Errors (GLSAR)</li><li>Generalized Linear Models (GLM)</li><li>Ordinary Least Squares (OLS)</li><li>[Gaussian] Process Regression Using Maximum Likelihood-based Estimation (ProcessMLE)</li><li>Quantile Regression (QuantReg)</li><li>Weighted Least Squares (WLS)</li></ul></li><li>lightning<ul><li>AdaGradRegressor</li><li>CDRegressor</li><li>FistaRegressor</li><li>SAGARegressor</li><li>SAGRegressor</li><li>SDCARegressor</li></ul></li></ul> |
| **SVM** | <ul><li>scikit-learn<ul><li>LinearSVC</li><li>NuSVC</li><li>SVC</li></ul></li><li>lightning<ul><li>KernelSVC</li><li>LinearSVC</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>LinearSVR</li><li>NuSVR</li><li>SVR</li></ul></li><li>lightning<ul><li>LinearSVR</li></ul></li></ul> |
| **Tree** | <ul><li>DecisionTreeClassifier</li><li>ExtraTreeClassifier</li></ul> | <ul><li>DecisionTreeRegressor</li><li>ExtraTreeRegressor</li></ul> |
| **Random Forest** | <ul><li>ExtraTreesClassifier</li><li>LGBMClassifier(rf booster only)</li><li>RandomForestClassifier</li><li>XGBRFClassifier</li></ul> | <ul><li>ExtraTreesRegressor</li><li>LGBMRegressor(rf booster only)</li><li>RandomForestRegressor</li><li>XGBRFRegressor</li></ul> |
Expand Down
7 changes: 6 additions & 1 deletion m2cgen/assemblers/__init__.py
Expand Up @@ -2,7 +2,8 @@
StatsmodelsLinearModelAssembler,
ProcessMLEModelAssembler,
StatsmodelsGLMModelAssembler,
StatsmodelsModelAssemblerSelector)
StatsmodelsModelAssemblerSelector,
SklearnGLMModelAssembler)
from .tree import TreeModelAssembler
from .ensemble import RandomForestModelAssembler
from .boosting import (XGBoostModelAssemblerSelector,
Expand All @@ -27,6 +28,7 @@
LightningSVMModelAssembler,
StatsmodelsGLMModelAssembler,
StatsmodelsModelAssemblerSelector,
SklearnGLMModelAssembler,
]


Expand Down Expand Up @@ -59,6 +61,7 @@
"sklearn_BayesianRidge": SklearnLinearModelAssembler,
"sklearn_ElasticNet": SklearnLinearModelAssembler,
"sklearn_ElasticNetCV": SklearnLinearModelAssembler,
"sklearn_GammaRegressor": SklearnGLMModelAssembler,
"sklearn_HuberRegressor": SklearnLinearModelAssembler,
"sklearn_Lars": SklearnLinearModelAssembler,
"sklearn_LarsCV": SklearnLinearModelAssembler,
Expand All @@ -71,11 +74,13 @@
"sklearn_OrthogonalMatchingPursuit": SklearnLinearModelAssembler,
"sklearn_OrthogonalMatchingPursuitCV": SklearnLinearModelAssembler,
"sklearn_PassiveAggressiveRegressor": SklearnLinearModelAssembler,
"sklearn_PoissonRegressor": SklearnGLMModelAssembler,
"sklearn_RANSACRegressor": RANSACModelAssembler,
"sklearn_Ridge": SklearnLinearModelAssembler,
"sklearn_RidgeCV": SklearnLinearModelAssembler,
"sklearn_SGDRegressor": SklearnLinearModelAssembler,
"sklearn_TheilSenRegressor": SklearnLinearModelAssembler,
"sklearn_TweedieRegressor": SklearnGLMModelAssembler,

# Statsmodels Linear Regressors
"statsmodels_GLMResultsWrapper": StatsmodelsGLMModelAssembler,
Expand Down
77 changes: 59 additions & 18 deletions m2cgen/assemblers/linear.py
Expand Up @@ -76,29 +76,24 @@ def _get_coef(self):
return self.model.params[:self.model.k_exog]


class StatsmodelsGLMModelAssembler(StatsmodelsLinearModelAssembler):
class GLMMixin:

def _final_transform(self, ast_to_transform):
link_function = type(self.model.model.family.link).__name__
link_function = self._get_link_function_name()
link_function_lower = link_function.lower()
supported_inversed_functions = {
"logit": self._logit_inversed,
"power": self._power_inversed,
"inverse_power": self._inverse_power_inversed,
"sqrt": self._sqrt_inversed,
"inverse_squared": self._inverse_squared_inversed,
"identity": self._identity_inversed,
"log": self._log_inversed,
"cloglog": self._cloglog_inversed,
"negativebinomial": self._negativebinomial_inversed,
"nbinom": self._negativebinomial_inversed
}
if link_function_lower not in supported_inversed_functions:
supported_inversed_funs = self._get_supported_inversed_funs()
if link_function_lower not in supported_inversed_funs:
raise ValueError(
"Unsupported link function '{}'".format(link_function))
fun = supported_inversed_functions[link_function_lower]
fun = supported_inversed_funs[link_function_lower]
return fun(ast_to_transform)

def _get_link_function_name(self):
raise NotImplementedError

def _get_supported_inversed_funs(self):
raise NotImplementedError

def _logit_inversed(self, ast_to_transform):
return utils.div(
ast.NumVal(1.0),
Expand All @@ -110,7 +105,7 @@ def _logit_inversed(self, ast_to_transform):
ast_to_transform))))

def _power_inversed(self, ast_to_transform):
power = self.model.model.family.link.power
power = self._get_power()
if power == 1:
return self._identity_inversed(ast_to_transform)
elif power == -1:
Expand Down Expand Up @@ -150,17 +145,50 @@ def _cloglog_inversed(self, ast_to_transform):
ast.ExpExpr(ast_to_transform))))

def _negativebinomial_inversed(self, ast_to_transform):
alpha = self._get_alpha()
return utils.div(
ast.NumVal(-1.0),
utils.mul(
ast.NumVal(self.model.model.family.link.alpha),
ast.NumVal(alpha),
utils.sub(
ast.NumVal(1.0),
ast.ExpExpr(
utils.sub(
ast.NumVal(0.0),
ast_to_transform)))))

def _get_power(self):
raise NotImplementedError

def _get_alpha(self):
raise NotImplementedError


class StatsmodelsGLMModelAssembler(GLMMixin, StatsmodelsLinearModelAssembler):
    """GLM assembler that takes its link details from a statsmodels model.

    Every hook below reads from ``self.model.model.family.link``.
    """

    def _get_link_function_name(self):
        """Return the class name of the model's link object."""
        link = self.model.model.family.link
        return type(link).__name__

    def _get_supported_inversed_funs(self):
        """Return a dict of lower-case link names to inverse-link builders."""
        inverses = {
            "logit": self._logit_inversed,
            "power": self._power_inversed,
            "inverse_power": self._inverse_power_inversed,
            "sqrt": self._sqrt_inversed,
            "inverse_squared": self._inverse_squared_inversed,
            "identity": self._identity_inversed,
            "log": self._log_inversed,
            "cloglog": self._cloglog_inversed,
            "negativebinomial": self._negativebinomial_inversed,
        }
        # "nbinom" is handled by the same builder as "negativebinomial".
        inverses["nbinom"] = inverses["negativebinomial"]
        return inverses

    def _get_power(self):
        """Return the ``power`` attribute of the model's link object."""
        link = self.model.model.family.link
        return link.power

    def _get_alpha(self):
        """Return the ``alpha`` attribute of the model's link object."""
        link = self.model.model.family.link
        return link.alpha


class StatsmodelsModelAssemblerSelector(ModelAssembler):

Expand All @@ -181,6 +209,19 @@ def assemble(self):
return self.assembler.assemble()


class SklearnGLMModelAssembler(GLMMixin, SklearnLinearModelAssembler):
    """GLM assembler for scikit-learn estimators exposing ``_link_instance``."""

    def _get_link_function_name(self):
        """Return the class name of the estimator's link object."""
        link = self.model._link_instance
        return type(link).__name__

    def _get_supported_inversed_funs(self):
        """Return a dict of lower-case link class names to inverse builders."""
        return dict(
            identitylink=self._identity_inversed,
            loglink=self._log_inversed,
            logitlink=self._logit_inversed,
        )


def _linear_to_ast(coef, intercept):
feature_weight_mul_ops = [
utils.mul(ast.FeatureRef(index), ast.NumVal(value))
Expand Down
7 changes: 5 additions & 2 deletions requirements-test.txt
@@ -1,9 +1,12 @@
# Supported models
scikit-learn==0.22.2.post1
scikit-learn==0.23.1
xgboost==1.0.2
lightgbm==2.3.1
statsmodels==0.11.1
git+git://github.com/scikit-learn-contrib/lightning.git@b96f9c674968496e854078163c8814049a7b9f43
# The latest master branch of lightning is incompatible with scikit-learn >=0.23,
# but you can still install it with an older scikit-learn (<0.23) via
# git+git://github.com/scikit-learn-contrib/lightning.git@b96f9c674968496e854078163c8814049a7b9f43
git+git://github.com/scikit-learn-contrib/lightning.git@refs/pull/142/head

# Testing tools
flake8==3.7.9
Expand Down
49 changes: 49 additions & 0 deletions tests/assemblers/test_linear.py
Expand Up @@ -687,6 +687,55 @@ class ValidPowerLink(sm.families.links.Power):
assembler.assemble()


def test_sklearn_glm_identity_link_func():
    """With an identity link the assembled AST is the plain linear sum."""
    features, targets = [[1], [2]], [0.1, 0.2]
    model = linear_model.TweedieRegressor(
        power=0, link="identity", max_iter=10).fit(features, targets)

    actual = assemblers.SklearnGLMModelAssembler(model).assemble()

    # intercept + feature[0] * coefficient
    weighted_feature = ast.BinNumExpr(
        ast.FeatureRef(0),
        ast.NumVal(0.02),
        ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(
        ast.NumVal(0.12),
        weighted_feature,
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)


def test_sklearn_glm_log_link_func():
    """With a log link the linear sum is wrapped in an exponent."""
    features, targets = [[1], [2]], [0.1, 0.2]
    model = linear_model.TweedieRegressor(
        power=1, link="log", fit_intercept=False, max_iter=10)
    model = model.fit(features, targets)

    actual = assemblers.SklearnGLMModelAssembler(model).assemble()

    # fit_intercept=False, so the expected intercept term is 0.0.
    weighted_feature = ast.BinNumExpr(
        ast.FeatureRef(0),
        ast.NumVal(-0.4619711397),
        ast.BinNumOpType.MUL)
    linear_part = ast.BinNumExpr(
        ast.NumVal(0.0),
        weighted_feature,
        ast.BinNumOpType.ADD)
    expected = ast.ExpExpr(linear_part)

    assert utils.cmp_exprs(actual, expected)


def test_sklearn_glm_unknown_link_func():
    """An unrecognised link object must make the assembler raise ValueError.

    The estimator is fitted with a valid link and only afterwards given a
    link object of an unknown class. Passing a bogus link *string* to the
    estimator makes scikit-learn's own ``fit`` raise ``ValueError``, which
    would satisfy an ``xfail(raises=ValueError)`` marker before the assembler
    is ever constructed and leave its error path untested.
    """

    class UnknownLink:
        """Stand-in link whose class name the assembler does not support."""

    estimator = linear_model.TweedieRegressor(
        power=1, link="log", max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])
    # SklearnGLMModelAssembler derives the link name from the type of this
    # attribute, so replacing it is enough to simulate an unsupported link.
    estimator._link_instance = UnknownLink()

    assembler = assemblers.SklearnGLMModelAssembler(estimator)

    # Only assemble() is inside the context manager so that an error raised
    # during fitting or construction cannot make the test pass by accident.
    with pytest.raises(ValueError, match="Unsupported link function"):
        assembler.assemble()


def test_lightning_regression():
estimator = AdaGradRegressor(random_state=1)
utils.get_regression_model_trainer()(estimator)
Expand Down
7 changes: 7 additions & 0 deletions tests/e2e/test_e2e.py
Expand Up @@ -263,6 +263,7 @@ def regression_bounded(model, test_fraction=0.02):
regression(linear_model.BayesianRidge()),
regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
regression(linear_model.GammaRegressor()),
regression(linear_model.HuberRegressor()),
regression(linear_model.Lars()),
regression(linear_model.LarsCV()),
Expand All @@ -276,13 +277,19 @@ def regression_bounded(model, test_fraction=0.02):
regression(linear_model.OrthogonalMatchingPursuitCV()),
regression(linear_model.PassiveAggressiveRegressor(
random_state=RANDOM_SEED)),
regression(linear_model.PoissonRegressor()),
regression(linear_model.RANSACRegressor(
base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
random_state=RANDOM_SEED)),
regression(linear_model.Ridge(random_state=RANDOM_SEED)),
regression(linear_model.RidgeCV()),
regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
regression(linear_model.TweedieRegressor(power=0.0)),
regression(linear_model.TweedieRegressor(power=1.0)),
regression(linear_model.TweedieRegressor(power=1.5)),
regression(linear_model.TweedieRegressor(power=2.0)),
regression(linear_model.TweedieRegressor(power=3.0)),
# Statsmodels Linear Regression
classification_binary(utils.StatsmodelsSklearnLikeWrapper(
Expand Down

0 comments on commit 4a8294d

Please sign in to comment.