Skip to content

Commit

Permalink
remove xgboost
Browse files Browse the repository at this point in the history
  • Loading branch information
Axel DEROMBLAY committed Apr 26, 2019
1 parent 8e576df commit 96c50e9
Show file tree
Hide file tree
Showing 10 changed files with 439 additions and 125 deletions.
388 changes: 388 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion docs/history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,8 @@ History

0.5.1 (2017-08-25)
------------------
* improvement in verbose mode for reader (display target quantiles for regression)
* improvement in verbose mode for reader (display target quantiles for regression)

0.6.0 (2019-04-26)
------------------
* remove xgboost installation
10 changes: 6 additions & 4 deletions python-package/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Compatibilities
---------------

* *It is compatible with:* **Python 2.7 - 3.6**, **64-bit version only** (32-bit Python is not supported)
* *Operating system:* **Linux**. (MacOS & Windows very soon...)
* *Operating system:* **Linux**. (EXPERIMENTAL for MacOS & Windows)


Preparation
Expand All @@ -26,16 +26,18 @@ First, make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`
.. code-block:: console
$ pip install cmake
If you get any errors during preparation, please refer to `LightGBM's installation guide <https://github.com/Microsoft/LightGBM/tree/master/python-package#lightgbm-python-package>`__


Installation
------------

Install from pip
~~~~~~~~~~~~~~~~

MLBox is now available on **PyPI**, so you only need to run the following command:
MLBox is available on **PyPI**, so you only need to run the following command:

.. code-block:: console
Expand Down
2 changes: 1 addition & 1 deletion python-package/mlbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = """Axel ARONIO DE ROMBLAY"""
__email__ = 'axelderomblay@gmail.com'
__version__ = '0.5.3'
__version__ = '0.6.0'

from .preprocessing import *
from .encoding import *
Expand Down
61 changes: 12 additions & 49 deletions python-package/mlbox/model/supervised/classification/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,9 @@
import pandas as pd
from sklearn.ensemble import (AdaBoostClassifier, BaggingClassifier,
ExtraTreesClassifier, RandomForestClassifier)
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

global lgbm_installed

try:
from lightgbm import LGBMClassifier
lgbm_installed = True
except Exception:
warnings.warn(
"Package lightgbm is not installed. Model LightGBM will be replaced by"
"XGBoost")
lgbm_installed = False
from lightgbm import LGBMClassifier


class Classifier():
Expand All @@ -31,10 +20,9 @@ class Classifier():
Parameters
----------
strategy : str, default = "LightGBM" if installed else "XGBoost"
strategy : str, default = "LightGBM"
The choice for the classifier.
Available strategies = "LightGBM" (if installed), "XGBoost",
"RandomForest", "ExtraTrees", "Tree", "Bagging", "AdaBoost" or "Linear"
Available strategies = "LightGBM", "RandomForest", "ExtraTrees", "Tree", "Bagging", "AdaBoost" or "Linear"
**params : default = None
Parameters of the corresponding classifier.
Expand All @@ -46,10 +34,7 @@ def __init__(self, **params):
if ("strategy" in params):
self.__strategy = params["strategy"]
else:
if (lgbm_installed):
self.__strategy = "LightGBM"
else:
self.__strategy = "XGBoost"
self.__strategy = "LightGBM"

self.__classif_params = {}

Expand Down Expand Up @@ -111,31 +96,10 @@ def __set_classifier(self, strategy):
n_estimators=400, max_depth=10, max_features='sqrt',
bootstrap=True, n_jobs=-1, random_state=0)

elif(strategy == 'XGBoost'):
self.__classifier = XGBClassifier(n_estimators=500, max_depth=6,
learning_rate=0.05,
colsample_bytree=0.8,
colsample_bylevel=1.,
subsample=0.9,
nthread=-1, seed=0)

elif(strategy == "LightGBM"):
if(lgbm_installed):
self.__classifier = LGBMClassifier(
n_estimators=500, learning_rate=0.05,
colsample_bytree=0.8, subsample=0.9, nthread=-1, seed=0)
else:
warnings.warn(
"Package lightgbm is not installed. Model LightGBM will be"
"replaced by XGBoost")
self.__strategy = "XGBoost"
self.__classifier = XGBClassifier(n_estimators=500,
max_depth=6,
learning_rate=0.05,
colsample_bytree=0.8,
colsample_bylevel=1.,
subsample=0.9, nthread=-1,
seed=0)
self.__classifier = LGBMClassifier(
n_estimators=500, learning_rate=0.05,
colsample_bytree=0.8, subsample=0.9, nthread=-1, seed=0)

elif(strategy == 'ExtraTrees'):
self.__classifier = ExtraTreesClassifier(
Expand Down Expand Up @@ -170,8 +134,8 @@ def __set_classifier(self, strategy):

else:
raise ValueError(
"Strategy invalid. Please choose between 'LightGBM' "
"(if installed), 'XGBoost', 'RandomForest', 'ExtraTrees', "
"Strategy invalid. Please choose between 'LightGBM'"
", 'RandomForest', 'ExtraTrees', "
"'Tree', 'Bagging', 'AdaBoost' or 'Linear'")


Expand Down Expand Up @@ -231,8 +195,7 @@ def feature_importances(self):
for i, col in enumerate(self.__col):
importance[col] = f[i]

elif (self.get_params()["strategy"] in ["LightGBM", "XGBoost",
"RandomForest",
elif (self.get_params()["strategy"] in ["LightGBM", "RandomForest",
"ExtraTrees", "Tree"]):

importance = {}
Expand All @@ -248,7 +211,7 @@ def feature_importances(self):
norm = self.get_estimator().estimator_weights_.sum()

try:
# XGB, RF, ET, Tree and AdaBoost
# LGB, RF, ET, Tree and AdaBoost
f = sum(weight * est.feature_importances_
for weight, est in zip(self.get_estimator().estimator_weights_, self.get_estimator().estimators_)) / norm # noqa

Expand All @@ -270,7 +233,7 @@ def feature_importances(self):
d = {}

try:
# XGB, RF, ET, Tree and AdaBoost
# LGB, RF, ET, Tree and AdaBoost
f = b.feature_importances_
except: # noqa
# Linear
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class StackingClassifier():
Parameters
----------
base_estimators : list, default = [Classifier(strategy="XGBoost"), Classifier(strategy="RandomForest"),Classifier(strategy="ExtraTrees")]
base_estimators : list, default = [Classifier(strategy="LightGBM"), Classifier(strategy="RandomForest"),Classifier(strategy="ExtraTrees")]
List of estimators to fit in the first level using a cross validation.
level_estimator : object, default = LogisticRegression()
Expand All @@ -46,7 +46,7 @@ class StackingClassifier():
"""

def __init__(self,
base_estimators=[Classifier(strategy="XGBoost"),
base_estimators=[Classifier(strategy="LightGBM"),
Classifier(strategy="RandomForest"),
Classifier(strategy="ExtraTrees")],
level_estimator=LogisticRegression(n_jobs=-1),
Expand Down
56 changes: 12 additions & 44 deletions python-package/mlbox/model/supervised/regression/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,9 @@
import pandas as pd
from sklearn.ensemble import (AdaBoostRegressor, BaggingRegressor,
ExtraTreesRegressor, RandomForestRegressor)
from xgboost import XGBRegressor
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor

global lgbm_installed

try:
from lightgbm import LGBMRegressor
lgbm_installed = True
except Exception:
warnings.warn(
"Package lightgbm is not installed. Model LightGBM will be replaced"
"by XGBoost")
lgbm_installed = False
from lightgbm import LGBMRegressor


class Regressor():
Expand All @@ -32,10 +21,9 @@ class Regressor():
Parameters
----------
strategy : str, default = "LightGBM" if installed else "XGBoost"
strategy : str, default = "LightGBM"
The choice for the regressor.
Available strategies = "LightGBM" (if installed), "XGBoost",
"RandomForest", "ExtraTrees", "Tree", "Bagging", "AdaBoost" or "Linear"
Available strategies = "LightGBM", "RandomForest", "ExtraTrees", "Tree", "Bagging", "AdaBoost" or "Linear"
**params : default = None
Parameters of the corresponding regressor.
Expand All @@ -47,10 +35,7 @@ def __init__(self, **params):
if ("strategy" in params):
self.__strategy = params["strategy"]
else:
if (lgbm_installed):
self.__strategy = "LightGBM"
else:
self.__strategy = "XGBoost"
self.__strategy = "LightGBM"

self.__regress_params = {}

Expand Down Expand Up @@ -112,26 +97,10 @@ def __set_regressor(self, strategy):
n_estimators=400, max_depth=10, max_features='sqrt',
bootstrap=True, n_jobs=-1, random_state=0)

elif(strategy == 'XGBoost'):
self.__regressor = XGBRegressor(
n_estimators=500, max_depth=6, learning_rate=0.05,
colsample_bytree=0.8, colsample_bylevel=1., subsample=0.9,
nthread=-1, seed=0)

elif(strategy == "LightGBM"):
if(lgbm_installed):
self.__regressor = LGBMRegressor(
n_estimators=500, learning_rate=0.05,
colsample_bytree=0.8, subsample=0.9, nthread=-1, seed=0)
else:
warnings.warn(
"Package lightgbm is not installed. Model LightGBM will be"
"replaced by XGBoost")
self.__strategy = "XGBoost"
self.__regressor = XGBRegressor(
n_estimators=500, max_depth=6, learning_rate=0.05,
colsample_bytree=0.8, colsample_bylevel=1.,
subsample=0.9, nthread=-1, seed=0)
self.__regressor = LGBMRegressor(
n_estimators=500, learning_rate=0.05,
colsample_bytree=0.8, subsample=0.9, nthread=-1, seed=0)

elif(strategy == 'ExtraTrees'):
self.__regressor = ExtraTreesRegressor(
Expand Down Expand Up @@ -163,8 +132,8 @@ def __set_regressor(self, strategy):

else:
raise ValueError(
"Strategy invalid. Please choose between 'LightGBM' "
"(if installed), 'XGBoost', 'RandomForest', 'ExtraTrees', "
"Strategy invalid. Please choose between 'LightGBM'"
", 'RandomForest', 'ExtraTrees', "
"'Tree', 'Bagging', 'AdaBoost' or 'Linear'")


Expand Down Expand Up @@ -224,8 +193,7 @@ def feature_importances(self):
for i, col in enumerate(self.__col):
importance[col] = f[i]

elif (self.get_params()["strategy"] in ["LightGBM", "XGBoost",
"RandomForest",
elif (self.get_params()["strategy"] in ["LightGBM", "RandomForest",
"ExtraTrees", "Tree"]):

importance = {}
Expand All @@ -241,7 +209,7 @@ def feature_importances(self):
norm = self.get_estimator().estimator_weights_.sum()

try:
# XGB, RF, ET, Tree and AdaBoost
# LGB, RF, ET, Tree and AdaBoost
# TODO: Refactor this part
f = sum(weight * est.feature_importances_ for weight, est in zip(self.get_estimator().estimator_weights_, self.get_estimator().estimators_)) / norm # noqa

Expand All @@ -261,7 +229,7 @@ def feature_importances(self):
d = {}

try:
# XGB, RF, ET, Tree and AdaBoost
# LGB, RF, ET, Tree and AdaBoost
f = b.feature_importances_
except Exception:
f = np.abs(b.coef_) # Linear
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class StackingRegressor():
Parameters
----------
base_estimators : list, default = [Regressor(strategy="XGBoost"), Regressor(strategy="RandomForest"), Regressor(strategy="ExtraTrees")]
base_estimators : list, default = [Regressor(strategy="LightGBM"), Regressor(strategy="RandomForest"), Regressor(strategy="ExtraTrees")]
List of estimators to fit in the first level using a cross validation.
level_estimator : object, default = LinearRegression()
Expand All @@ -43,7 +43,7 @@ class StackingRegressor():
Verbose mode.
"""

def __init__(self, base_estimators=[Regressor(strategy="XGBoost"),
def __init__(self, base_estimators=[Regressor(strategy="LightGBM"),
Regressor(strategy="RandomForest"),
Regressor(strategy="ExtraTrees")],
level_estimator=LinearRegression(), n_folds=5,
Expand Down
14 changes: 6 additions & 8 deletions python-package/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
numpy==1.13.0
matplotlib==2.0.2
numpy>=1.13.0
matplotlib>=2.0.2
hyperopt==0.1
Keras==2.0.4
pandas==0.20.3
joblib==0.11
Keras==2.1.2
pandas==0.21.0
scikit-learn==0.19.0
Theano==0.9.0
xgboost==0.6a2
lightgbm==2.0.2
Theano==1.0.1
lightgbm==2.0.11
networkx==1.11
19 changes: 5 additions & 14 deletions python-package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,11 @@
import pip
from setuptools import setup
from setuptools.command.install import install
from mlbox.__init__ import __version__

with open('requirements.txt', 'rt') as fh:
requirements = fh.read().splitlines()

requirements = [
"numpy==1.13.0",
"matplotlib==2.0.2",
"hyperopt==0.1",
"Keras==2.0.4",
"pandas==0.20.3",
"joblib==0.11",
"scikit-learn==0.19.0",
"Theano==0.9.0",
"xgboost==0.6a2",
"lightgbm==2.0.2",
"networkx==1.11"
]

class OverrideInstallCommand(install):
def run(self):
Expand Down Expand Up @@ -45,7 +36,7 @@ def run(self):

setup(
name='mlbox',
version='0.5.3',
version=__version__,
description="A powerful Automated Machine Learning python library.",
long_description=readme,
author="Axel ARONIO DE ROMBLAY",
Expand Down

1 comment on commit 96c50e9

@dimagalat
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AxeldeRomblay Axel, do you mind explaining why you decided to remove XGBoost? Thanks.

Please sign in to comment.