Merge pull request #70 from AxeldeRomblay/cleaning

release 0.7.0
AxeldeRomblay · Jun 27, 2019 · fddc27e · fddc27e
2 parents 259ac58 + d951d9d
commit fddc27e
Show file tree

Hide file tree

Showing 85 changed files with 10,264 additions and 1,250 deletions.
diff --git a/.codecov.yml b/.codecov.yml
@@ -0,0 +1,22 @@
+codecov:
+  notify:
+    require_ci_to_pass: yes
+
+coverage:
+  precision: 2
+  round: down
+  range: "50...100"
+
+  status:
+    project:
+      default:
+        # Commits pushed to master should not make the overall
+        # project coverage decrease by more than 1%
+        target: auto
+        threshold: 1%
+    patch:
+      default:
+        # Tolerate slight coverage differences on PRs to limit
+        # noisy red coverage statuses on GitHub PRs.
+        target: auto
+        threshold: 1%
diff --git a/.gitignore b/.gitignore
@@ -39,6 +39,7 @@ pip-delete-this-directory.txt
 htmlcov/
 .tox/
 .coverage
+.pytest_cache/
 .coverage.*
 .cache
 nosetests.xml
@@ -70,6 +71,10 @@ target/
 # Jupyter Notebook
 .ipynb_checkpoints
 
+# pycharm
+.idea
+.DS_Store
+
 # pyenv
 .python-version
 
@@ -99,3 +104,6 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+# save folders
+*save/
diff --git a/.travis.yml b/.travis.yml
@@ -1,15 +1,95 @@
 language: python
-python:
-- '2.7'
-- '3.6'
+
+# Declare every OS and Python version to be tested.
+# Everything before the install section is OS-specific.
+matrix:
+    include:
+        # Use the built-in venv for Linux builds
+        - os: linux
+          python: "2.7"
+
+        - os: linux
+          python: "3.5"
+
+        - os: linux
+          python: "3.6"
+
+        # Use generic language for osx
+        - os: osx
+          language: generic
+          python: "2.7"
+          before_install:
+            - brew update
+            - brew install libomp
+            - brew upgrade pyenv
+            - brew install pyenv-virtualenv
+            - pyenv install 2.7.15
+            - eval "$(pyenv init -)"
+            - pyenv virtualenv 2.7.15 venv
+            - pyenv activate venv
+
+        - os: osx
+          language: generic
+          python: "3.5"
+          before_install:
+            - brew update
+            - brew install libomp
+            - brew upgrade pyenv
+            - brew install pyenv-virtualenv
+            - pyenv install 3.5.6
+            - eval "$(pyenv init -)"
+            - pyenv virtualenv 3.5.6 venv
+            - pyenv activate venv
+
+        - os: osx
+          language: generic
+          python: "3.6"
+          before_install:
+            - brew update
+            - brew install libomp
+            - brew upgrade pyenv
+            - brew install pyenv-virtualenv
+            - pyenv install 3.6.7
+            - eval "$(pyenv init -)"
+            - pyenv virtualenv 3.6.7 venv
+            - pyenv activate venv
+
+        # Use sh language for windows
+        - os: windows
+          language: sh
+          python: "3.5"
+          before_install:
+            - choco install python --version 3.5.4
+            - export PATH="/c/Python35:/c/Python35/Scripts:$PATH"
+
+        - os: windows
+          language: sh
+          python: "3.6"
+          before_install:
+            - choco install python --version 3.6.7
+            - export PATH="/c/Python36:/c/Python36/Scripts:$PATH"
+
+# Steps common to every OS
 install:
-- pip install codecov
-- pip install --upgrade setuptools wheel
-- pip install --only-binary all -r python-package/requirements.txt
+  - pip install coverage
+  - pip install codecov
+  - pip install -U pytest
+  - pip install --upgrade setuptools wheel
+
+# Install mlbox and run the tests
 script:
-- cd python-package
-- python setup.py install
-- cd ../tests
-- coverage run test_mlbox.py
+  - python setup.py install
+  - cd tests
+  - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$TRAVIS_PYTHON_VERSION" = "3.6" ] ; then coverage run -m --source=../mlbox/ pytest; fi
+  - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$TRAVIS_PYTHON_VERSION" != "3.6" ] ; then pytest; fi
+  - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then pytest; fi
+  - if [ "$TRAVIS_OS_NAME" = "windows" ] ; then pytest; fi
+
 after_success:
-- codecov
+  - codecov
+
+deploy:
+  provider: pypi
+  user: AxeldeRomblay
+  password:
+    secure: YiApWm2gnE2SiBQQZ4gbWEJrFl+Jo381e07viF9Lgo30+mjnCEpMilAZpCl9MuzslOkXyFCv/U9JNnj9+yGtMcxJQE+82E3eoqupgoMQeGZuWIMN9cRNemxZWgOAwrzslfqEYWvMVdNzIayFYAw6HUgmNI5sHiefX2JjFA7Y0MYlL0SZBjeqUESqpjTHIS0nRTCKpv77XKo/SASJsqkpYg8OgnYgKWKvxpols/bZDwmAwy2j0izuyNAY9ASJx5awg9x/bJnn7IzvO8w+j/iQTQjs5CIa1NBW77Naqplxk1rlivIO3+3rT4QJG+xRacJHa1vLInATCcUVpbhzk/NbOPUxLXpxX6XUPDhUysR1sQbaYBbBOdLOnTAmKBv5WozLzXDUxO/3xwUNUB5+JhMikjOWiwA6Q9pJ7q/nsDMRoTcXrjN/jqMDMQ+CU7h7QBIZFoekDO2hEmWRkcgndbgzPcaTmX8AnjoZCvayYvdge47199feXkTUv+Gx68kInYuj+p3m6He36tJegCnsiAdc2eOzbPswt7/KVBbvbIfRaRj+5LtH69ozn1xpEJJOisiJnF9y0ysB00U4B4bE1kGGmxQeMI6f8Gp33wzvYYUrmB+5AESBzr7Mk4Yfc0Y36vA6NilIj7bK4WmTsWkCmmzazGOwgKi1zWC51W/SjNXyWkc=
diff --git a/AUTHORS.rst b/AUTHORS.rst
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,3 @@
+include *.md
+include *.rst
+include *.txt
diff --git a/Makefile b/Makefile
@@ -1,33 +1,4 @@
-.PHONY: clean clean-test clean-pyc clean-build docs help
-.DEFAULT_GOAL := help
-define BROWSER_PYSCRIPT
-import os, webbrowser, sys
-try:
-	from urllib import pathname2url
-except:
-	from urllib.request import pathname2url
-
-webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
-endef
-export BROWSER_PYSCRIPT
-
-define PRINT_HELP_PYSCRIPT
-import re, sys
-
-for line in sys.stdin:
-	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
-	if match:
-		target, help = match.groups()
-		print("%-20s %s" % (target, help))
-endef
-export PRINT_HELP_PYSCRIPT
-BROWSER := python -c "$$BROWSER_PYSCRIPT"
-
-help:
-	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
-
-clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
-
+clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts.
 
 clean-build: ## remove build artifacts
 	rm -fr build/
@@ -43,25 +14,19 @@ clean-pyc: ## remove Python file artifacts
 	find . -name '__pycache__' -exec rm -fr {} +
 
 clean-test: ## remove test and coverage artifacts
-	rm -fr .tox/
-	rm -f .coverage
-	rm -fr htmlcov/
-
-lint: ## check style with flake8
-	flake8 mlbox tests
+	cd tests/ && \
+		rm -fr .tox/ && \
+		rm -f .coverage && \
+		rm -fr htmlcov/
 
 test: ## run tests quickly with the default Python
-	py.test
-
-
-test-all: ## run tests on every Python version with tox
-	tox
+	cd tests/ && \
+		pytest
 
 coverage: ## check code coverage quickly with the default Python
-	coverage run --source mlbox -m pytest
-
-		coverage report -m
-		coverage html
+	cd tests/ && \
+		coverage run -m --source=../mlbox/ pytest && \
+		coverage html && \
-		$(BROWSER) htmlcov/index.html
+		python -c "import os, webbrowser; webbrowser.open('file://' + os.path.abspath('htmlcov/index.html'))"
 
 docs: ## generate Sphinx HTML documentation, including API docs
@@ -72,9 +37,6 @@ docs: ## generate Sphinx HTML documentation, including API docs
 	$(MAKE) -C docs html
 	$(BROWSER) docs/_build/html/index.html
 
-servedocs: docs ## compile the docs watching for changes
-	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
-
 release: clean ## package and upload a release
 	python setup.py sdist upload
 	python setup.py bdist_wheel upload
@@ -86,3 +48,6 @@ dist: clean ## builds source and wheel package
 
 install: clean ## install the package to the active Python's site-packages
 	python setup.py install
+
+develop: clean ## install the package to the active Python's site-packages in developer mode
+	python setup.py develop
diff --git a/README.rst b/README.rst
@@ -1,6 +1,6 @@
 .. image:: docs/logos/logo.png
 
-|Documentation Status| |PyPI version| |Build Status| |Windows Build Status| |GitHub Issues| |codecov| |License| |Downloads| |Python Versions|
+|Documentation Status| |PyPI version| |Build Status| |GitHub Issues| |codecov| |License| |Downloads| |Python Versions|
 
 -----------------------
 
@@ -11,96 +11,11 @@
 * Highly robust feature selection and leak detection
 * Accurate hyper-parameter optimization in high-dimensional space
 * State-of-the art predictive models for classification and regression (Deep Learning, Stacking, LightGBM,...)
-* Prediction with models interpretation 
+* Prediction with models interpretation
 
 
 **For more details**, please refer to the `official documentation <https://mlbox.readthedocs.io/en/latest/>`__
 
---------------------------
-
-
-Getting started: 30 seconds to MLBox
-====================================
-
-MLBox main package contains 3 sub-packages : **preprocessing**, **optimisation** and **prediction**. Each one of them are respectively aimed at reading and preprocessing data, testing or optimising a wide range of learners and predicting the target on a test dataset.
-
-**Here are a few lines to import the MLBox:**
-
-.. code-block:: python 
-
-   from mlbox.preprocessing import *
-   from mlbox.optimisation import *
-   from mlbox.prediction import *
-
-
-**Then, all you need to give is :** 
-
-* the list of paths to your train datasets and test datasets
-* the name of the target you try to predict (classification or regression)
-
-.. code-block:: python 
-
-   paths = ["<file_1>.csv", "<file_2>.csv", ..., "<file_n>.csv"] #to modify
-   target_name = "<my_target>" #to modify
-
-
-**Now, let the MLBox do the job !**
-
-... to read and preprocess your files : 
-
-.. code-block:: python 
-
-   data = Reader(sep=",").train_test_split(paths, target_name)  #reading
-   data = Drift_thresholder().fit_transform(data)  #deleting non-stable variables
-
-... to evaluate models (here default configuration):
-
-.. code-block:: python 
-
-   Optimiser().evaluate(None, data)
-
-
-... or to test and optimize the whole Pipeline [**OPTIONAL**]:
-
-* missing data encoder, aka 'ne'
-* categorical variables encoder, aka 'ce'
-* feature selector, aka 'fs'
-* meta-features stacker, aka 'stck'
-* final estimator, aka 'est'
-
-**NB** : please have a look at all the possibilities you have to configure the Pipeline (steps, parameters and values...) 
-
-.. code-block:: python 
-
-   space = {
-   
-           'ne__numerical_strategy' : {"space" : [0, 'mean']},
-
-           'ce__strategy' : {"space" : ["label_encoding", "random_projection", "entity_embedding"]},
-
-           'fs__strategy' : {"space" : ["variance", "rf_feature_importance"]},
-           'fs__threshold': {"search" : "choice", "space" : [0.1, 0.2, 0.3]},             
-
-           'est__strategy' : {"space" : ["LightGBM"]},
-           'est__max_depth' : {"search" : "choice", "space" : [5,6]},
-           'est__subsample' : {"search" : "uniform", "space" : [0.6,0.9]}
-           
-           }
-
-   best = opt.optimise(space, data, max_evals = 5)
-
-... finally to predict on the test set with the best parameters (or None for default configuration):
-
-.. code-block:: python 
-
-   Predictor().fit_predict(best, data)
-
-
-**That's all !** You can have a look at the folder "save" where you can find :
-
-* your predictions
-* feature importances
-* drift coefficients of your variables (0.5 = very stable, 1. = not stable at all)
 
 --------------------------
 
@@ -110,7 +25,7 @@ How to Contribute
 MLBox has been developed and used by many active community members. Your help is very valuable to make it better for everyone.
 
 - Check out `call for contributions <https://github.com/AxeldeRomblay/MLBox/labels/call-for-contributions>`__ to see what can be improved, or open an issue if you want something.
-- Contribute to the `tests <https://github.com/AxeldeRomblay/MLBox/tree/master/tests>`__ to make it more reliable. 
+- Contribute to the `tests <https://github.com/AxeldeRomblay/MLBox/tree/master/tests>`__ to make it more reliable.
 - Contribute to the `documents <https://github.com/AxeldeRomblay/MLBox/tree/master/docs>`__ to make it clearer for everyone.
 - Contribute to the `examples <https://github.com/AxeldeRomblay/MLBox/tree/master/examples>`__ to share your experience with other users.
 - Open `issue <https://github.com/AxeldeRomblay/MLBox/issues>`__ if you met problems during development.
@@ -123,8 +38,6 @@ For more details, please refer to `CONTRIBUTING <https://github.com/AxeldeRombla
    :target: https://pypi.python.org/pypi/mlbox
 .. |Build Status| image:: https://travis-ci.org/AxeldeRomblay/MLBox.svg?branch=master
    :target: https://travis-ci.org/AxeldeRomblay/MLBox
-.. |Windows Build Status| image:: https://ci.appveyor.com/api/projects/status/5ypa8vaed6kpmli8?svg=true
-   :target: https://ci.appveyor.com/project/AxeldeRomblay/mlbox
 .. |GitHub Issues| image:: https://img.shields.io/github/issues/AxeldeRomblay/MLBox.svg
    :target: https://github.com/AxeldeRomblay/MLBox/issues
 .. |codecov| image:: https://codecov.io/gh/AxeldeRomblay/MLBox/branch/master/graph/badge.svg

diff --git a/VERSION.txt b/VERSION.txt
@@ -0,0 +1 @@
+0.7.0