Skip to content

Commit

Permalink
Merge pull request #70 from AxeldeRomblay/cleaning
Browse files Browse the repository at this point in the history
release 0.7.0
  • Loading branch information
AxeldeRomblay committed Jun 27, 2019
2 parents 259ac58 + d951d9d commit fddc27e
Show file tree
Hide file tree
Showing 85 changed files with 10,264 additions and 1,250 deletions.
22 changes: 22 additions & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
codecov:
notify:
require_ci_to_pass: yes

coverage:
precision: 2
round: down
range: "50...100"

status:
project:
default:
# Commits pushed to master should not make the overall
# project coverage decrease by more than 1%
target: auto
threshold: 1%
patch:
default:
# Be tolerant on slight code coverage diff on PRs to limit
# noisy red coverage status on github PRs.
target: auto
threshold: 1%
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pip-delete-this-directory.txt
htmlcov/
.tox/
.coverage
.pytest_cache/
.coverage.*
.cache
nosetests.xml
Expand Down Expand Up @@ -70,6 +71,10 @@ target/
# Jupyter Notebook
.ipynb_checkpoints

# pycharm
.idea
.DS_Store

# pyenv
.python-version

Expand Down Expand Up @@ -99,3 +104,6 @@ ENV/

# mypy
.mypy_cache/

# save folders
*save/
102 changes: 91 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,95 @@
language: python
python:
- '2.7'
- '3.6'

# Declare all os and version of python to be tested
# The section before install is os specific
matrix:
include:
# Use the built in venv for linux builds
- os: linux
python: "2.7"

- os: linux
python: "3.5"

- os: linux
python: "3.6"

# Use generic language for osx
- os: osx
language: generic
python: "2.7"
before_install:
- brew update
- brew install libomp
- brew upgrade pyenv
- brew install pyenv-virtualenv
- pyenv install 2.7.15
- eval "$(pyenv init -)"
- pyenv virtualenv 2.7.15 venv
- pyenv activate venv

- os: osx
language: generic
python: "3.5"
before_install:
- brew update
- brew install libomp
- brew upgrade pyenv
- brew install pyenv-virtualenv
- pyenv install 3.5.6
- eval "$(pyenv init -)"
- pyenv virtualenv 3.5.6 venv
- pyenv activate venv

- os: osx
language: generic
python: "3.6"
before_install:
- brew update
- brew install libomp
- brew upgrade pyenv
- brew install pyenv-virtualenv
- pyenv install 3.6.7
- eval "$(pyenv init -)"
- pyenv virtualenv 3.6.7 venv
- pyenv activate venv

# Use sh language for windows
- os: windows
language: sh
python: "3.5"
before_install:
- choco install python --version 3.5.4
- export PATH="/c/Python35:/c/Python35/Scripts:$PATH"

- os: windows
language: sh
python: "3.6"
before_install:
- choco install python --version 3.6.7
- export PATH="/c/Python36:/c/Python36/Scripts:$PATH"

# Common steps to all os
install:
- pip install codecov
- pip install --upgrade setuptools wheel
- pip install --only-binary all -r python-package/requirements.txt
- pip install coverage
- pip install codecov
- pip install -U pytest
- pip install --upgrade setuptools wheel

# Install mlbox and run the tests
script:
- cd python-package
- python setup.py install
- cd ../tests
- coverage run test_mlbox.py
- python setup.py install
- cd tests
- if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$TRAVIS_PYTHON_VERSION" = "3.6" ] ; then coverage run -m --source=../mlbox/ pytest; fi
- if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$TRAVIS_PYTHON_VERSION" != "3.6" ] ; then pytest; fi
- if [ "$TRAVIS_OS_NAME" = "osx" ] ; then pytest; fi
- if [ "$TRAVIS_OS_NAME" = "windows" ] ; then pytest; fi

after_success:
- codecov
- codecov

deploy:
provider: pypi
user: AxeldeRomblay
password:
secure: YiApWm2gnE2SiBQQZ4gbWEJrFl+Jo381e07viF9Lgo30+mjnCEpMilAZpCl9MuzslOkXyFCv/U9JNnj9+yGtMcxJQE+82E3eoqupgoMQeGZuWIMN9cRNemxZWgOAwrzslfqEYWvMVdNzIayFYAw6HUgmNI5sHiefX2JjFA7Y0MYlL0SZBjeqUESqpjTHIS0nRTCKpv77XKo/SASJsqkpYg8OgnYgKWKvxpols/bZDwmAwy2j0izuyNAY9ASJx5awg9x/bJnn7IzvO8w+j/iQTQjs5CIa1NBW77Naqplxk1rlivIO3+3rT4QJG+xRacJHa1vLInATCcUVpbhzk/NbOPUxLXpxX6XUPDhUysR1sQbaYBbBOdLOnTAmKBv5WozLzXDUxO/3xwUNUB5+JhMikjOWiwA6Q9pJ7q/nsDMRoTcXrjN/jqMDMQ+CU7h7QBIZFoekDO2hEmWRkcgndbgzPcaTmX8AnjoZCvayYvdge47199feXkTUv+Gx68kInYuj+p3m6He36tJegCnsiAdc2eOzbPswt7/KVBbvbIfRaRj+5LtH69ozn1xpEJJOisiJnF9y0ysB00U4B4bE1kGGmxQeMI6f8Gp33wzvYYUrmB+5AESBzr7Mk4Yfc0Y36vA6NilIj7bK4WmTsWkCmmzazGOwgKi1zWC51W/SjNXyWkc=
17 changes: 0 additions & 17 deletions AUTHORS.rst

This file was deleted.

3 changes: 3 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include *.md
include *.rst
include *.txt
61 changes: 13 additions & 48 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,33 +1,4 @@
.PHONY: clean clean-test clean-pyc clean-build docs help
.DEFAULT_GOAL := help
define BROWSER_PYSCRIPT
import os, webbrowser, sys
try:
from urllib import pathname2url
except:
from urllib.request import pathname2url

webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
endef
export BROWSER_PYSCRIPT

define PRINT_HELP_PYSCRIPT
import re, sys

for line in sys.stdin:
match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
if match:
target, help = match.groups()
print("%-20s %s" % (target, help))
endef
export PRINT_HELP_PYSCRIPT
BROWSER := python -c "$$BROWSER_PYSCRIPT"

help:
@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)

clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts

clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts.

clean-build: ## remove build artifacts
rm -fr build/
Expand All @@ -43,25 +14,19 @@ clean-pyc: ## remove Python file artifacts
find . -name '__pycache__' -exec rm -fr {} +

clean-test: ## remove test and coverage artifacts
rm -fr .tox/
rm -f .coverage
rm -fr htmlcov/

lint: ## check style with flake8
flake8 mlbox tests
# Remove test and coverage artifacts from inside tests/. The steps are chained
# with `&&` so that nothing is deleted from the current directory when the
# `cd` fails (e.g. tests/ is missing).
cd tests/ && \
rm -fr .tox/ && \
rm -f .coverage && \
rm -fr htmlcov/

test: ## run tests quickly with the default Python
py.test


test-all: ## run tests on every Python version with tox
tox
# Run the test suite from inside tests/. `&&` makes the recipe stop with an
# error instead of launching pytest from the wrong directory when `cd` fails.
cd tests/ && \
pytest

coverage: ## check code coverage quickly with the default Python
coverage run --source mlbox -m pytest

coverage report -m
coverage html
# Measure coverage of ../mlbox/ from inside tests/, build the HTML report and
# open it in a browser. The steps are chained with `&&` so that a failed `cd`
# or a failing test run makes the target fail instead of being masked by the
# status of the last command. The report is opened with the standard-library
# `webbrowser` command-line interface, so the recipe does not rely on a
# BROWSER variable being defined in this Makefile.
cd tests/ && \
coverage run -m --source=../mlbox/ pytest && \
coverage html && \
python -m webbrowser "file://$$PWD/htmlcov/index.html"

docs: ## generate Sphinx HTML documentation, including API docs
Expand All @@ -72,9 +37,6 @@ docs: ## generate Sphinx HTML documentation, including API docs
$(MAKE) -C docs html
$(BROWSER) docs/_build/html/index.html

servedocs: docs ## compile the docs watching for changes
watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .

release: clean ## package and upload a release
python setup.py sdist upload
python setup.py bdist_wheel upload
Expand All @@ -86,3 +48,6 @@ dist: clean ## builds source and wheel package

install: clean ## install the package to the active Python's site-packages
python setup.py install

develop: clean ## install the package to the active Python's site-packages in developer mode
python setup.py develop
93 changes: 3 additions & 90 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.. image:: docs/logos/logo.png

|Documentation Status| |PyPI version| |Build Status| |Windows Build Status| |GitHub Issues| |codecov| |License| |Downloads| |Python Versions|
|Documentation Status| |PyPI version| |Build Status| |GitHub Issues| |codecov| |License| |Downloads| |Python Versions|

-----------------------

Expand All @@ -11,96 +11,11 @@
* Highly robust feature selection and leak detection
* Accurate hyper-parameter optimization in high-dimensional space
* State-of-the-art predictive models for classification and regression (Deep Learning, Stacking, LightGBM,...)
* Prediction with models interpretation
* Prediction with models interpretation


**For more details**, please refer to the `official documentation <https://mlbox.readthedocs.io/en/latest/>`__

--------------------------


Getting started: 30 seconds to MLBox
====================================

The MLBox main package contains 3 sub-packages: **preprocessing**, **optimisation** and **prediction**. They are respectively aimed at reading and preprocessing data, testing or optimising a wide range of learners, and predicting the target on a test dataset.

**Here are a few lines to import the MLBox:**

.. code-block:: python
from mlbox.preprocessing import *
from mlbox.optimisation import *
from mlbox.prediction import *
**Then, all you need to give is :**

* the list of paths to your train datasets and test datasets
* the name of the target you try to predict (classification or regression)

.. code-block:: python
paths = ["<file_1>.csv", "<file_2>.csv", ..., "<file_n>.csv"] #to modify
target_name = "<my_target>" #to modify
**Now, let the MLBox do the job !**

... to read and preprocess your files :

.. code-block:: python
data = Reader(sep=",").train_test_split(paths, target_name) #reading
data = Drift_thresholder().fit_transform(data) #deleting non-stable variables
... to evaluate models (here default configuration):

.. code-block:: python
Optimiser().evaluate(None, data)
... or to test and optimize the whole Pipeline [**OPTIONAL**]:

* missing data encoder, aka 'ne'
* categorical variables encoder, aka 'ce'
* feature selector, aka 'fs'
* meta-features stacker, aka 'stck'
* final estimator, aka 'est'

**NB** : please have a look at all the possibilities you have to configure the Pipeline (steps, parameters and values...)

.. code-block:: python
space = {
'ne__numerical_strategy' : {"space" : [0, 'mean']},
'ce__strategy' : {"space" : ["label_encoding", "random_projection", "entity_embedding"]},
'fs__strategy' : {"space" : ["variance", "rf_feature_importance"]},
'fs__threshold': {"search" : "choice", "space" : [0.1, 0.2, 0.3]},            
'est__strategy' : {"space" : ["LightGBM"]},
'est__max_depth' : {"search" : "choice", "space" : [5,6]},
'est__subsample' : {"search" : "uniform", "space" : [0.6,0.9]}
}
best = opt.optimise(space, data, max_evals = 5)
... finally to predict on the test set with the best parameters (or None for default configuration):

.. code-block:: python
Predictor().fit_predict(best, data)
**That's all !** You can have a look at the folder "save" where you can find :

* your predictions
* feature importances
* drift coefficients of your variables (0.5 = very stable, 1. = not stable at all)

--------------------------

Expand All @@ -110,7 +25,7 @@ How to Contribute
MLBox has been developed and used by many active community members. Your help is very valuable to make it better for everyone.

- Check out `call for contributions <https://github.com/AxeldeRomblay/MLBox/labels/call-for-contributions>`__ to see what can be improved, or open an issue if you want something.
- Contribute to the `tests <https://github.com/AxeldeRomblay/MLBox/tree/master/tests>`__ to make it more reliable.
- Contribute to the `tests <https://github.com/AxeldeRomblay/MLBox/tree/master/tests>`__ to make it more reliable.
- Contribute to the `documents <https://github.com/AxeldeRomblay/MLBox/tree/master/docs>`__ to make it clearer for everyone.
- Contribute to the `examples <https://github.com/AxeldeRomblay/MLBox/tree/master/examples>`__ to share your experience with other users.
- Open an `issue <https://github.com/AxeldeRomblay/MLBox/issues>`__ if you encounter problems during development.
Expand All @@ -123,8 +38,6 @@ For more details, please refer to `CONTRIBUTING <https://github.com/AxeldeRombla
:target: https://pypi.python.org/pypi/mlbox
.. |Build Status| image:: https://travis-ci.org/AxeldeRomblay/MLBox.svg?branch=master
:target: https://travis-ci.org/AxeldeRomblay/MLBox
.. |Windows Build Status| image:: https://ci.appveyor.com/api/projects/status/5ypa8vaed6kpmli8?svg=true
:target: https://ci.appveyor.com/project/AxeldeRomblay/mlbox
.. |GitHub Issues| image:: https://img.shields.io/github/issues/AxeldeRomblay/MLBox.svg
:target: https://github.com/AxeldeRomblay/MLBox/issues
.. |codecov| image:: https://codecov.io/gh/AxeldeRomblay/MLBox/branch/master/graph/badge.svg
Expand Down
1 change: 1 addition & 0 deletions VERSION.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.7.0

0 comments on commit fddc27e

Please sign in to comment.