Merge pull request #841 from weixuanfu/tpot-0.9.6
TPOT 0.9.6 Minor Release
weixuanfu committed Mar 1, 2019
2 parents 507b45d + 4a5575d commit 8e032b8
Showing 22 changed files with 222 additions and 176 deletions.
4 changes: 2 additions & 2 deletions .appveyor.yml
@@ -19,9 +19,9 @@ install:
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas
- conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas pywin32
- activate test-environment
- pip install deap tqdm update_checker pypiwin32 stopit dask[delayed] dask-ml
- pip install deap tqdm update_checker stopit dask[delayed] dask-ml cloudpickle==0.5.6


test_script:
5 changes: 3 additions & 2 deletions ci/.travis_install.sh
@@ -23,10 +23,10 @@ deactivate

# Use the miniconda installer for faster download / install of conda
# itself
wget http://repo.continuum.io/miniconda/Miniconda-3.9.1-Linux-x86_64.sh \
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \
-O miniconda.sh
chmod +x miniconda.sh && ./miniconda.sh -b
export PATH=/home/travis/miniconda/bin:$PATH
export PATH=/home/travis/miniconda3/bin:$PATH
conda update --yes conda

# Configure the conda environment and put it in the path using the
@@ -56,6 +56,7 @@ pip install stopit
pip install xgboost
pip install dask[delayed]
pip install dask-ml
pip install cloudpickle==0.5.6

if [[ "$COVERAGE" == "true" ]]; then
pip install coverage coveralls
12 changes: 6 additions & 6 deletions docs/api/index.html
@@ -186,9 +186,9 @@ <h1 id="classification">Classification</h1>
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -317,7 +317,7 @@ <h1 id="classification">Classification</h1>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
@@ -648,9 +648,9 @@ <h1 id="regression">Regression</h1>
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -780,7 +780,7 @@ <h1 id="regression">Regression</h1>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
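The two documentation changes in this file are easier to see in code: offspring_size now defaults to None (falling back to the population size), and periodic_checkpoint_folder saves the whole Pareto front rather than a single best pipeline. A minimal sketch of both, assuming a TPOT release with these changes; the dataset, folder path, and parameter values below are illustrative, not part of this commit:

from sklearn.datasets import make_classification
from tpot import TPOTClassifier

X, y = make_classification(random_state=42)

tpot = TPOTClassifier(
    generations=2,
    population_size=20,  # offspring_size=None, so 20 offspring per generation
    # Illustrative folder: TPOT writes the current Pareto-front pipelines
    # here once per generation, at most once per 30 seconds.
    periodic_checkpoint_folder="./tpot_checkpoints",
    random_state=42,
)
tpot.fit(X, y)

Each checkpointed pipeline is written out as a standalone Python script, so an interrupted run still leaves usable results behind.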
2 changes: 1 addition & 1 deletion docs/index.html
@@ -213,5 +213,5 @@

<!--
MkDocs version : 0.17.2
Build Date UTC : 2018-08-30 20:41:42
Build Date UTC : 2019-03-01 17:12:19
-->
8 changes: 7 additions & 1 deletion docs/related/index.html
@@ -188,9 +188,15 @@
<tr>
<td><a href="https://github.com/reiinakano/xcessiv">Xcessiv</a></td>
<td>Python</td>
<td>Apache-2.0</td>
<td>Apache 2.0</td>
<td>A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.</td>
</tr>
<tr>
<td><a href="https://github.com/PGijsbers/gama">GAMA</a></td>
<td>Python</td>
<td>Apache 2.0</td>
<td>Machine-learning pipeline optimization through genetic programming with asynchronous evaluation.</td>
</tr>
</table>

</div>
12 changes: 6 additions & 6 deletions docs/search/search_index.json

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions docs/sitemap.xml
@@ -4,79 +4,79 @@

<url>
<loc>http://epistasislab.github.io/tpot/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/installing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/using/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/api/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/examples/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/contributing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/releases/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/citing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/support/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/related/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>

2 changes: 1 addition & 1 deletion docs/using/index.html
@@ -425,7 +425,7 @@ <h1 id="tpot-on-the-command-line">TPOT on the command line</h1>
<td>CHECKPOINT_FOLDER</td>
<td>Folder path</td>
<td>
If supplied, a folder you created, in which tpot will periodically save the best pipeline so far while optimizing.
If supplied, a folder you created, in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.
<br /><br />
This is useful in multiple cases:
<ul>
12 changes: 6 additions & 6 deletions docs_sources/api.md
@@ -47,9 +47,9 @@ Number of individuals to retain in the genetic programming population every gene
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -178,7 +178,7 @@ See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.htm

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
@@ -529,9 +529,9 @@ Number of individuals to retain in the genetic programming population every gene
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -661,7 +661,7 @@ See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.htm

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
8 changes: 7 additions & 1 deletion docs_sources/related.md
@@ -52,7 +52,13 @@ Other Automated Machine Learning (AutoML) tools and related projects:
<tr>
<td><a href="https://github.com/reiinakano/xcessiv">Xcessiv</a></td>
<td>Python</td>
<td>Apache-2.0</td>
<td>Apache 2.0</td>
<td>A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.</td>
</tr>
<tr>
<td><a href="https://github.com/PGijsbers/gama">GAMA</a></td>
<td>Python</td>
<td>Apache 2.0</td>
<td>Machine-learning pipeline optimization through genetic programming with asynchronous evaluation.</td>
</tr>
</table>
2 changes: 1 addition & 1 deletion docs_sources/using.md
@@ -298,7 +298,7 @@ See the <a href="../using/#built-in-tpot-configurations">built-in configurations</a>
<td>CHECKPOINT_FOLDER</td>
<td>Folder path</td>
<td>
If supplied, a folder you created, in which tpot will periodically save the best pipeline so far while optimizing.
If supplied, a folder you created, in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.
<br /><br />
This is useful in multiple cases:
<ul>
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@ nose==1.3.7
numpy==1.12.1
scikit-learn==0.18.1
scipy==0.19.0
tqdm==4.11.2
tqdm==4.26.0
update-checker==0.16
stopit==1.1.1
pandas==0.20.2
2 changes: 1 addition & 1 deletion setup.py
@@ -40,7 +40,7 @@ def calculate_version():
'scikit-learn>=0.18.1',
'deap>=1.0',
'update_checker>=0.16',
'tqdm>=4.11.2',
'tqdm>=4.26.0',
'stopit>=1.1.1',
'pandas>=0.20.2'],
extras_require={
7 changes: 4 additions & 3 deletions tests/stacking_estimator_tests.py
@@ -30,6 +30,8 @@
from sklearn.pipeline import make_pipeline
from tpot_tests import training_features, training_target, training_features_r, training_target_r
from sklearn.model_selection import cross_val_score
import warnings
warnings.filterwarnings("ignore")

def test_StackingEstimator_1():
"""Assert that the StackingEstimator returns transformed X with synthetic features in classification."""
@@ -76,8 +78,7 @@ def test_StackingEstimator_3():

# test cv score
cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features, training_target, cv=3, scoring='accuracy'))

known_cv_score = 0.947282375315
known_cv_score = 0.9472823753147593

assert np.allclose(known_cv_score, cv_score)

@@ -100,6 +101,6 @@ def test_StackingEstimator_4():

# test cv score
cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features_r, training_target_r, cv=3, scoring='r2'))
known_cv_score = 0.795877470354
known_cv_score = 0.7989564328211737

assert np.allclose(known_cv_score, cv_score)
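For context on the class under test: StackingEstimator wraps an estimator and appends its predictions (and, for classifiers, class probabilities) to the feature matrix as synthetic features. A minimal sketch of the pattern these tests exercise; the estimators and dataset are illustrative, not part of this commit:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from tpot.builtins import StackingEstimator

X, y = make_classification(random_state=42)

# The logistic regression's predictions become extra input features for
# the random forest that follows it in the pipeline.
sklearn_pipeline = make_pipeline(
    StackingEstimator(estimator=LogisticRegression()),
    RandomForestClassifier(random_state=42),
)
cv_score = np.mean(cross_val_score(sklearn_pipeline, X, y, cv=3, scoring='accuracy'))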
13 changes: 6 additions & 7 deletions tests/test_dask_based.py
@@ -3,7 +3,6 @@
https://github.com/DEAP/deap/issues/75
"""
import unittest

import nose
from sklearn.datasets import make_classification

@@ -21,25 +20,25 @@ class TestDaskMatches(unittest.TestCase):
def test_dask_matches(self):
with dask.config.set(scheduler='single-threaded'):
for n_jobs in [-1]:
X, y = make_classification(random_state=0)
X, y = make_classification(random_state=42)
a = TPOTClassifier(
generations=2,
generations=0,
population_size=5,
cv=3,
random_state=0,
random_state=42,
n_jobs=n_jobs,
use_dask=False,
)
b = TPOTClassifier(
generations=2,
generations=0,
population_size=5,
cv=3,
random_state=0,
random_state=42,
n_jobs=n_jobs,
use_dask=True,
)
b.fit(X, y)
a.fit(X, y)
b.fit(X, y)

self.assertEqual(a.score(X, y), b.score(X, y))
self.assertEqual(a.pareto_front_fitted_pipelines_.keys(),
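The test above asserts that, with a fixed random_state, a TPOT run with use_dask=True matches one with use_dask=False. In ordinary use, use_dask=True pairs with an active Dask client; a minimal sketch, assuming dask.distributed is installed (cluster size and TPOT settings are illustrative, not part of this commit):

from dask.distributed import Client
from sklearn.datasets import make_classification
from tpot import TPOTClassifier

if __name__ == "__main__":
    # Starts a local cluster; TPOT picks up the active client when use_dask=True.
    client = Client(n_workers=2)
    X, y = make_classification(random_state=42)

    tpot = TPOTClassifier(
        generations=2,
        population_size=10,
        cv=3,
        random_state=42,
        use_dask=True,  # pipeline evaluations are distributed across the cluster
    )
    tpot.fit(X, y)
    print(tpot.score(X, y))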
