Merge pull request #841 from weixuanfu/tpot-0.9.6
TPOT 0.9.6 Minor Release
weixuanfu committed Mar 1, 2019
2 parents 507b45d + 4a5575d commit 8e032b8
Showing 22 changed files with 222 additions and 176 deletions.
4 changes: 2 additions & 2 deletions .appveyor.yml
@@ -19,9 +19,9 @@ install:
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas
- conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas pywin32
- activate test-environment
- pip install deap tqdm update_checker pypiwin32 stopit dask[delayed] dask-ml
- pip install deap tqdm update_checker stopit dask[delayed] dask-ml cloudpickle==0.5.6


test_script:
5 changes: 3 additions & 2 deletions ci/.travis_install.sh
@@ -23,10 +23,10 @@ deactivate

# Use the miniconda installer for faster download / install of conda
# itself
wget http://repo.continuum.io/miniconda/Miniconda-3.9.1-Linux-x86_64.sh \
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \
-O miniconda.sh
chmod +x miniconda.sh && ./miniconda.sh -b
export PATH=/home/travis/miniconda/bin:$PATH
export PATH=/home/travis/miniconda3/bin:$PATH
conda update --yes conda

# Configure the conda environment and put it in the path using the
@@ -56,6 +56,7 @@ pip install stopit
pip install xgboost
pip install dask[delayed]
pip install dask-ml
pip install cloudpickle==0.5.6

if [[ "$COVERAGE" == "true" ]]; then
pip install coverage coveralls
12 changes: 6 additions & 6 deletions docs/api/index.html
@@ -186,9 +186,9 @@ <h1 id="classification">Classification</h1>
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -317,7 +317,7 @@ <h1 id="classification">Classification</h1>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
@@ -648,9 +648,9 @@ <h1 id="regression">Regression</h1>
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -780,7 +780,7 @@ <h1 id="regression">Regression</h1>

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
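The two documentation changes in this file are easier to see in code: offspring_size now defaults to None (falling back to the population size), and periodic_checkpoint_folder saves the whole Pareto front rather than a single best pipeline. A minimal sketch of both, assuming a TPOT release with these changes; the dataset, folder path, and parameter values below are illustrative, not part of this commit:

from sklearn.datasets import make_classification
from tpot import TPOTClassifier

X, y = make_classification(random_state=42)

tpot = TPOTClassifier(
    generations=2,
    population_size=20,  # offspring_size=None, so 20 offspring per generation
    # Illustrative folder: TPOT writes the current Pareto-front pipelines
    # here once per generation, at most once per 30 seconds.
    periodic_checkpoint_folder="./tpot_checkpoints",
    random_state=42,
)
tpot.fit(X, y)

Each checkpointed pipeline is written out as a standalone Python script, so an interrupted run still leaves usable results behind.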
2 changes: 1 addition & 1 deletion docs/index.html
@@ -213,5 +213,5 @@

<!--
MkDocs version : 0.17.2
Build Date UTC : 2018-08-30 20:41:42
Build Date UTC : 2019-03-01 17:12:19
-->
8 changes: 7 additions & 1 deletion docs/related/index.html
@@ -188,9 +188,15 @@
<tr>
<td><a href="https://github.com/reiinakano/xcessiv">Xcessiv</a></td>
<td>Python</td>
<td>Apache-2.0</td>
<td>Apache 2.0</td>
<td>A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.</td>
</tr>
<tr>
<td><a href="https://github.com/PGijsbers/gama">GAMA</a></td>
<td>Python</td>
<td>Apache 2.0</td>
<td>Machine-learning pipeline optimization through genetic programming with asynchronous evaluation.</td>
</tr>
</table>

</div>
12 changes: 6 additions & 6 deletions docs/search/search_index.json

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions docs/sitemap.xml
@@ -4,79 +4,79 @@

<url>
<loc>http://epistasislab.github.io/tpot/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/installing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/using/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/api/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/examples/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/contributing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/releases/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/citing/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/support/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>



<url>
<loc>http://epistasislab.github.io/tpot/related/</loc>
<lastmod>2018-08-30</lastmod>
<lastmod>2019-03-01</lastmod>
<changefreq>daily</changefreq>
</url>

2 changes: 1 addition & 1 deletion docs/using/index.html
@@ -425,7 +425,7 @@ <h1 id="tpot-on-the-command-line">TPOT on the command line</h1>
<td>CHECKPOINT_FOLDER</td>
<td>Folder path</td>
<td>
If supplied, a folder you created, in which tpot will periodically save the best pipeline so far while optimizing.
If supplied, a folder you created, in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.
<br /><br />
This is useful in multiple cases:
<ul>
12 changes: 6 additions & 6 deletions docs_sources/api.md
@@ -47,9 +47,9 @@ Number of individuals to retain in the genetic programming population every gene
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -178,7 +178,7 @@ See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.htm

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
@@ -529,9 +529,9 @@ Number of individuals to retain in the genetic programming population every gene
Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline.
</blockquote>

<strong>offspring_size</strong>: int, optional (default=100)
<strong>offspring_size</strong>: int, optional (default=None)
<blockquote>
Number of offspring to produce in each genetic programming generation. Must be a positive number.
Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size.
</blockquote>

<strong>mutation_rate</strong>: float, optional (default=0.9)
@@ -661,7 +661,7 @@ See <a href="https://dask-ml.readthedocs.io/en/latest/hyper-parameter-search.htm

<strong>periodic_checkpoint_folder</strong>: path string, optional (default: None)
<blockquote>
If supplied, a folder in which TPOT will periodically save the best pipeline so far while optimizing.<br /><br />
If supplied, a folder in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.<br /><br />
Currently once per generation but not more often than once per 30 seconds.<br /><br />
Useful in multiple cases:
<ul>
8 changes: 7 additions & 1 deletion docs_sources/related.md
@@ -52,7 +52,13 @@ Other Automated Machine Learning (AutoML) tools and related projects:
<tr>
<td><a href="https://github.com/reiinakano/xcessiv">Xcessiv</a></td>
<td>Python</td>
<td>Apache-2.0</td>
<td>Apache 2.0</td>
<td>A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.</td>
</tr>
<tr>
<td><a href="https://github.com/PGijsbers/gama">GAMA</a></td>
<td>Python</td>
<td>Apache 2.0</td>
<td>Machine-learning pipeline optimization through genetic programming with asynchronous evaluation.</td>
</tr>
</table>
2 changes: 1 addition & 1 deletion docs_sources/using.md
@@ -298,7 +298,7 @@ See the <a href="../using/#built-in-tpot-configurations">built-in configurations</a>
<td>CHECKPOINT_FOLDER</td>
<td>Folder path</td>
<td>
If supplied, a folder you created, in which tpot will periodically save the best pipeline so far while optimizing.
If supplied, a folder you created, in which TPOT will periodically save the pipelines in the Pareto front so far while optimizing.
<br /><br />
This is useful in multiple cases:
<ul>
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@ nose==1.3.7
numpy==1.12.1
scikit-learn==0.18.1
scipy==0.19.0
tqdm==4.11.2
tqdm==4.26.0
update-checker==0.16
stopit==1.1.1
pandas==0.20.2
2 changes: 1 addition & 1 deletion setup.py
@@ -40,7 +40,7 @@ def calculate_version():
'scikit-learn>=0.18.1',
'deap>=1.0',
'update_checker>=0.16',
'tqdm>=4.11.2',
'tqdm>=4.26.0',
'stopit>=1.1.1',
'pandas>=0.20.2'],
extras_require={
7 changes: 4 additions & 3 deletions tests/stacking_estimator_tests.py
@@ -30,6 +30,8 @@
from sklearn.pipeline import make_pipeline
from tpot_tests import training_features, training_target, training_features_r, training_target_r
from sklearn.model_selection import cross_val_score
import warnings
warnings.filterwarnings("ignore")

def test_StackingEstimator_1():
"""Assert that the StackingEstimator returns transformed X with synthetic features in classification."""
@@ -76,8 +78,7 @@ def test_StackingEstimator_3():

# test cv score
cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features, training_target, cv=3, scoring='accuracy'))

known_cv_score = 0.947282375315
known_cv_score = 0.9472823753147593

assert np.allclose(known_cv_score, cv_score)

@@ -100,6 +101,6 @@ def test_StackingEstimator_4():

# test cv score
cv_score = np.mean(cross_val_score(sklearn_pipeline, training_features_r, training_target_r, cv=3, scoring='r2'))
known_cv_score = 0.795877470354
known_cv_score = 0.7989564328211737

assert np.allclose(known_cv_score, cv_score)
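For context on the class under test: StackingEstimator wraps an estimator and appends its predictions (and, for classifiers, class probabilities) to the feature matrix as synthetic features. A minimal sketch of the pattern these tests exercise; the estimators and dataset are illustrative, not part of this commit:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from tpot.builtins import StackingEstimator

X, y = make_classification(random_state=42)

# The logistic regression's predictions become extra input features for
# the random forest that follows it in the pipeline.
sklearn_pipeline = make_pipeline(
    StackingEstimator(estimator=LogisticRegression()),
    RandomForestClassifier(random_state=42),
)
cv_score = np.mean(cross_val_score(sklearn_pipeline, X, y, cv=3, scoring='accuracy'))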
13 changes: 6 additions & 7 deletions tests/test_dask_based.py
@@ -3,7 +3,6 @@
https://github.com/DEAP/deap/issues/75
"""
import unittest

import nose
from sklearn.datasets import make_classification

@@ -21,25 +20,25 @@ class TestDaskMatches(unittest.TestCase):
def test_dask_matches(self):
with dask.config.set(scheduler='single-threaded'):
for n_jobs in [-1]:
X, y = make_classification(random_state=0)
X, y = make_classification(random_state=42)
a = TPOTClassifier(
generations=2,
generations=0,
population_size=5,
cv=3,
random_state=0,
random_state=42,
n_jobs=n_jobs,
use_dask=False,
)
b = TPOTClassifier(
generations=2,
generations=0,
population_size=5,
cv=3,
random_state=0,
random_state=42,
n_jobs=n_jobs,
use_dask=True,
)
b.fit(X, y)
a.fit(X, y)
b.fit(X, y)

self.assertEqual(a.score(X, y), b.score(X, y))
self.assertEqual(a.pareto_front_fitted_pipelines_.keys(),
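The test above asserts that, with a fixed random_state, a TPOT run with use_dask=True matches one with use_dask=False. In ordinary use, use_dask=True pairs with an active Dask client; a minimal sketch, assuming dask.distributed is installed (cluster size and TPOT settings are illustrative, not part of this commit):

from dask.distributed import Client
from sklearn.datasets import make_classification
from tpot import TPOTClassifier

if __name__ == "__main__":
    # Starts a local cluster; TPOT picks up the active client when use_dask=True.
    client = Client(n_workers=2)
    X, y = make_classification(random_state=42)

    tpot = TPOTClassifier(
        generations=2,
        population_size=10,
        cv=3,
        random_state=42,
        use_dask=True,  # pipeline evaluations are distributed across the cluster
    )
    tpot.fit(X, y)
    print(tpot.score(X, y))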
