diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000..590605d4 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,16 @@ +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 90 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 7 +# Issues with these labels will never be considered stale +exemptLabels: + - pinned +# Label to use when marking an issue as stale +staleLabel: stale +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed in 7 days if no further activity occurs. + Thank you for your contributions. +# Comment to post when closing a stale issue. Set to `false` to disable +closeComment: false diff --git a/.travis.yml b/.travis.yml index ea7e107c..327937d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ before_install: - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then export PATH=/home/travis/miniconda2/bin:$PATH; else export PATH=/home/travis/miniconda3/bin:$PATH; fi - conda update --yes conda install: - - conda install --yes --channel defaults --channel conda-forge python=$TRAVIS_PYTHON_VERSION numpy scipy beautifulsoup4 six scikit-learn==0.19.1 joblib prettytable python-coveralls ruamel.yaml + - conda install --yes --channel defaults --channel conda-forge python=$TRAVIS_PYTHON_VERSION numpy scipy beautifulsoup4 six scikit-learn==0.20.1 joblib prettytable python-coveralls ruamel.yaml - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes --channel defaults configparser mock; fi - if [ ${WITH_PANDAS_AND_SEABORN} == "true" ]; then conda install --yes --channel defaults pandas seaborn; fi # Have to use pip for nose-cov because its entry points are not supported by conda yet diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e07b86d6..e0199044 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -114,5 +114,10 @@ documentation without the example gallery. The resulting HTML files will be placed in _build/html/ and are viewable in a web browser. See the README file in the doc/ directory for more information. -For building the documentation, you will need [sphinx](http://sphinx.pocoo.org/). +For building the documentation, you will need [sphinx](http://sphinx.pocoo.org/) as well as the readthedocs sphinx theme. To install both, just run: + + conda install sphinx sphinx_rtd_theme + +in your existing conda environment. 
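For reference, wiring the readthedocs theme into a Sphinx ``conf.py`` typically amounts to a couple of lines. The following is an illustrative sketch only (SKLL's actual ``doc/conf.py`` is not part of this diff, so its real settings may differ):

    # illustrative conf.py excerpt -- assumes the sphinx_rtd_theme package
    # installed above; SKLL's real conf.py is not shown in this diff
    import sphinx_rtd_theme

    html_theme = 'sphinx_rtd_theme'
    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]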
+ diff --git a/README.rst b/README.rst index 9bde7c4b..1e77ad8d 100644 --- a/README.rst +++ b/README.rst @@ -124,17 +124,17 @@ Requirements - Python 2.7+ - `scikit-learn `__ -- `six `__ -- `PrettyTable `__ +- `six `__ +- `PrettyTable `__ - `BeautifulSoup 4 `__ -- `Grid Map `__ (only required if you plan +- `Grid Map `__ (only required if you plan to run things in parallel on a DRMAA-compatible cluster) -- `joblib `__ +- `joblib `__ - `ruamel.yaml `__ -- `configparser `__ (only required for +- `configparser `__ (only required for Python 2.7) -- `logutils `__ (only required for Python 2.7) -- `mock `__ (only required for Python 2.7) +- `logutils `__ (only required for Python 2.7) +- `mock `__ (only required for Python 2.7) The following packages can be optionally installed for additional features but are not required: diff --git a/conda-recipe/README.md b/conda-recipe/README.md index 9dae17fc..790f574a 100644 --- a/conda-recipe/README.md +++ b/conda-recipe/README.md @@ -1,7 +1,7 @@ How to create and test conda package. 1. To create the SKLL conda package run: - `conda build -c defaults -c conda-forge --python=3.6 --numpy=1.13 skll` + `conda build -c defaults -c conda-forge --python=3.6 --numpy=1.14 skll` 2. Upload the package to anaconda.org using `anaconda upload `. 3. Test the package: `conda create -n foobar -c defaults -c conda-forge -c desilinguist python=3.6 skll=1.5` diff --git a/conda-recipe/skll/meta.yaml b/conda-recipe/skll/meta.yaml index 88c09935..ab63a602 100644 --- a/conda-recipe/skll/meta.yaml +++ b/conda-recipe/skll/meta.yaml @@ -1,6 +1,6 @@ package: name: skll - version: 1.5 + version: 1.5.3 source: path: ../../../skll @@ -42,7 +42,7 @@ build: requirements: build: - python - - scikit-learn ==0.19.1 + - scikit-learn ==0.20.1 - joblib >=0.8 - setuptools - six @@ -57,7 +57,7 @@ requirements: run: - python - - scikit-learn ==0.19.1 + - scikit-learn ==0.20.1 - joblib >=0.8 - six - prettytable diff --git a/conda_requirements.txt b/conda_requirements.txt index b225501a..7cd12fe9 100644 --- a/conda_requirements.txt +++ b/conda_requirements.txt @@ -1,4 +1,4 @@ -scikit-learn==0.19.1 +scikit-learn==0.20.1 six PrettyTable beautifulsoup4 diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 724ad40d..709bf7b9 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -11,7 +11,7 @@ or via ``conda`` (only for Python 3.6):: conda install -c defaults -c conda-forge -c desilinguist python=3.6 skll It can also be downloaded directly from -`GitHub `_. +`GitHub `_. License diff --git a/doc/index.rst b/doc/index.rst index c3a05b40..6fcd13fd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -30,6 +30,7 @@ Documentation run_experiment utilities api + internal Indices and tables diff --git a/doc/internal.rst b/doc/internal.rst new file mode 100644 index 00000000..f8827b23 --- /dev/null +++ b/doc/internal.rst @@ -0,0 +1,7 @@ +Internal Documentation +====================== + +.. toctree:: + :maxdepth: 4 + + internal/release \ No newline at end of file diff --git a/doc/internal/release.rst b/doc/internal/release.rst new file mode 100644 index 00000000..fc06613b --- /dev/null +++ b/doc/internal/release.rst @@ -0,0 +1,42 @@ +Release Process +=============== + +This document is only meant for the project administrators, not users and developers. + +1. Create a release branch on GitHub. + +2. In the release branch: + + a. update the version numbers in ``version.py``. + + b. update the conda recipe. + + c. update the documentation with any new features or details about changes. 
+ + d. run ``make linkcheck`` on the documentation and fix any redirected/broken links. + + e. update the README. + +3. Build the new conda package locally on your Mac using the following command (*Note*: you may have to replace the contents of the ``requirements()`` function in ``setup.py`` with a ``pass`` statement to get ``conda build`` to work):: + + conda build -c defaults -c conda-forge --python=3.6 --numpy=1.14 skll + +4. Convert the package for both linux and windows:: + + conda convert -p win-64 -p linux-64 + +5. Upload each of the packages to anaconda.org using ``anaconda upload ``. + +6. Upload the source package to PyPI using ``python setup.py sdist upload``. + +7. Draft a release on GitHub. + +8. Make a pull request with the release branch to be merged into ``master`` and request code review. + +9. Once the build for the PR passes and the reviewers approve, merge the release branch into ``master``. + +10. Make sure that the RTFD build for ``master`` passes. + +11. Tag the latest commit in ``master`` with the appropriate release tag and publish the release on GitHub. + +12. Send an email around at ETS announcing the release and the changes. diff --git a/doc/run_experiment.rst b/doc/run_experiment.rst index b6b45604..27044171 100644 --- a/doc/run_experiment.rst +++ b/doc/run_experiment.rst @@ -31,7 +31,7 @@ The following feature file formats are supported: arff ^^^^ -The same file format used by `Weka `__ +The same file format used by `Weka `__ with the following added restrictions: * Only simple numeric, string, and nominal values are supported. @@ -81,8 +81,8 @@ libsvm ^^^^^^ While we can process the standard input file format supported by -`LibSVM `__, -`LibLinear `__, +`LibSVM `__, +`LibLinear `__, and `SVMLight `__, we also support specifying extra metadata usually missing from the format in comments at the end of each line. The comments are not mandatory, but without them, your labels and features will @@ -105,7 +105,7 @@ megam ^^^^^ An expanded form of the input format for the -`MegaM classification package `__ with +`MegaM classification package `__ with the ``-fvals`` switch. The basic format is:: @@ -127,7 +127,7 @@ to/from this MegaM format and for adding/removing features from the files. Creating configuration files ---------------------------- The experiment configuration files that run_experiment accepts are standard -`Python configuration files `__ +`Python configuration files `__ that are similar in format to Windows INI files. [#]_ There are four expected sections in a configuration file: :ref:`General`, :ref:`Input`, :ref:`Tuning`, and :ref:`Output`. A detailed description of each possible settings for each section is provided below, but to summarize: * If you want to do **cross-validation**, specify a path to training feature files, and set :ref:`task` to ``cross_validate``. Please note that the cross-validation currently uses - `StratifiedKFold `__. + `StratifiedKFold `__. You also can optionally use predetermined folds with the :ref:`folds_file ` setting. @@ -167,7 +167,7 @@ possible settings for each section is provided below, but to summarize: .. _learning_curve: -* If you want to **generate a learning curve** for your data, specify a training location and set :ref:`task` to ``learning_curve``. The learning curve is generated using essentially the same underlying process as in `scikit-learn `__ except that the SKLL feature pre-processing pipline is used while training the various models and computing the scores. 
+* If you want to **generate a learning curve** for your data, specify a training location and set :ref:`task` to ``learning_curve``. The learning curve is generated using essentially the same underlying process as in `scikit-learn `__ except that the SKLL feature pre-processing pipeline is used while training the various models and computing the scores. .. note:: @@ -178,7 +178,7 @@ possible settings for each section is provided below, but to summarize: * A :ref:`list of classifiers/regressors ` to try on your feature files is required. -Example configuration files are available `here `__. +Example configuration files are available `here `__. .. _general: @@ -227,43 +227,43 @@ below. Custom learners can also be specified. See Classifiers: - * **AdaBoostClassifier**: `AdaBoost Classification `__. Note that the default base estimator is a ``DecisionTreeClassifier``. A different base estimator can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. The following additional base estimators are supported: ``MultinomialNB``, ``SGDClassifier``, and ``SVC``. Note that the last two base require setting an additional ``algorithm`` fixed parameter with the value ``'SAMME'``. - * **DummyClassifier**: `Simple rule-based Classification `__ - * **DecisionTreeClassifier**: `Decision Tree Classification `__ - * **GradientBoostingClassifier**: `Gradient Boosting Classification `__ - * **KNeighborsClassifier**: `K-Nearest Neighbors Classification `__ - * **LinearSVC**: `Support Vector Classification using LibLinear `__ - * **LogisticRegression**: `Logistic Regression Classification using LibLinear `__ - * **MLPClassifier**: `Multi-layer Perceptron Classification `__ - * **MultinomialNB**: `Multinomial Naive Bayes Classification `__ - * **RandomForestClassifier**: `Random Forest Classification `__ - * **RidgeClassifier**: `Classification using Ridge Regression `__ - * **SGDClassifier**: `Stochastic Gradient Descent Classification `__ - * **SVC**: `Support Vector Classification using LibSVM `__ + * **AdaBoostClassifier**: `AdaBoost Classification `__. Note that the default base estimator is a ``DecisionTreeClassifier``. A different base estimator can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. The following additional base estimators are supported: ``MultinomialNB``, ``SGDClassifier``, and ``SVC``. Note that the last two base estimators require setting an additional ``algorithm`` fixed parameter with the value ``'SAMME'``. + * **DummyClassifier**: `Simple rule-based Classification `__ + * **DecisionTreeClassifier**: `Decision Tree Classification `__ + * **GradientBoostingClassifier**: `Gradient Boosting Classification `__ + * **KNeighborsClassifier**: `K-Nearest Neighbors Classification `__ + * **LinearSVC**: `Support Vector Classification using LibLinear `__ + * **LogisticRegression**: `Logistic Regression Classification using LibLinear `__ + * **MLPClassifier**: `Multi-layer Perceptron Classification `__ + * **MultinomialNB**: `Multinomial Naive Bayes Classification `__ + * **RandomForestClassifier**: `Random Forest Classification `__ + * **RidgeClassifier**: `Classification using Ridge Regression `__ + * **SGDClassifier**: `Stochastic Gradient Descent Classification `__ + * **SVC**: `Support Vector Classification using LibSVM `__ .. _regressors: Regressors: - * **AdaBoostRegressor**: `AdaBoost Regression `__. Note that the default base estimator is a ``DecisionTreeRegressor``. 
A different base estimator can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. The following additional base estimators are supported: ``SGDRegressor``, and ``SVR``. - * **BayesianRidge**: `Bayesian Ridge Regression `__ - * **DecisionTreeRegressor**: `Decision Tree Regressor `__ - * **DummyRegressor**: `Simple Rule-based Regression `__ - * **ElasticNet**: `ElasticNet Regression `__ - * **GradientBoostingRegressor**: `Gradient Boosting Regressor `__ - * **HuberRegressor**: `Huber Regression `__ - * **KNeighborsRegressor**: `K-Nearest Neighbors Regression `__ - * **Lars**: `Least Angle Regression `__ - * **Lasso**: `Lasso Regression `__ - * **LinearRegression**: `Linear Regression `__ - * **LinearSVR**: `Support Vector Regression using LibLinear `__ - * **MLPRegressor**: `Multi-layer Perceptron Regression `__ - * **RandomForestRegressor**: `Random Forest Regression `__ - * **RANSACRegressor**: `RANdom SAmple Consensus Regression `__. Note that the default base estimator is a ``LinearRegression``. A different base regressor can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. - * **Ridge**: `Ridge Regression `__ - * **SGDRegressor**: `Stochastic Gradient Descent Regression `__ - * **SVR**: `Support Vector Regression using LibSVM `__ - * **TheilSenRegressor**: `Theil-Sen Regression `__ + * **AdaBoostRegressor**: `AdaBoost Regression `__. Note that the default base estimator is a ``DecisionTreeRegressor``. A different base estimator can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. The following additional base estimators are supported: ``SGDRegressor``, and ``SVR``. + * **BayesianRidge**: `Bayesian Ridge Regression `__ + * **DecisionTreeRegressor**: `Decision Tree Regressor `__ + * **DummyRegressor**: `Simple Rule-based Regression `__ + * **ElasticNet**: `ElasticNet Regression `__ + * **GradientBoostingRegressor**: `Gradient Boosting Regressor `__ + * **HuberRegressor**: `Huber Regression `__ + * **KNeighborsRegressor**: `K-Nearest Neighbors Regression `__ + * **Lars**: `Least Angle Regression `__ + * **Lasso**: `Lasso Regression `__ + * **LinearRegression**: `Linear Regression `__ + * **LinearSVR**: `Support Vector Regression using LibLinear `__ + * **MLPRegressor**: `Multi-layer Perceptron Regression `__ + * **RandomForestRegressor**: `Random Forest Regression `__ + * **RANSACRegressor**: `RANdom SAmple Consensus Regression `__. Note that the default base estimator is a ``LinearRegression``. A different base regressor can be used by specifying a ``base_estimator`` fixed parameter in the :ref:`fixed_parameters ` list. + * **Ridge**: `Ridge Regression `__ + * **SGDRegressor**: `Stochastic Gradient Descent Regression `__ + * **SVR**: `Support Vector Regression using LibSVM `__ + * **TheilSenRegressor**: `Theil-Sen Regression `__ For all regressors you can also prepend ``Rescaled`` to the beginning of the full name (e.g., ``RescaledSVR``) to get a version @@ -496,9 +496,9 @@ imported dynamically. This is only required if a custom learner is specified in the list of :ref:`learners`. All Custom learners must implement the ``fit`` and -``predict`` methods. Custom classifiers must either (a) inherit from an existing scikit-learn classifier, or (b) inherit from both `sklearn.base.BaseEstimator `__. *and* from `sklearn.base.ClassifierMixin `__. +``predict`` methods. 
Custom classifiers must either (a) inherit from an existing scikit-learn classifier, or (b) inherit from both `sklearn.base.BaseEstimator `__ *and* from `sklearn.base.ClassifierMixin `__. -Similarly, Custom regressors must either (a) inherit from an existing scikit-learn regressor, or (b) inherit from both `sklearn.base.BaseEstimator `__. *and* from `sklearn.base.RegressorMixin `__. +Similarly, custom regressors must either (a) inherit from an existing scikit-learn regressor, or (b) inherit from both `sklearn.base.BaseEstimator `__ *and* from `sklearn.base.RegressorMixin `__. Learners that require dense matrices should implement a method ``requires_dense`` that returns ``True``. @@ -511,11 +511,11 @@ sampler *(Optional)* It performs a non-linear transformation of the input, which can serve as a basis for linear classification or other algorithms. Valid options are: -`Nystroem `__, -`RBFSampler `__, -`SkewedChi2Sampler `__, and -`AdditiveChi2Sampler `__. For additional information see -`the scikit-learn documentation `__. +`Nystroem `__, +`RBFSampler `__, +`SkewedChi2Sampler `__, and +`AdditiveChi2Sampler `__. For additional information see +`the scikit-learn documentation `__. .. _sampler_parameters: @@ -550,18 +550,18 @@ feature_hasher *(Optional)* If "true", this enables a high-speed, low-memory vectorizer that uses feature hashing for converting feature dictionaries into NumPy arrays instead of using a -`DictVectorizer `__. This flag will drastically +`DictVectorizer `__. This flag will drastically reduce memory consumption for data sets with a large number of features. If enabled, the user should also specify the number of features in the :ref:`hasher_features ` field. For additional -information see `the scikit-learn documentation `__. +information see `the scikit-learn documentation `__. .. _hasher_features: hasher_features *(Optional)* """""""""""""""""""""""""""" -The number of features used by the `FeatureHasher `__ if the +The number of features used by the `FeatureHasher `__ if the :ref:`feature_hasher ` flag is enabled. .. note:: @@ -696,7 +696,7 @@ TheilSenRegressor {'class_weight': {1: 10}} - Additional examples and information can be seen `here `__. + Additional examples and information can be seen `here `__. .. _feature_scaling: @@ -785,9 +785,9 @@ The objective functions to use for tuning. This is a list of one or more objecti Classification: - * **accuracy**: Overall `accuracy `__ - * **precision**: `Precision `__ - * **recall**: `Recall `__ + * **accuracy**: Overall `accuracy `__ + * **precision**: `Precision `__ + * **recall**: `Recall `__ * **f1**: The default scikit-learn |F1 link|_ (F\ :sub:`1` of the positive class for binary classification, or the weighted average F\ :sub:`1` for multiclass classification) * **f1_score_micro**: Micro-averaged |F1 link|_ @@ -796,20 +796,20 @@ Classification: * **f1_score_least_frequent**: F:\ :sub:`1` score of the least frequent class. The least frequent class may vary from fold to fold for certain data distributions. - * **neg_log_loss**: The negative of the classification `log loss `__ . Since scikit-learn `recommends `__ using negated loss functions as scorer functions, SKLL does the same for the sake of consistency. To use this as the objective, :ref:`probability ` must be set to ``True``. - * **average_precision**: `Area under PR curve `__ + * **neg_log_loss**: The negative of the classification `log loss `__. 
Since scikit-learn `recommends `__ using negated loss functions as scorer functions, SKLL does the same for the sake of consistency. To use this as the objective, :ref:`probability ` must be set to ``True``. + * **average_precision**: `Area under PR curve `__ (for binary classification) - * **roc_auc**: `Area under ROC curve `__ + * **roc_auc**: `Area under ROC curve `__ (for binary classification) .. |F1 link| replace:: F\ :sub:`1` score -.. _F1 link: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html +.. _F1 link: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html .. _int_label_classification_obj: Regression or classification with integer labels: - * **unweighted_kappa**: Unweighted `Cohen's kappa `__ (any floating point + * **unweighted_kappa**: Unweighted `Cohen's kappa `__ (any floating point values are rounded to ints) * **linear_weighted_kappa**: Linear weighted kappa (any floating point values are rounded to ints) @@ -827,16 +827,16 @@ Regression or classification with integer labels: Regression or classification with binary labels: - * **kendall_tau**: `Kendall's tau `__ - * **pearson**: `Pearson correlation `__ - * **spearman**: `Spearman rank-correlation `__ + * **kendall_tau**: `Kendall's tau `__ + * **pearson**: `Pearson correlation `__ + * **spearman**: `Spearman rank-correlation `__ .. _regression_obj: Regression: - * **r2**: `R2 `__ - * **neg_mean_squared_error**: The negative of the `mean squared error `__ regression loss. Since scikit-learn `recommends `__ using negated loss functions as scorer functions, SKLL does the same for the sake of consistency. + * **r2**: `R2 `__ + * **neg_mean_squared_error**: The negative of the `mean squared error `__ regression loss. Since scikit-learn `recommends `__ using negated loss functions as scorer functions, SKLL does the same for the sake of consistency. Defaults to ``['f1_score_micro']``. @@ -1121,7 +1121,7 @@ specified via command-line arguments instead of in the configuration file: GridMap options ^^^^^^^^^^^^^^^ -If you have `GridMap `__ installed, +If you have `GridMap `__ installed, :program:`run_experiment` will automatically schedule jobs on your DRMAA- compatible cluster. You can use the following options to customize this behavior. @@ -1132,7 +1132,7 @@ .. option:: -q , --queue - Use this queue for `GridMap `__. + Use this queue for `GridMap `__. (default: ``all.q``) .. option:: -m , --machines diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 9d459623..2c85d7a7 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -21,15 +21,15 @@ Titanic Example --------------- Let's see how we can apply the basic workflow above to a simple example using -the `Titantic: Machine Learning from Disaster `__ -data from `Kaggle `__. +the `Titanic: Machine Learning from Disaster `__ +data from `Kaggle `__. Get your data into the correct format ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The first step to getting the Titanic data is logging into Kaggle and -downloading `train.csv `__ -and `test.csv `__. +downloading `train.csv `__ +and `test.csv `__. Once you have those files, you'll also want to grab the `examples folder `__ on our GitHub page and put ``train.csv`` and ``test.csv`` in ``examples``. @@ -48,7 +48,7 @@ For this tutorial, we will refer to an "experiment" as having a single data set split into training and testing portions. 
As part of each experiment, we can train and test several models, either simultaneously or sequentially, depending whether we're using -`GridMap `__ or not. +`GridMap `__ or not. This will be described in more detail later on, when we are ready to run our experiment. @@ -87,9 +87,9 @@ instances IDs for each example. The :ref:`Tuning` section defines how we want our model to be tuned. Setting :ref:`grid_search ` to ``True`` here employs scikit-learn's -`GridSearchCV `_ +`GridSearchCV `_ class, which is an implementation of the -`standard, brute-force approach to hyperparameter optimization `_. +`standard, brute-force approach to hyperparameter optimization `_. :ref:`objectives ` refers to the desired objective functions; here, ``accuracy`` will optimize for overall accuracy. You can see a list of all the diff --git a/doc/utilities.rst b/doc/utilities.rst index 41319140..e509e535 100644 --- a/doc/utilities.rst +++ b/doc/utilities.rst @@ -113,17 +113,27 @@ Positional Arguments Model file to load and use for generating predictions. -.. option:: input_file +.. option:: input_file(s) - A csv file, json file, or megam file (with or without the label column), - with the appropriate suffix. + One or more csv file(s), jsonlines file(s), or megam file(s) (with or without the + label column), with the appropriate suffix. Optional Arguments ^^^^^^^^^^^^^^^^^^ +.. option:: -a, --all_probabilities + + Flag indicating whether to output the probabilities of all labels instead of just + the probability of the positive label. + +.. option:: -i , --id_col + + Name of the column which contains the instance IDs in ARFF, CSV, or TSV files. + (default: ``id``) + .. option:: -l , --label_col Name of the column which contains the labels in ARFF, CSV, or TSV files. - For ARFF files, this must be the final column to count as the label. + For ARFF files, this must be the final column to count as the label. (default: ``y``) .. option:: -p , --positive_label @@ -131,7 +141,8 @@ Optional Arguments If the model is only being used to predict the probability of a particular label, this specifies the index of the label we're predicting. 1 = second label, which is default for binary classification. Keep in mind that labels - are sorted lexicographically. (default: 1) + are sorted lexicographically. + (default: 1) .. 
option:: -q, --quiet diff --git a/requirements.txt b/requirements.txt index b225501a..7cd12fe9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -scikit-learn==0.19.1 +scikit-learn==0.20.1 six PrettyTable beautifulsoup4 diff --git a/requirements_rtd.txt b/requirements_rtd.txt index 72b70cda..1b63aabb 100644 --- a/requirements_rtd.txt +++ b/requirements_rtd.txt @@ -1,7 +1,7 @@ configparser==3.5.0b2 logutils mock -scikit-learn==0.19.1 +scikit-learn==0.20.1 six PrettyTable beautifulsoup4 diff --git a/skll/config.py b/skll/config.py index 973d65f4..60318b4c 100644 --- a/skll/config.py +++ b/skll/config.py @@ -481,10 +481,11 @@ def _parse_config_file(config_path, log_level=logging.INFO): # next, get the log path before anything else since we need to # save all logging messages to a log file in addition to displaying # them on the console - log_path = _locate_file(config.get("Output", "log"), config_dir) - if log_path: - log_path = join(config_dir, log_path) - if not exists(log_path): + try: + log_path = _locate_file(config.get("Output", "log"), config_dir) + except IOError as e: + if e.errno == errno.ENOENT: + log_path = e.filename os.makedirs(log_path) # Create a top-level log file under the log path @@ -731,24 +732,29 @@ def _parse_config_file(config_path, log_level=logging.INFO): probability = config.getboolean("Output", "probability") # do we want to keep the predictions? - prediction_dir = _locate_file(config.get("Output", "predictions"), - config_dir) - if prediction_dir: - if not exists(prediction_dir): + # make sure the predictions path exists and if not create it + try: + prediction_dir = _locate_file(config.get("Output", "predictions"), + config_dir) + except IOError as e: + if e.errno == errno.ENOENT: + prediction_dir = e.filename os.makedirs(prediction_dir) - # make sure model path exists - model_path = _locate_file(config.get("Output", "models"), config_dir) - if model_path: - model_path = join(config_dir, model_path) - if not exists(model_path): + # make sure model path exists and if not, create it + try: + model_path = _locate_file(config.get("Output", "models"), config_dir) + except IOError as e: + if e.errno == errno.ENOENT: + model_path = e.filename os.makedirs(model_path) # make sure results path exists - results_path = _locate_file(config.get("Output", "results"), config_dir) - if results_path: - results_path = join(config_dir, results_path) - if not exists(results_path): + try: + results_path = _locate_file(config.get("Output", "results"), config_dir) + except IOError as e: + if e.errno == errno.ENOENT: + results_path = e.filename os.makedirs(results_path) # what are the output metrics? @@ -872,10 +878,10 @@ def _parse_config_file(config_path, log_level=logging.INFO): logger.warning("Specifying \"folds_file\" overrides both " "explicit and default \"grid_search_folds\".") if task == 'cross_validate': - logger.warning("Specifying \"folds_file\" overrides both " - "explicit and default \"num_cv_folds\".") cv_folds = specified_folds_mapping if specified_folds_mapping else specified_num_folds if specified_folds_mapping: + logger.warning("Specifying \"folds_file\" overrides both " + "explicit and default \"num_cv_folds\".") if use_folds_file_for_grid_search: grid_search_folds = cv_folds else: diff --git a/skll/data/writers.py b/skll/data/writers.py index d694ff78..88b9f0fd 100644 --- a/skll/data/writers.py +++ b/skll/data/writers.py @@ -237,24 +237,24 @@ class DelimitedFileWriter(Writer): type. For example ``/foo/.csv``. 
feature_set : skll.FeatureSet The ``FeatureSet`` instance to dump to the output file. - quiet : bool + quiet : bool, optional Do not print "Writing..." status message to stderr. Defaults to ``True``. - label_col : str + label_col : str, optional Name of the column which contains the class labels for ARFF/CSV/TSV files. If no column with that name exists, or ``None`` is specified, the data is considered to be unlabelled. Defaults to ``'y'``. - id_col : str + id_col : str, optional Name of the column which contains the instance IDs. If no column with that name exists, or ``None`` is specified, example IDs will be automatically generated. Defaults to ``'id'``. - dialect : str - Name of the column which contains the class labels for - CSV/TSV files. - logger : logging.Logger + dialect : str, optional + The dialect to use for writing out the delimited file. + Defaults to ``'excel-tab'``. + logger : logging.Logger, optional A logger instance to use to log messages instead of creating a new one by default. Defaults to ``None``. @@ -586,9 +586,21 @@ def _write_line(self, id_, label_, feat_dict, output_file): """ example_dict = {} # Don't try to add class column if this is label-less data + # Try to convert the label to a scalar assuming it's a numpy + # scalar type (e.g., int64) but if that doesn't work + # then use it as is if self.feat_set.has_labels: - example_dict['y'] = np.asscalar(label_) - example_dict['id'] = np.asscalar(id_) + try: + example_dict['y'] = label_.item() + except AttributeError: + example_dict['y'] = label_ + # Try to convert the ID to a scalar assuming it's a numpy + # scalar type (e.g., int64) but if that doesn't work + # then use it as is + try: + example_dict['id'] = id_.item() + except AttributeError: + example_dict['id'] = id_ example_dict["x"] = feat_dict print(json.dumps(example_dict, sort_keys=True), file=output_file) diff --git a/skll/experiments.py b/skll/experiments.py index d6a039cc..22ff3bb1 100644 --- a/skll/experiments.py +++ b/skll/experiments.py @@ -77,7 +77,7 @@ class NumpyTypeEncoder(json.JSONEncoder): be serialized by the json module, so we must convert them to int objects. A related issue where this was adapted from: - http://stackoverflow.com/questions/11561932/why-does-json-dumpslistnp-arange5-fail-while-json-dumpsnp-arange5-tolis + https://stackoverflow.com/questions/11561932/why-does-json-dumpslistnp-arange5-fail-while-json-dumpsnp-arange5-tolis """ def default(self, obj): @@ -989,7 +989,7 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q', write_summary : bool, optional Write a TSV file with a summary of the results. Defaults to ``True``. - quite : bool, optional + quiet : bool, optional Suppress printing of "Loading..." messages. Defaults to ``False``. 
ablation : int, optional @@ -1374,13 +1374,13 @@ def _generate_learning_curve_plots(experiment_name, # each of the featuresets for fs_name, df_fs in df_melted.groupby('featureset_name'): fig = plt.figure(); - fig.set_size_inches(2.5*num_learners, 2.5*num_metrics); + fig.set_size_inches(2.5 * num_learners, 2.5 * num_metrics); # compute ylimits for this feature set for each objective with sns.axes_style('whitegrid', {"grid.linestyle": ':', "xtick.major.size": 3.0}): g = sns.FacetGrid(df_fs, row="metric", col="learner_name", - hue="variable", size=2.5, aspect=1, + hue="variable", height=2.5, aspect=1, margin_titles=True, despine=True, sharex=False, sharey=False, legend_out=False, palette="Set1") colors = train_color, test_color = sns.color_palette("Set1")[:2] diff --git a/skll/learner.py b/skll/learner.py index ff196f78..f8bd102d 100644 --- a/skll/learner.py +++ b/skll/learner.py @@ -20,6 +20,7 @@ from collections import Counter, defaultdict from functools import wraps from importlib import import_module +from itertools import combinations from multiprocessing import cpu_count import joblib @@ -42,6 +43,7 @@ RandomForestClassifier, RandomForestRegressor) from sklearn.feature_extraction import FeatureHasher +from sklearn.feature_extraction import DictVectorizer as OldDictVectorizer from sklearn.feature_selection import SelectKBest from sklearn.utils.multiclass import type_of_target # AdditiveChi2Sampler is used indirectly, so ignore linting message @@ -74,6 +76,7 @@ from sklearn.utils import shuffle as sk_shuffle from skll.data import FeatureSet +from skll.data.dict_vectorizer import DictVectorizer from skll.metrics import _CORRELATION_METRICS, use_score_func from skll.version import VERSION @@ -196,8 +199,6 @@ 'neg_log_loss']) _REQUIRES_DENSE = (BayesianRidge, - GradientBoostingClassifier, - GradientBoostingRegressor, Lars, TheilSenRegressor) @@ -856,6 +857,7 @@ def __init__(self, model_type, probability=False, feature_scaling='none', if issubclass(self._model_type, SVC): self._model_kwargs['cache_size'] = 1000 self._model_kwargs['probability'] = self.probability + self._model_kwargs['gamma'] = 'auto' if self.probability: self.logger.warning('Because LibSVM does an internal ' 'cross-validation to produce probabilities, ' @@ -868,14 +870,22 @@ def __init__(self, model_type, probability=False, feature_scaling='none', self._model_kwargs['n_estimators'] = 500 elif issubclass(self._model_type, SVR): self._model_kwargs['cache_size'] = 1000 + self._model_kwargs['gamma'] = 'auto' elif issubclass(self._model_type, SGDClassifier): self._model_kwargs['loss'] = 'log' + self._model_kwargs['max_iter'] = None + self._model_kwargs['tol'] = None + elif issubclass(self._model_type, SGDRegressor): + self._model_kwargs['max_iter'] = None + self._model_kwargs['tol'] = None elif issubclass(self._model_type, RANSACRegressor): self._model_kwargs['loss'] = 'squared_loss' elif issubclass(self._model_type, (MLPClassifier, MLPRegressor)): self._model_kwargs['learning_rate'] = 'invscaling' self._model_kwargs['max_iter'] = 500 - + elif issubclass(self._model_type, LogisticRegression): + self._model_kwargs['solver'] = 'liblinear' + self._model_kwargs['multi_class'] = 'auto' if issubclass(self._model_type, (AdaBoostClassifier, AdaBoostRegressor, @@ -913,15 +923,24 @@ def __init__(self, model_type, probability=False, feature_scaling='none', AdaBoostClassifier, RANSACRegressor)) and ('base_estimator' in model_kwargs): base_estimator_name = model_kwargs['base_estimator'] - base_estimator_kwargs = {} if base_estimator_name in 
['LinearRegression', - 'MultinomialNB', - 'SVR'] else {'random_state': 123456789} + if base_estimator_name in ['LinearRegression', 'MultinomialNB']: + base_estimator_kwargs = {} + elif base_estimator_name in ['SGDClassifier', 'SGDRegressor']: + base_estimator_kwargs = {'max_iter': None, + 'tol': None, + 'random_state': 123456789} + elif base_estimator_name == 'SVR': + base_estimator_kwargs = {'gamma': 'auto'} + elif base_estimator_name == 'SVC': + base_estimator_kwargs = {'gamma': 'auto', 'random_state': 123456789} + else: + base_estimator_kwargs = {'random_state': 123456789} base_estimator = globals()[base_estimator_name](**base_estimator_kwargs) model_kwargs['base_estimator'] = base_estimator self._model_kwargs.update(model_kwargs) @classmethod - def from_file(cls, learner_path): + def from_file(cls, learner_path, logger=None): """ Load a saved ``Learner`` instance from a file path. @@ -929,6 +948,9 @@ ---------- learner_path : str The path to a saved ``Learner`` instance file. + logger : logging object, optional + A logging object. If ``None`` is passed, get logger from ``__name__``. + Defaults to ``None``. Returns ------- @@ -944,6 +966,10 @@ """ skll_version, learner = joblib.load(learner_path) + # set the learner's logger attribute to the logger that's passed in + # or, if nothing was passed in, to a newly created logger + learner.logger = logger if logger else logging.getLogger(__name__) + # For backward compatibility, convert string model types to labels. if isinstance(learner._model_type, string_types): learner._model_type = globals()[learner._model_type] @@ -1073,6 +1099,30 @@ elif self.model.intercept_.any(): intercept = dict(zip(label_list, self.model.intercept_)) + # for SVCs with linear kernels, we want to print out the primal + # weights - that is, the weights for each feature for each one-vs-one + # binary classifier. These are the weights contained in the `coef_` + # attribute of the underlying scikit-learn model. This is a matrix that + # has the shape [(n_classes)*(n_classes - 1)/2, n_features] since there + # are C(n_classes, 2) = n_classes*(n_classes-1)/2 one-vs-one classifiers + # and each one has weights for each of the features. According to the + # scikit-learn user guide and the code for the function `_one_vs_one_coef()` + # in `svm/base.py`, the order of the rows is as follows: "0 vs 1", + # "0 vs 2", ... "0 vs n", "1 vs 2", "1 vs 3", ... "1 vs n", ... "n-1 vs n". + elif isinstance(self._model, SVC) and self._model.kernel == 'linear': + intercept = {} + for i, class_pair in enumerate(combinations(range(len(self.label_list)), 2)): + coef = self.model.coef_[i] + coef = coef.toarray() + coef = self.feat_selector.inverse_transform(coef)[0] + class1 = self.label_list[class_pair[0]] + class2 = self.label_list[class_pair[1]] + for feat, idx in iteritems(self.feat_vectorizer.vocabulary_): + if coef[idx]: + res['{}-vs-{}\t{}'.format(class1, class2, feat)] = coef[idx] + + intercept['{}-vs-{}'.format(class1, class2)] = self.model.intercept_[i] + else: # not supported raise ValueError(("{} is not supported by" + @@ -1113,7 +1163,8 @@ def __getstate__(self): because we cannot pickle loggers. 
""" attribute_dict = dict(self.__dict__) - del attribute_dict['logger'] + if 'logger' in attribute_dict: + del attribute_dict['logger'] return attribute_dict def save(self, learner_path): @@ -1683,12 +1734,57 @@ def predict(self, examples, prediction_prefix=None, append=False, # Need to do some transformations so the features are in the right # columns for the test set. Obviously a bit hacky, but storing things # in sparse matrices saves memory over our old list of dicts approach. - if isinstance(self.feat_vectorizer, FeatureHasher): - if (self.feat_vectorizer.n_features != - examples.vectorizer.n_features): + + # We also need to think about the various combinations of the model + # vectorizer and the vectorizer for the set for which we want to make + # predictions: + + # 1. Both vectorizers are DictVectorizers. If they use different sets + # of features, we raise a warning and transform the features of the + # prediction set from its space to the trained model space. + + # 2. Both vectorizers are FeatureHashers. If they use different number + # of feature bins, we should just raise an error since there's no + # inverse_transform() available for a FeatureHasher - the hash function + # is not reversible. + + # 3. The model vectorizer is a FeatureHasher but the prediction feature + # set vectorizer is a DictVectorizer. We should be able to handle this + # case, since we can just call inverse_transform() on the DictVectorizer + # and then transform() on the FeatureHasher? + + # 4. The model vectorizer is a DictVectorizer but the prediction feature + # set vectorizer is a FeatureHasher. Again, we should raise an error here + # since there's no inverse available for the hasher. + model_is_dict = isinstance(self.feat_vectorizer, + (DictVectorizer, OldDictVectorizer)) + model_is_hasher = isinstance(self.feat_vectorizer, FeatureHasher) + data_is_dict = isinstance(examples.vectorizer, + (DictVectorizer, OldDictVectorizer)) + data_is_hasher = isinstance(examples.vectorizer, FeatureHasher) + + both_dicts = model_is_dict and data_is_dict + both_hashers = model_is_hasher and data_is_hasher + model_hasher_and_data_dict = model_is_hasher and data_is_dict + model_dict_and_data_hasher = model_is_dict and data_is_hasher + + # 1. both are DictVectorizers + if both_dicts: + if (set(self.feat_vectorizer.feature_names_) != + set(examples.vectorizer.feature_names_)): self.logger.warning("There is mismatch between the training model " - "features and the data passed to predict.") + "features and the data passed to predict. The " + "prediction features will be transformed to " + "the trained model space.") + if self.feat_vectorizer == examples.vectorizer: + xtest = examples.features + else: + xtest = self.feat_vectorizer.transform( + examples.vectorizer.inverse_transform( + examples.features)) + # 2. 
both are FeatureHashers elif both_hashers: self_feat_vec_tuple = (self.feat_vectorizer.dtype, self.feat_vectorizer.input_type, self.feat_vectorizer.n_features, @@ -1701,21 +1797,23 @@ def predict(self, examples, prediction_prefix=None, append=False, if self_feat_vec_tuple == example_feat_vec_tuple: xtest = examples.features else: - xtest = self.feat_vectorizer.transform( - examples.vectorizer.inverse_transform( - examples.features)) - else: - if (set(self.feat_vectorizer.feature_names_) != - set(examples.vectorizer.feature_names_)): - self.logger.warning("There is mismatch between the training model " - "features and the data passed to predict.") - if self.feat_vectorizer == examples.vectorizer: - xtest = examples.features - else: - - xtest = self.feat_vectorizer.transform( - examples.vectorizer.inverse_transform( - examples.features)) + self.logger.error('There is a mismatch between the FeatureHasher ' + 'configuration for the training data and ' + 'the configuration for the data passed to predict') + raise RuntimeError('Mismatched hasher configurations') + + # 3. model is a FeatureHasher and test set is a DictVectorizer + elif model_hasher_and_data_dict: + xtest = self.feat_vectorizer.transform( + examples.vectorizer.inverse_transform( + examples.features)) + + # 4. model is a DictVectorizer and test set is a FeatureHasher + elif model_dict_and_data_hasher: + self.logger.error('Cannot predict with a model using a ' + 'DictVectorizer on data that uses ' + 'a FeatureHasher') + raise RuntimeError('Cannot use FeatureHasher for data') # filter features based on those selected from training set xtest = self.feat_selector.transform(xtest) diff --git a/skll/utilities/generate_predictions.py b/skll/utilities/generate_predictions.py index 97d9aefb..514ff680 100755 --- a/skll/utilities/generate_predictions.py +++ b/skll/utilities/generate_predictions.py @@ -14,6 +14,7 @@ import argparse import logging import os +import sys from skll.data.readers import EXT_TO_READER from skll.learner import Learner @@ -26,7 +27,8 @@ class Predictor(object): predictions for feature strings. """ - def __init__(self, model_path, threshold=None, positive_label=1): + def __init__(self, model_path, threshold=None, positive_label=1, + all_labels=False, logger=None): """ Initialize the predictor. @@ -46,10 +48,24 @@ predicting. 1 = second class, which is default for binary classification. Defaults to 1. + all_labels : bool, optional + A flag indicating whether to return the probabilities for all + labels in each row instead of just returning the probability of + `positive_label`. Defaults to ``False``. + logger : logging object, optional + A logging object. If ``None`` is passed, get logger from ``__name__``. + Defaults to ``None``. """ + self.logger = logger if logger else logging.getLogger(__name__) + if threshold is not None and all_labels: + raise ValueError("`threshold` and `all_labels` are mutually " + "exclusive. They cannot both be set.") + self._learner = Learner.from_file(model_path) self._pos_index = positive_label self.threshold = threshold + self.all_labels = all_labels + self.output_file_header = None def predict(self, data): """ @@ -67,18 +83,29 @@ # compute the predictions from the learner preds = self._learner.predict(data) preds = preds.tolist() + labels = self._learner.label_list + # Create file header list, and transform predictions as needed + # depending on the specified prediction arguments. 
if self._learner.probability: - if self.threshold is None: - return [pred[self._pos_index] for pred in preds] + if self.all_labels: + self.output_file_header = ["id"] + [str(x) for x in labels] + elif self.threshold is None: + label = self._learner.label_dict[self._pos_index] + self.output_file_header = ["id", + "Probability of '{}'".format(label)] + preds = [pred[self._pos_index] for pred in preds] else: - return [int(pred[self._pos_index] >= self.threshold) - for pred in preds] + self.output_file_header = ["id", "prediction"] + preds = [int(pred[self._pos_index] >= self.threshold) + for pred in preds] elif self._learner.model._estimator_type == 'regressor': - return preds + self.output_file_header = ["id", "prediction"] else: - return [self._learner.label_list[pred if isinstance(pred, int) else - int(pred[0])] for pred in preds] + self.output_file_header = ["id", "prediction"] + preds = [labels[pred if isinstance(pred, int) else int(pred[0])] + for pred in preds] + return preds def main(argv=None): @@ -99,41 +126,53 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve') parser.add_argument('model_file', - help='Model file to load and use for generating \ - predictions.') - parser.add_argument('input_file', - help='A csv file, json file, or megam file \ - (with or without the label column), \ - with the appropriate suffix.', + help='Model file to load and use for generating ' + 'predictions.') + parser.add_argument('input_files', + help='A space-separated list of csv, jsonlines, ' + 'or megam files (with or without the label ' + 'column), with the appropriate suffix.', nargs='+') parser.add_argument('-i', '--id_col', - help='Name of the column which contains the instance \ - IDs in ARFF, CSV, or TSV files.', + help='Name of the column which contains the instance ' + 'IDs in ARFF, CSV, or TSV files.', default='id') parser.add_argument('-l', '--label_col', - help='Name of the column which contains the labels\ - in ARFF, CSV, or TSV files. For ARFF files, this\ - must be the final column to count as the label.', + help='Name of the column which contains the labels ' + 'in ARFF, CSV, or TSV files. For ARFF files, ' + 'this must be the final column to count as the ' + 'label.', default='y') parser.add_argument('-p', '--positive_label', - help="If the model is only being used to predict the \ - probability of a particular label, this \ - specifies the index of the label we're \ - predicting. 1 = second label, which is default \ - for binary classification. Keep in mind that \ - labels are sorted lexicographically.", + help="If the model is only being used to predict the " + "probability of a particular label, this " + "specifies the index of the label we're " + "predicting. 1 = second label, which is default " + "for binary classification. Keep in mind that " + "labels are sorted lexicographically.", default=1, type=int) parser.add_argument('-q', '--quiet', help='Suppress printing of "Loading..." messages.', action='store_true') - parser.add_argument('-t', '--threshold', - help="If the model we're using is generating \ - probabilities of the positive label, return 1 \ - if it meets/exceeds the given threshold and 0 \ - otherwise.", - type=float) + parser.add_argument('--output_file', '-o', + help="Path to output tsv file. 
If not specified, " + "predictions will be printed to stdout.") parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(__version__)) + probability_handling = parser.add_mutually_exclusive_group() + probability_handling.add_argument('-t', '--threshold', + help="If the model we're using is " + "generating probabilities of the " + "positive label, return 1 if it " + "meets/exceeds the given threshold " + "and 0 otherwise.", type=float) + probability_handling.add_argument('--all_probabilities', '-a', + action='store_true', + help="Flag indicating whether to output " + "the probabilities of all labels " + "instead of just the probability " + "of the positive label.") + args = parser.parse_args(argv) # Make warnings from built-in warnings module get formatted more nicely @@ -145,10 +184,12 @@ def main(argv=None): # Create the classifier and load the model predictor = Predictor(args.model_file, positive_label=args.positive_label, - threshold=args.threshold) + threshold=args.threshold, + all_labels=args.all_probabilities, + logger=logger) # Iterate over all the specified input files - for input_file in args.input_file: + for i, input_file in enumerate(args.input_files): # make sure each file extension is one we can process input_extension = os.path.splitext(input_file)[1].lower() @@ -164,8 +205,34 @@ def main(argv=None): label_col=args.label_col, id_col=args.id_col) feature_set = reader.read() - for pred in predictor.predict(feature_set): - print(pred) + preds = predictor.predict(feature_set) + header = predictor.output_file_header + + if args.output_file is not None: + with open(args.output_file, 'a') as outputfh: + if i == 0: # Only write header once per set of input files + print("\t".join(header), file=outputfh) + if args.all_probabilities: + for j, probabilities in enumerate(preds): + id_ = feature_set.ids[j] + probs_str = "\t".join([str(p) for p in probabilities]) + print("{}\t{}".format(id_, probs_str), file=outputfh) + else: + for j, pred in enumerate(preds): + id_ = feature_set.ids[j] + print("{}\t{}".format(id_, pred), file=outputfh) + else: + if i == 0: # Only write header once per set of input files + print("\t".join(header)) + if args.all_probabilities: + for j, probabilities in enumerate(preds): + id_ = feature_set.ids[j] + probs_str = "\t".join([str(p) for p in probabilities]) + print("{}\t{}".format(id_, probs_str)) + else: + for j, pred in enumerate(preds): + id_ = feature_set.ids[j] + print("{}\t{}".format(id_, pred)) if __name__ == '__main__': diff --git a/skll/version.py b/skll/version.py index b67290bc..3bb8833e 100644 --- a/skll/version.py +++ b/skll/version.py @@ -7,5 +7,5 @@ :organization: ETS """ -__version__ = '1.5' +__version__ = '1.5.3' VERSION = tuple(int(x) for x in __version__.split('.')) diff --git a/tests/configs/test_single_file_saved_subset.template.cfg b/tests/configs/test_single_file_saved_subset.template.cfg new file mode 100644 index 00000000..de8e2b48 --- /dev/null +++ b/tests/configs/test_single_file_saved_subset.template.cfg @@ -0,0 +1,11 @@ +[General] +experiment_name=train_test_single_file +task=evaluate + +[Input] +learners=["RandomForestClassifier"] + +[Tuning] + +[Output] +probability=false diff --git a/tests/other/examples_test.jsonlines b/tests/other/examples_test.jsonlines new file mode 100644 index 00000000..6d344cc6 --- /dev/null +++ b/tests/other/examples_test.jsonlines @@ -0,0 +1,50 @@ +{"id": "EXAMPLE_73", "y": "versicolor", "x": {"f0": 6.1, "f1": 2.8, "f2": 4.7, "f3": 1.2}} +{"id": "EXAMPLE_18", "y": "setosa", "x": 
{"f0": 5.7, "f1": 3.8, "f2": 1.7, "f3": 0.3}} +{"id": "EXAMPLE_118", "y": "virginica", "x": {"f0": 7.7, "f1": 2.6, "f2": 6.9, "f3": 2.3}} +{"id": "EXAMPLE_78", "y": "versicolor", "x": {"f0": 6.0, "f1": 2.9, "f2": 4.5, "f3": 1.5}} +{"id": "EXAMPLE_76", "y": "versicolor", "x": {"f0": 6.8, "f1": 2.8, "f2": 4.8, "f3": 1.4}} +{"id": "EXAMPLE_31", "y": "setosa", "x": {"f0": 5.4, "f1": 3.4, "f2": 1.5, "f3": 0.4}} +{"id": "EXAMPLE_64", "y": "versicolor", "x": {"f0": 5.6, "f1": 2.9, "f2": 3.6, "f3": 1.3}} +{"id": "EXAMPLE_141", "y": "virginica", "x": {"f0": 6.9, "f1": 3.1, "f2": 5.1, "f3": 2.3}} +{"id": "EXAMPLE_68", "y": "versicolor", "x": {"f0": 6.2, "f1": 2.2, "f2": 4.5, "f3": 1.5}} +{"id": "EXAMPLE_82", "y": "versicolor", "x": {"f0": 5.8, "f1": 2.7, "f2": 3.9, "f3": 1.2}} +{"id": "EXAMPLE_110", "y": "virginica", "x": {"f0": 6.5, "f1": 3.2, "f2": 5.1, "f3": 2.0}} +{"id": "EXAMPLE_12", "y": "setosa", "x": {"f0": 4.8, "f1": 3.0, "f2": 1.4, "f3": 0.1}} +{"id": "EXAMPLE_36", "y": "setosa", "x": {"f0": 5.5, "f1": 3.5, "f2": 1.3, "f3": 0.2}} +{"id": "EXAMPLE_9", "y": "setosa", "x": {"f0": 4.9, "f1": 3.1, "f2": 1.5, "f3": 0.1}} +{"id": "EXAMPLE_19", "y": "setosa", "x": {"f0": 5.1, "f1": 3.8, "f2": 1.5, "f3": 0.3}} +{"id": "EXAMPLE_56", "y": "versicolor", "x": {"f0": 6.3, "f1": 3.3, "f2": 4.7, "f3": 1.6}} +{"id": "EXAMPLE_104", "y": "virginica", "x": {"f0": 6.5, "f1": 3.0, "f2": 5.8, "f3": 2.2}} +{"id": "EXAMPLE_69", "y": "versicolor", "x": {"f0": 5.6, "f1": 2.5, "f2": 3.9, "f3": 1.1}} +{"id": "EXAMPLE_55", "y": "versicolor", "x": {"f0": 5.7, "f1": 2.8, "f2": 4.5, "f3": 1.3}} +{"id": "EXAMPLE_132", "y": "virginica", "x": {"f0": 6.4, "f1": 2.8, "f2": 5.6, "f3": 2.2}} +{"id": "EXAMPLE_29", "y": "setosa", "x": {"f0": 4.7, "f1": 3.2, "f2": 1.6, "f3": 0.2}} +{"id": "EXAMPLE_127", "y": "virginica", "x": {"f0": 6.1, "f1": 3.0, "f2": 4.9, "f3": 1.8}} +{"id": "EXAMPLE_26", "y": "setosa", "x": {"f0": 5.0, "f1": 3.4, "f2": 1.6, "f3": 0.4}} +{"id": "EXAMPLE_128", "y": "virginica", "x": {"f0": 6.4, "f1": 2.8, "f2": 5.6, "f3": 2.1}} +{"id": "EXAMPLE_131", "y": "virginica", "x": {"f0": 7.9, "f1": 3.8, "f2": 6.4, "f3": 2.0}} +{"id": "EXAMPLE_145", "y": "virginica", "x": {"f0": 6.7, "f1": 3.0, "f2": 5.2, "f3": 2.3}} +{"id": "EXAMPLE_108", "y": "virginica", "x": {"f0": 6.7, "f1": 2.5, "f2": 5.8, "f3": 1.8}} +{"id": "EXAMPLE_143", "y": "virginica", "x": {"f0": 6.8, "f1": 3.2, "f2": 5.9, "f3": 2.3}} +{"id": "EXAMPLE_45", "y": "setosa", "x": {"f0": 4.8, "f1": 3.0, "f2": 1.4, "f3": 0.3}} +{"id": "EXAMPLE_30", "y": "setosa", "x": {"f0": 4.8, "f1": 3.1, "f2": 1.6, "f3": 0.2}} +{"id": "EXAMPLE_22", "y": "setosa", "x": {"f0": 4.6, "f1": 3.6, "f2": 1.0, "f3": 0.2}} +{"id": "EXAMPLE_15", "y": "setosa", "x": {"f0": 5.7, "f1": 4.4, "f2": 1.5, "f3": 0.4}} +{"id": "EXAMPLE_65", "y": "versicolor", "x": {"f0": 6.7, "f1": 3.1, "f2": 4.4, "f3": 1.4}} +{"id": "EXAMPLE_11", "y": "setosa", "x": {"f0": 4.8, "f1": 3.4, "f2": 1.6, "f3": 0.2}} +{"id": "EXAMPLE_42", "y": "setosa", "x": {"f0": 4.4, "f1": 3.2, "f2": 1.3, "f3": 0.2}} +{"id": "EXAMPLE_146", "y": "virginica", "x": {"f0": 6.3, "f1": 2.5, "f2": 5.0, "f3": 1.9}} +{"id": "EXAMPLE_51", "y": "versicolor", "x": {"f0": 6.4, "f1": 3.2, "f2": 4.5, "f3": 1.5}} +{"id": "EXAMPLE_27", "y": "setosa", "x": {"f0": 5.2, "f1": 3.5, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_4", "y": "setosa", "x": {"f0": 5.0, "f1": 3.6, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_32", "y": "setosa", "x": {"f0": 5.2, "f1": 4.1, "f2": 1.5, "f3": 0.1}} +{"id": "EXAMPLE_142", "y": "virginica", "x": {"f0": 5.8, "f1": 2.7, "f2": 
5.1, "f3": 1.9}} +{"id": "EXAMPLE_85", "y": "versicolor", "x": {"f0": 6.0, "f1": 3.4, "f2": 4.5, "f3": 1.6}} +{"id": "EXAMPLE_86", "y": "versicolor", "x": {"f0": 6.7, "f1": 3.1, "f2": 4.7, "f3": 1.5}} +{"id": "EXAMPLE_16", "y": "setosa", "x": {"f0": 5.4, "f1": 3.9, "f2": 1.3, "f3": 0.4}} +{"id": "EXAMPLE_10", "y": "setosa", "x": {"f0": 5.4, "f1": 3.7, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_81", "y": "versicolor", "x": {"f0": 5.5, "f1": 2.4, "f2": 3.7, "f3": 1.0}} +{"id": "EXAMPLE_133", "y": "virginica", "x": {"f0": 6.3, "f1": 2.8, "f2": 5.1, "f3": 1.5}} +{"id": "EXAMPLE_137", "y": "virginica", "x": {"f0": 6.4, "f1": 3.1, "f2": 5.5, "f3": 1.8}} +{"id": "EXAMPLE_75", "y": "versicolor", "x": {"f0": 6.6, "f1": 3.0, "f2": 4.4, "f3": 1.4}} +{"id": "EXAMPLE_109", "y": "virginica", "x": {"f0": 7.2, "f1": 3.6, "f2": 6.1, "f3": 2.5}} diff --git a/tests/other/examples_train.jsonlines b/tests/other/examples_train.jsonlines new file mode 100644 index 00000000..2d403b02 --- /dev/null +++ b/tests/other/examples_train.jsonlines @@ -0,0 +1,100 @@ +{"id": "EXAMPLE_96", "y": "versicolor", "x": {"f0": 5.7, "f1": 2.9, "f2": 4.2, "f3": 1.3}} +{"id": "EXAMPLE_105", "y": "virginica", "x": {"f0": 7.6, "f1": 3.0, "f2": 6.6, "f3": 2.1}} +{"id": "EXAMPLE_66", "y": "versicolor", "x": {"f0": 5.6, "f1": 3.0, "f2": 4.5, "f3": 1.5}} +{"id": "EXAMPLE_0", "y": "setosa", "x": {"f0": 5.1, "f1": 3.5, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_122", "y": "virginica", "x": {"f0": 7.7, "f1": 2.8, "f2": 6.7, "f3": 2.0}} +{"id": "EXAMPLE_67", "y": "versicolor", "x": {"f0": 5.8, "f1": 2.7, "f2": 4.1, "f3": 1.0}} +{"id": "EXAMPLE_28", "y": "setosa", "x": {"f0": 5.2, "f1": 3.4, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_40", "y": "setosa", "x": {"f0": 5.0, "f1": 3.5, "f2": 1.3, "f3": 0.3}} +{"id": "EXAMPLE_44", "y": "setosa", "x": {"f0": 5.1, "f1": 3.8, "f2": 1.9, "f3": 0.4}} +{"id": "EXAMPLE_60", "y": "versicolor", "x": {"f0": 5.0, "f1": 2.0, "f2": 3.5, "f3": 1.0}} +{"id": "EXAMPLE_123", "y": "virginica", "x": {"f0": 6.3, "f1": 2.7, "f2": 4.9, "f3": 1.8}} +{"id": "EXAMPLE_24", "y": "setosa", "x": {"f0": 4.8, "f1": 3.4, "f2": 1.9, "f3": 0.2}} +{"id": "EXAMPLE_25", "y": "setosa", "x": {"f0": 5.0, "f1": 3.0, "f2": 1.6, "f3": 0.2}} +{"id": "EXAMPLE_23", "y": "setosa", "x": {"f0": 5.1, "f1": 3.3, "f2": 1.7, "f3": 0.5}} +{"id": "EXAMPLE_94", "y": "versicolor", "x": {"f0": 5.6, "f1": 2.7, "f2": 4.2, "f3": 1.3}} +{"id": "EXAMPLE_39", "y": "setosa", "x": {"f0": 5.1, "f1": 3.4, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_95", "y": "versicolor", "x": {"f0": 5.7, "f1": 3.0, "f2": 4.2, "f3": 1.2}} +{"id": "EXAMPLE_117", "y": "virginica", "x": {"f0": 7.7, "f1": 3.8, "f2": 6.7, "f3": 2.2}} +{"id": "EXAMPLE_47", "y": "setosa", "x": {"f0": 4.6, "f1": 3.2, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_97", "y": "versicolor", "x": {"f0": 6.2, "f1": 2.9, "f2": 4.3, "f3": 1.3}} +{"id": "EXAMPLE_113", "y": "virginica", "x": {"f0": 5.7, "f1": 2.5, "f2": 5.0, "f3": 2.0}} +{"id": "EXAMPLE_33", "y": "setosa", "x": {"f0": 5.5, "f1": 4.2, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_138", "y": "virginica", "x": {"f0": 6.0, "f1": 3.0, "f2": 4.8, "f3": 1.8}} +{"id": "EXAMPLE_101", "y": "virginica", "x": {"f0": 5.8, "f1": 2.7, "f2": 5.1, "f3": 1.9}} +{"id": "EXAMPLE_62", "y": "versicolor", "x": {"f0": 6.0, "f1": 2.2, "f2": 4.0, "f3": 1.0}} +{"id": "EXAMPLE_84", "y": "versicolor", "x": {"f0": 5.4, "f1": 3.0, "f2": 4.5, "f3": 1.5}} +{"id": "EXAMPLE_148", "y": "virginica", "x": {"f0": 6.2, "f1": 3.4, "f2": 5.4, "f3": 2.3}} +{"id": "EXAMPLE_53", "y": "versicolor", "x": {"f0": 5.5, "f1": 
2.3, "f2": 4.0, "f3": 1.3}} +{"id": "EXAMPLE_5", "y": "setosa", "x": {"f0": 5.4, "f1": 3.9, "f2": 1.7, "f3": 0.4}} +{"id": "EXAMPLE_93", "y": "versicolor", "x": {"f0": 5.0, "f1": 2.3, "f2": 3.3, "f3": 1.0}} +{"id": "EXAMPLE_111", "y": "virginica", "x": {"f0": 6.4, "f1": 2.7, "f2": 5.3, "f3": 1.9}} +{"id": "EXAMPLE_49", "y": "setosa", "x": {"f0": 5.0, "f1": 3.3, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_35", "y": "setosa", "x": {"f0": 5.0, "f1": 3.2, "f2": 1.2, "f3": 0.2}} +{"id": "EXAMPLE_80", "y": "versicolor", "x": {"f0": 5.5, "f1": 2.4, "f2": 3.8, "f3": 1.1}} +{"id": "EXAMPLE_77", "y": "versicolor", "x": {"f0": 6.7, "f1": 3.0, "f2": 5.0, "f3": 1.7}} +{"id": "EXAMPLE_34", "y": "setosa", "x": {"f0": 4.9, "f1": 3.1, "f2": 1.5, "f3": 0.1}} +{"id": "EXAMPLE_114", "y": "virginica", "x": {"f0": 5.8, "f1": 2.8, "f2": 5.1, "f3": 2.4}} +{"id": "EXAMPLE_7", "y": "setosa", "x": {"f0": 5.0, "f1": 3.4, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_43", "y": "setosa", "x": {"f0": 5.0, "f1": 3.5, "f2": 1.6, "f3": 0.6}} +{"id": "EXAMPLE_70", "y": "versicolor", "x": {"f0": 5.9, "f1": 3.2, "f2": 4.8, "f3": 1.8}} +{"id": "EXAMPLE_98", "y": "versicolor", "x": {"f0": 5.1, "f1": 2.5, "f2": 3.0, "f3": 1.1}} +{"id": "EXAMPLE_120", "y": "virginica", "x": {"f0": 6.9, "f1": 3.2, "f2": 5.7, "f3": 2.3}} +{"id": "EXAMPLE_83", "y": "versicolor", "x": {"f0": 6.0, "f1": 2.7, "f2": 5.1, "f3": 1.6}} +{"id": "EXAMPLE_134", "y": "virginica", "x": {"f0": 6.1, "f1": 2.6, "f2": 5.6, "f3": 1.4}} +{"id": "EXAMPLE_135", "y": "virginica", "x": {"f0": 7.7, "f1": 3.0, "f2": 6.1, "f3": 2.3}} +{"id": "EXAMPLE_89", "y": "versicolor", "x": {"f0": 5.5, "f1": 2.5, "f2": 4.0, "f3": 1.3}} +{"id": "EXAMPLE_8", "y": "setosa", "x": {"f0": 4.4, "f1": 2.9, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_13", "y": "setosa", "x": {"f0": 4.3, "f1": 3.0, "f2": 1.1, "f3": 0.1}} +{"id": "EXAMPLE_119", "y": "virginica", "x": {"f0": 6.0, "f1": 2.2, "f2": 5.0, "f3": 1.5}} +{"id": "EXAMPLE_125", "y": "virginica", "x": {"f0": 7.2, "f1": 3.2, "f2": 6.0, "f3": 1.8}} +{"id": "EXAMPLE_3", "y": "setosa", "x": {"f0": 4.6, "f1": 3.1, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_17", "y": "setosa", "x": {"f0": 5.1, "f1": 3.5, "f2": 1.4, "f3": 0.3}} +{"id": "EXAMPLE_38", "y": "setosa", "x": {"f0": 4.4, "f1": 3.0, "f2": 1.3, "f3": 0.2}} +{"id": "EXAMPLE_72", "y": "versicolor", "x": {"f0": 6.3, "f1": 2.5, "f2": 4.9, "f3": 1.5}} +{"id": "EXAMPLE_136", "y": "virginica", "x": {"f0": 6.3, "f1": 3.4, "f2": 5.6, "f3": 2.4}} +{"id": "EXAMPLE_6", "y": "setosa", "x": {"f0": 4.6, "f1": 3.4, "f2": 1.4, "f3": 0.3}} +{"id": "EXAMPLE_112", "y": "virginica", "x": {"f0": 6.8, "f1": 3.0, "f2": 5.5, "f3": 2.1}} +{"id": "EXAMPLE_100", "y": "virginica", "x": {"f0": 6.3, "f1": 3.3, "f2": 6.0, "f3": 2.5}} +{"id": "EXAMPLE_2", "y": "setosa", "x": {"f0": 4.7, "f1": 3.2, "f2": 1.3, "f3": 0.2}} +{"id": "EXAMPLE_63", "y": "versicolor", "x": {"f0": 6.1, "f1": 2.9, "f2": 4.7, "f3": 1.4}} +{"id": "EXAMPLE_54", "y": "versicolor", "x": {"f0": 6.5, "f1": 2.8, "f2": 4.6, "f3": 1.5}} +{"id": "EXAMPLE_126", "y": "virginica", "x": {"f0": 6.2, "f1": 2.8, "f2": 4.8, "f3": 1.8}} +{"id": "EXAMPLE_50", "y": "versicolor", "x": {"f0": 7.0, "f1": 3.2, "f2": 4.7, "f3": 1.4}} +{"id": "EXAMPLE_115", "y": "virginica", "x": {"f0": 6.4, "f1": 3.2, "f2": 5.3, "f3": 2.3}} +{"id": "EXAMPLE_46", "y": "setosa", "x": {"f0": 5.1, "f1": 3.8, "f2": 1.6, "f3": 0.2}} +{"id": "EXAMPLE_139", "y": "virginica", "x": {"f0": 6.9, "f1": 3.1, "f2": 5.4, "f3": 2.1}} +{"id": "EXAMPLE_61", "y": "versicolor", "x": {"f0": 5.9, "f1": 3.0, "f2": 4.2, "f3": 1.5}} 
+{"id": "EXAMPLE_147", "y": "virginica", "x": {"f0": 6.5, "f1": 3.0, "f2": 5.2, "f3": 2.0}} +{"id": "EXAMPLE_79", "y": "versicolor", "x": {"f0": 5.7, "f1": 2.6, "f2": 3.5, "f3": 1.0}} +{"id": "EXAMPLE_59", "y": "versicolor", "x": {"f0": 5.2, "f1": 2.7, "f2": 3.9, "f3": 1.4}} +{"id": "EXAMPLE_91", "y": "versicolor", "x": {"f0": 6.1, "f1": 3.0, "f2": 4.6, "f3": 1.4}} +{"id": "EXAMPLE_41", "y": "setosa", "x": {"f0": 4.5, "f1": 2.3, "f2": 1.3, "f3": 0.3}} +{"id": "EXAMPLE_58", "y": "versicolor", "x": {"f0": 6.6, "f1": 2.9, "f2": 4.6, "f3": 1.3}} +{"id": "EXAMPLE_90", "y": "versicolor", "x": {"f0": 5.5, "f1": 2.6, "f2": 4.4, "f3": 1.2}} +{"id": "EXAMPLE_48", "y": "setosa", "x": {"f0": 5.3, "f1": 3.7, "f2": 1.5, "f3": 0.2}} +{"id": "EXAMPLE_88", "y": "versicolor", "x": {"f0": 5.6, "f1": 3.0, "f2": 4.1, "f3": 1.3}} +{"id": "EXAMPLE_107", "y": "virginica", "x": {"f0": 7.3, "f1": 2.9, "f2": 6.3, "f3": 1.8}} +{"id": "EXAMPLE_124", "y": "virginica", "x": {"f0": 6.7, "f1": 3.3, "f2": 5.7, "f3": 2.1}} +{"id": "EXAMPLE_21", "y": "setosa", "x": {"f0": 5.1, "f1": 3.7, "f2": 1.5, "f3": 0.4}} +{"id": "EXAMPLE_57", "y": "versicolor", "x": {"f0": 4.9, "f1": 2.4, "f2": 3.3, "f3": 1.0}} +{"id": "EXAMPLE_144", "y": "virginica", "x": {"f0": 6.7, "f1": 3.3, "f2": 5.7, "f3": 2.5}} +{"id": "EXAMPLE_129", "y": "virginica", "x": {"f0": 7.2, "f1": 3.0, "f2": 5.8, "f3": 1.6}} +{"id": "EXAMPLE_37", "y": "setosa", "x": {"f0": 4.9, "f1": 3.1, "f2": 1.5, "f3": 0.1}} +{"id": "EXAMPLE_140", "y": "virginica", "x": {"f0": 6.7, "f1": 3.1, "f2": 5.6, "f3": 2.4}} +{"id": "EXAMPLE_1", "y": "setosa", "x": {"f0": 4.9, "f1": 3.0, "f2": 1.4, "f3": 0.2}} +{"id": "EXAMPLE_52", "y": "versicolor", "x": {"f0": 6.9, "f1": 3.1, "f2": 4.9, "f3": 1.5}} +{"id": "EXAMPLE_130", "y": "virginica", "x": {"f0": 7.4, "f1": 2.8, "f2": 6.1, "f3": 1.9}} +{"id": "EXAMPLE_103", "y": "virginica", "x": {"f0": 6.3, "f1": 2.9, "f2": 5.6, "f3": 1.8}} +{"id": "EXAMPLE_99", "y": "versicolor", "x": {"f0": 5.7, "f1": 2.8, "f2": 4.1, "f3": 1.3}} +{"id": "EXAMPLE_116", "y": "virginica", "x": {"f0": 6.5, "f1": 3.0, "f2": 5.5, "f3": 1.8}} +{"id": "EXAMPLE_87", "y": "versicolor", "x": {"f0": 6.3, "f1": 2.3, "f2": 4.4, "f3": 1.3}} +{"id": "EXAMPLE_74", "y": "versicolor", "x": {"f0": 6.4, "f1": 2.9, "f2": 4.3, "f3": 1.3}} +{"id": "EXAMPLE_121", "y": "virginica", "x": {"f0": 5.6, "f1": 2.8, "f2": 4.9, "f3": 2.0}} +{"id": "EXAMPLE_149", "y": "virginica", "x": {"f0": 5.9, "f1": 3.0, "f2": 5.1, "f3": 1.8}} +{"id": "EXAMPLE_20", "y": "setosa", "x": {"f0": 5.4, "f1": 3.4, "f2": 1.7, "f3": 0.2}} +{"id": "EXAMPLE_71", "y": "versicolor", "x": {"f0": 6.1, "f1": 2.8, "f2": 4.0, "f3": 1.3}} +{"id": "EXAMPLE_106", "y": "virginica", "x": {"f0": 4.9, "f1": 2.5, "f2": 4.5, "f3": 1.7}} +{"id": "EXAMPLE_14", "y": "setosa", "x": {"f0": 5.8, "f1": 4.0, "f2": 1.2, "f3": 0.2}} +{"id": "EXAMPLE_92", "y": "versicolor", "x": {"f0": 5.8, "f1": 2.6, "f2": 4.0, "f3": 1.2}} +{"id": "EXAMPLE_102", "y": "virginica", "x": {"f0": 7.1, "f1": 3.0, "f2": 5.9, "f3": 2.1}} diff --git a/tests/test_classification.py b/tests/test_classification.py index c026f154..dde6b237 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -26,9 +26,11 @@ from nose.tools import eq_, assert_almost_equal, raises from sklearn.exceptions import ConvergenceWarning +from sklearn.feature_extraction import FeatureHasher from sklearn.metrics import accuracy_score from skll.data import FeatureSet +from skll.data.readers import NDJReader from skll.data.writers import NDJWriter from skll.config import 
_parse_config_file from skll.experiments import run_configuration @@ -76,9 +78,11 @@ def tearDown(): for output_file in glob.glob(join(output_dir, 'train_test_single_file_*')): os.unlink(output_file) - config_file = join(config_dir, 'test_single_file.cfg') - if exists(config_file): - os.unlink(config_file) + config_files = [join(config_dir, cfgname) for cfgname in ['test_single_file.cfg', + 'test_single_file_saved_subset.cfg']] + for config_file in config_files: + if exists(config_file): + os.unlink(config_file) def check_predict(model, use_feature_hashing=False): @@ -127,6 +131,70 @@ def test_predict(): yield check_predict, model, use_feature_hashing +# test predictions when both the model and the data use DictVectorizers +def test_predict_dict_dict(): + train_file = join(_my_dir, 'other', 'examples_train.jsonlines') + test_file = join(_my_dir, 'other', 'examples_test.jsonlines') + train_fs = NDJReader.for_path(train_file).read() + test_fs = NDJReader.for_path(test_file).read() + learner = Learner('LogisticRegression') + learner.train(train_fs, grid_search=False) + predictions = learner.predict(test_fs) + eq_(len(predictions), test_fs.features.shape[0]) + + +# test predictions when both the model and the data use FeatureHashers +# and the same number of bins +def test_predict_hasher_hasher_same_bins(): + train_file = join(_my_dir, 'other', 'examples_train.jsonlines') + test_file = join(_my_dir, 'other', 'examples_test.jsonlines') + train_fs = NDJReader.for_path(train_file, feature_hasher=True, num_features=3).read() + test_fs = NDJReader.for_path(test_file, feature_hasher=True, num_features=3).read() + learner = Learner('LogisticRegression') + learner.train(train_fs, grid_search=False) + predictions = learner.predict(test_fs) + eq_(len(predictions), test_fs.features.shape[0]) + + +# test predictions when both the model and the data use FeatureHashers +# but a different number of bins +@raises(RuntimeError) +def test_predict_hasher_hasher_different_bins(): + train_file = join(_my_dir, 'other', 'examples_train.jsonlines') + test_file = join(_my_dir, 'other', 'examples_test.jsonlines') + train_fs = NDJReader.for_path(train_file, feature_hasher=True, num_features=3).read() + test_fs = NDJReader.for_path(test_file, feature_hasher=True, num_features=2).read() + learner = Learner('LogisticRegression') + learner.train(train_fs, grid_search=False) + _ = learner.predict(test_fs) + + +# test predictions when the model uses a FeatureHasher and the data +# uses a DictVectorizer +def test_predict_hasher_dict(): + train_file = join(_my_dir, 'other', 'examples_train.jsonlines') + test_file = join(_my_dir, 'other', 'examples_test.jsonlines') + train_fs = NDJReader.for_path(train_file, feature_hasher=True, num_features=3).read() + test_fs = NDJReader.for_path(test_file).read() + learner = Learner('LogisticRegression') + learner.train(train_fs, grid_search=False) + predictions = learner.predict(test_fs) + eq_(len(predictions), test_fs.features.shape[0]) + + +# test predictions when the model uses a DictVectorizer and the data +# uses a FeatureHasher +@raises(RuntimeError) +def test_predict_dict_hasher(): + train_file = join(_my_dir, 'other', 'examples_train.jsonlines') + test_file = join(_my_dir, 'other', 'examples_test.jsonlines') + train_fs = NDJReader.for_path(train_file).read() + test_fs = NDJReader.for_path(test_file, feature_hasher=True, num_features=3).read() + learner = Learner('LogisticRegression') + learner.train(train_fs, grid_search=False) + _ = learner.predict(test_fs) + + # the function to create data with rare
labels for cross-validation def make_rare_class_data(): """ @@ -190,7 +258,7 @@ def test_sparse_predict(): [(0.45, 0.52), (0.52, 0.5), (0.48, 0.5), (0.49, 0.5), (0.43, 0), (0.53, 0.57), - (0.49, 0.49), (0.48, 0.5)]): + (0.49, 0.49), (0.5, 0.49)]): yield check_sparse_predict, learner_name, expected_scores[0], False if learner_name != 'MultinomialNB': yield check_sparse_predict, learner_name, expected_scores[1], True @@ -207,7 +275,7 @@ def test_mlp_classification(): learner = Learner('MLPClassifier') with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=ConvergenceWarning) - learner.train(train_fs, grid_search=True) + learner.train(train_fs, grid_search=False) # now generate the predictions on the test set predictions = learner.predict(test_fs) @@ -217,7 +285,7 @@ def test_mlp_classification(): # using make_regression_data. To do this, we just # make sure that they are correlated accuracy = accuracy_score(predictions, test_fs.labels) - assert_almost_equal(accuracy, 0.825) + assert_almost_equal(accuracy, 0.858, places=3) def check_sparse_predict_sampler(use_feature_hashing=False): @@ -301,6 +369,12 @@ def make_single_file_featureset_data(): writer = NDJWriter(test_path, test_fs) writer.write() + # Also write another test feature set that has fewer features than the training set + test_fs.filter(features=['f01', 'f02']) + test_path = join(_my_dir, 'test', 'test_single_file_subset.jsonlines') + writer = NDJWriter(test_path, test_fs) + writer.write() + def test_train_file_test_file(): """ @@ -340,6 +414,43 @@ def test_train_file_test_file(): assert_almost_equal(result_dict['score'], 0.9491525423728813) +def test_predict_on_subset_with_existing_model(): + """ + Test generating predictions on subset with existing model + """ + # Create data files + make_single_file_featureset_data() + + # train and save a model on the training file + train_fs = NDJReader.for_path(join(_my_dir, 'train', 'train_single_file.jsonlines')).read() + learner = Learner('RandomForestClassifier') + learner.train(train_fs, grid_search=True, grid_objective="accuracy") + model_filename = join(_my_dir, 'output', ('train_test_single_file_train_train_' + 'single_file.jsonlines_test_test_single' + '_file_subset.jsonlines_RandomForestClassifier' + '.model')) + + learner.save(model_filename) + + # Run experiment + config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs", + "test_single_file_saved_subset" + ".template.cfg"), + join(_my_dir, 'train', 'train_single_file.jsonlines'), + join(_my_dir, 'test', + 'test_single_file_subset.' 
+ 'jsonlines')) + run_configuration(config_path, quiet=True, overwrite=False) + + # Check results + with open(join(_my_dir, 'output', ('train_test_single_file_train_train_' + 'single_file.jsonlines_test_test_single' + '_file_subset.jsonlines_RandomForestClassifier' + '.results.json'))) as f: + result_dict = json.load(f)[0] + assert_almost_equal(result_dict['score'], 0.7333333) + + def test_train_file_test_file_ablation(): """ Test that specifying ablation with train and test file is ignored diff --git a/tests/test_featureset.py b/tests/test_featureset.py index 3ae1f6ee..d5cb6f30 100644 --- a/tests/test_featureset.py +++ b/tests/test_featureset.py @@ -25,7 +25,7 @@ from sklearn.datasets.samples_generator import make_classification import skll -from skll.data import FeatureSet, Writer, Reader +from skll.data import FeatureSet, Writer, Reader, NDJReader, NDJWriter from skll.data.readers import DictListReader from skll.experiments import _load_featureset from skll.learner import _DEFAULT_PARAM_GRIDS @@ -62,6 +62,11 @@ def tearDown(): if exists(filepath): os.unlink(filepath) + filepaths = [join(_my_dir, 'other', '{}.jsonlines'.format(x)) for x in ['test_string_ids', 'test_string_ids_df', 'test_string_labels_df']] + for filepath in filepaths: + if exists(filepath): + os.unlink(filepath) + def _create_empty_file(filetype): filepath = join(_my_dir, 'other', 'empty.{}'.format(filetype)) @@ -1026,3 +1031,79 @@ def test_featureset_creation_from_dataframe_without_labels_with_vectorizer(): rtol=1e-6) and np.all(np.isnan(expected.labels)) and np.all(np.isnan(current.labels))) + + +def test_writing_ndj_featureset_with_string_ids(): + test_dict_vectorizer = DictVectorizer() + test_feat_dict_list = [{'a': 1.0, 'b': 1.0}, {'b': 1.0, 'c': 1.0}] + Xtest = test_dict_vectorizer.fit_transform(test_feat_dict_list) + fs_test = FeatureSet('test', + ids=['1', '2'], + labels=[1, 2], + features=Xtest, + vectorizer=test_dict_vectorizer) + output_path = join(_my_dir, "other", "test_string_ids.jsonlines") + test_writer = NDJWriter(output_path, fs_test) + test_writer.write() + + # read in the written file into a featureset and confirm that the + # two featuresets are equal + fs_test2 = NDJReader.for_path(output_path).read() + + assert fs_test == fs_test2 + + +@attr('have_pandas_and_seaborn') +def test_featureset_creation_from_dataframe_with_string_ids(): + + import pandas + + dftest = pandas.DataFrame({"id": ['1', '2'], + "score": [1, 2], + "text": ["a b", "b c"]}) + dftest.set_index("id", inplace=True) + test_feat_dict_list = [{'a': 1.0, 'b': 1.0}, {'b': 1.0, 'c': 1.0}] + test_dict_vectorizer = DictVectorizer() + Xtest = test_dict_vectorizer.fit_transform(test_feat_dict_list) + fs_test = FeatureSet('test', + ids=dftest.index.values, + labels=dftest['score'].values, + features=Xtest, + vectorizer=test_dict_vectorizer) + output_path = join(_my_dir, "other", "test_string_ids_df.jsonlines") + test_writer = NDJWriter(output_path, fs_test) + test_writer.write() + + # read in the written file into a featureset and confirm that the + # two featuresets are equal + fs_test2 = NDJReader.for_path(output_path).read() + + assert fs_test == fs_test2 + + +@attr('have_pandas_and_seaborn') +def test_featureset_creation_from_dataframe_with_string_labels(): + + import pandas + + dftest = pandas.DataFrame({"id": [1, 2], + "score": ['yes', 'no'], + "text": ["a b", "b c"]}) + dftest.set_index("id", inplace=True) + test_feat_dict_list = [{'a': 1.0, 'b': 1.0}, {'b': 1.0, 'c': 1.0}] + test_dict_vectorizer = DictVectorizer() + Xtest = 
test_dict_vectorizer.fit_transform(test_feat_dict_list) + fs_test = FeatureSet('test', + ids=dftest.index.values, + labels=dftest['score'].values, + features=Xtest, + vectorizer=test_dict_vectorizer) + output_path = join(_my_dir, "other", "test_string_labels_df.jsonlines") + test_writer = NDJWriter(output_path, fs_test) + test_writer.write() + + # read in the written file into a featureset and confirm that the + # two featuresets are equal + fs_test2 = NDJReader.for_path(output_path, ids_to_floats=True).read() + + assert fs_test == fs_test2 diff --git a/tests/test_input.py b/tests/test_input.py index a36b9fe5..158c2f27 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -62,6 +62,10 @@ def tearDown(): config_dir = join(_my_dir, 'configs') for config_file in glob(join(config_dir, 'test_config_parsing_*.cfg')): os.unlink(config_file) + for auto_dir in glob(join(_my_dir, 'auto*')): + for auto_dir_file in os.listdir(auto_dir): + os.unlink(join(auto_dir, auto_dir_file)) + os.rmdir(auto_dir) def check_safe_float_conversion(converted_val, expected_val): @@ -1119,6 +1123,60 @@ def test_config_parsing_relative_input_paths(): learning_curve_train_sizes, output_metrics) = _parse_config_file(config_path) +def test_config_parsing_automatic_output_directory_creation(): + + train_dir = '../train' + train_file = join(train_dir, 'f0.jsonlines') + test_file = join(train_dir, 'f1.jsonlines') + output_dir = '../output' + + # make a simple config file that has new directories that should + # be automatically created + new_log_path = join(_my_dir, 'autolog') + new_results_path = join(_my_dir, 'autoresults') + new_models_path = join(_my_dir, 'automodels') + new_predictions_path = join(_my_dir, 'autopredictions') + + ok_(not(exists(new_log_path))) + ok_(not(exists(new_results_path))) + ok_(not(exists(new_models_path))) + ok_(not(exists(new_predictions_path))) + + values_to_fill_dict = {'experiment_name': 'auto_dir_creation', + 'task': 'evaluate', + 'train_file': train_file, + 'test_file': test_file, + 'learners': "['LogisticRegression']", + 'log': new_log_path, + 'results': new_results_path, + 'models': new_models_path, + 'predictions': new_predictions_path, + 'objective': 'f1_score_micro'} + + config_template_path = join(_my_dir, 'configs', + 'test_relative_paths.template.cfg') + config_path = fill_in_config_options(config_template_path, + values_to_fill_dict, + 'auto_dir_creation') + + (experiment_name, task, sampler, fixed_sampler_parameters, + feature_hasher, hasher_features, id_col, label_col, train_set_name, + test_set_name, suffix, featuresets, do_shuffle, model_path, + do_grid_search, grid_objective, probability, results_path, + pos_label_str, feature_scaling, min_feature_count, folds_file, + grid_search_jobs, grid_search_folds, cv_folds, save_cv_folds, + use_folds_file_for_grid_search, do_stratified_folds, + fixed_parameter_list, param_grid_list, featureset_names, learners, + prediction_dir, log_path, train_path, test_path, ids_to_floats, + class_map, custom_learner_path, learning_curve_cv_folds_list, + learning_curve_train_sizes, output_metrics) = _parse_config_file(config_path) + + ok_(exists(new_log_path)) + ok_(exists(new_results_path)) + ok_(exists(new_models_path)) + ok_(exists(new_predictions_path)) + + def check_config_parsing_metrics_and_objectives_overlap(task, metrics, objectives): diff --git a/tests/test_regression.py b/tests/test_regression.py index 098ed8ac..c090082c 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -135,7 +135,7 @@ def check_rescaling(name, 
grid_search=False): train_p_std = np.std(train_predictions) rescaled_train_p_std = np.std(rescaled_train_predictions) assert_less(abs(rescaled_train_p_std - train_y_std), - abs(train_p_std - train_y_std)) + abs(train_p_std - train_y_std)) def test_rescaling(): @@ -403,14 +403,14 @@ def check_ensemble_models(name, else: expected_feature_importances = [0.10266744, 0.18681777, 0.71051479] else: - expected_feature_importances = ([0.204, - 0.172, - 0.178, - 0.212, - 0.234] if use_feature_hashing else - [0.262, - 0.288, - 0.45]) + expected_feature_importances = ([0.471714, + 0.022797, + 0.283377, + 0.170823, + 0.051288] if use_feature_hashing else + [0.082621, + 0.166652, + 0.750726]) feature_importances = learner.model.feature_importances_ assert_allclose(feature_importances, expected_feature_importances, @@ -611,7 +611,7 @@ def test_ransac_regression(): 'SGDRegressor', 'DecisionTreeRegressor', 'SVR'], - [0.95, 0.45, 0.75, 0.65]): + [0.95, 0.45, 0.75, 0.65]): yield check_ransac_regression, base_estimator_name, pearson_value @@ -627,7 +627,7 @@ def check_mlp_regression(use_rescaling=False): # we don't want to see any convergence warnings during the grid search with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=ConvergenceWarning) - learner.train(train_fs, grid_search=True, grid_objective='pearson') + learner.train(train_fs, grid_search=False) # now generate the predictions on the test set predictions = learner.predict(test_fs) diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 2b26c57f..634623bc 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -11,6 +11,7 @@ import ast import copy +import csv import itertools import os import sys @@ -29,6 +30,7 @@ from nose.plugins.logcapture import LogCapture from nose.tools import eq_, assert_almost_equal, raises from numpy.testing import assert_allclose, assert_array_almost_equal +from numpy import concatenate import skll import skll.utilities.compute_eval_from_predictions as cefp @@ -284,19 +286,29 @@ def test_compute_eval_from_predictions_random_choice(): eq_(pred, 'C') -def check_generate_predictions(use_feature_hashing=False, use_threshold=False): - - # create some simple classification data without feature hashing - train_fs, test_fs = make_classification_data( - num_examples=1000, num_features=5, - use_feature_hashing=use_feature_hashing, feature_bins=4) +def check_generate_predictions(use_feature_hashing=False, + use_threshold=False, + test_on_subset=False, + use_all_labels=False): + # create some simple classification feature sets for training and testing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5, + use_feature_hashing=use_feature_hashing, + feature_bins=4) + enable_probability = use_threshold or use_all_labels # create a learner that uses an SGD classifier - learner = Learner('SGDClassifier', probability=use_threshold) + learner = Learner('SGDClassifier', probability=enable_probability) # train the learner with grid search learner.train(train_fs, grid_search=True) + # if we are asked to use only a subset, then filter out + # one of the features if we are not using feature hashing, + # do nothing if we are using feature hashing + if test_on_subset and not use_feature_hashing: + test_fs.filter(features=['f01', 'f02', 'f03', 'f04']) + # get the predictions on the test featureset predictions = learner.predict(test_fs) @@ -316,24 +328,90 @@ def check_generate_predictions(use_feature_hashing=False, use_threshold=False): # now use Predictor to generate 
the predictions and make # sure that they are the same as before saving the model - p = gp.Predictor(model_file, threshold=threshold) + p = gp.Predictor(model_file, threshold=threshold, + all_labels=use_all_labels) + + assert(p._pos_index == 1) + assert(p.threshold == threshold) + predictions_after_saving = p.predict(test_fs) eq_(predictions, predictions_after_saving) -def test_generate_predictions(): +def check_generate_predictions_file_headers(use_threshold=False, + use_all_labels=False): + # create some simple classification feature sets for training and testing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5, + feature_bins=4) + enable_probability = use_threshold or use_all_labels + # create a learner that uses an SGD classifier + learner = Learner('SGDClassifier', probability=enable_probability) + + # train the learner with grid search + learner.train(train_fs, grid_search=True) + + # get the predictions on the test featureset + predictions = learner.predict(test_fs) + + # if we asked for probabilities, then use the threshold + # to convert them into binary predictions + if use_threshold: + threshold = 0.6 + else: + threshold = None + + # save the learner to a file + model_file = join(_my_dir, 'output', + 'test_generate_predictions.model') + learner.save(model_file) + + # now use Predictor to generate the predictions and make + # sure that they are the same as before saving the model + p = gp.Predictor(model_file, threshold=threshold, + all_labels=use_all_labels) + predictions_after_saving = p.predict(test_fs) + + if threshold: + assert (p.output_file_header == ['id', 'prediction']) + elif use_all_labels: + assert (p.output_file_header == ['id', '0', '1']) + + + +@raises(ValueError) +def test_generate_predictions_conflicting_params(): """ - Test generate predictions API with hashing and a threshold + Test that ValueError is raised when `generate_predictions.Predictor` is + initialized with both `threshold` and `all_labels` turned on. 
""" + model_file = "not/real/model/file.model" + gp.Predictor(model_file, threshold=0.6, all_labels=True) + + +def test_generate_predictions(): + for (use_feature_hashing, + use_threshold, + test_on_subset, + all_probabilities) in product([True, False], [True, False], + [True, False], [True, False]): + if use_threshold and all_probabilities: + continue + yield (check_generate_predictions, use_feature_hashing, + use_threshold, test_on_subset, all_probabilities) + - yield check_generate_predictions, False, False - yield check_generate_predictions, True, False - yield check_generate_predictions, False, True - yield check_generate_predictions, True, True +def test_generate_predictions_file_header(): + for (use_threshold, all_probabilities) in ([True, False], [False, True]): + if use_threshold and all_probabilities: + continue + yield (check_generate_predictions_file_headers, + use_threshold, all_probabilities) -def check_generate_predictions_console(use_threshold=False): + +def check_generate_predictions_console(use_threshold=False, all_labels=False): # create some simple classification data without feature hashing train_fs, test_fs = make_classification_data(num_examples=1000, @@ -345,8 +423,9 @@ def check_generate_predictions_console(use_threshold=False): writer = NDJWriter(input_file, test_fs) writer.write() + enable_probability = use_threshold or all_labels # create a learner that uses an SGD classifier - learner = Learner('SGDClassifier', probability=use_threshold) + learner = Learner('SGDClassifier', probability=enable_probability) # train the learner with grid search learner.train(train_fs, grid_search=True) @@ -372,6 +451,9 @@ def check_generate_predictions_console(use_threshold=False): generate_cmd = [] if use_threshold: generate_cmd.append('-t {}'.format(threshold)) + elif all_labels: + generate_cmd.append('-a') + generate_cmd.extend([model_file, input_file]) # we need to capture stdout since that's what main() writes to @@ -384,21 +466,265 @@ def check_generate_predictions_console(use_threshold=False): gp.main(generate_cmd) out = mystdout.getvalue() err = mystderr.getvalue() - predictions_after_saving = [int(x) for x in out.strip().split('\n')] - eq_(predictions, predictions_after_saving) + output_lines = out.strip().split('\n')[1:] # Skip headers + if all_labels: + # Ignore the id (first column) in output. + predictions_after_saving = [[float(p) for p in x.split('\t')[1:]] + for x in output_lines] + else: + # Ignore the id (first column) in output. 
+ predictions_after_saving = [int(x.split('\t')[1]) + for x in output_lines] + if all_labels: + assert_array_almost_equal(predictions, predictions_after_saving) + else: + eq_(predictions, predictions_after_saving) + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + print(err) + +def test_generate_predictions_console_bad_input_ext(): + lc = LogCapture() + lc.begin() + + # create some simple classification data without feature hashing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5) + + # create a learner that uses an SGD classifier + learner = Learner('SGDClassifier') + # train the learner with grid search + learner.train(train_fs, grid_search=True) + # get the predictions on the test featureset + predictions = learner.predict(test_fs) + # save the learner to a file + model_file = join(_my_dir, 'output', + 'test_generate_predictions_console.model') + learner.save(model_file) + + # now call main() from generate_predictions.py + generate_cmd = [model_file, "fake_input_file.txt"] + + # we need to capture stdout since that's what main() writes to + err = '' + try: + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = mystdout = StringIO() + sys.stderr = mystderr = StringIO() + gp.main(generate_cmd) + out = mystdout.getvalue() + err = mystderr.getvalue() finally: sys.stdout = old_stdout sys.stderr = old_stderr print(err) + expected_log_mssg = ("skll.utilities.generate_predictions: ERROR: Input " + "file must be in either .arff, .csv, .jsonlines, " + ".libsvm, .megam, .ndj, or .tsv format. Skipping " + "file fake_input_file.txt") + + eq_(lc.handler.buffer[-1], expected_log_mssg) + def test_generate_predictions_console(): """ - Test generate_predictions as a console script with/without a threshold + Test generate_predictions as a console script, with/without a threshold + or all-labels output """ - yield check_generate_predictions_console, False - yield check_generate_predictions_console, True + yield check_generate_predictions_console, False, False + yield check_generate_predictions_console, False, True + yield check_generate_predictions_console, True, False + + +def check_generate_predictions_file_output_multi_infiles(use_threshold=False, + all_labels=False): + """ + Make sure generate_predictions works with multiple input files.
+ """ + + # create some simple classification data without feature hashing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5) + + # save the test feature set to an NDJ file + input_file = join(_my_dir, 'test', 'test_generate_predictions.jsonlines') + writer = NDJWriter(input_file, test_fs) + writer.write() + + enable_probability = use_threshold or all_labels + # create a learner that uses an SGD classifier + learner = Learner('SGDClassifier', probability=enable_probability) + + # train the learner with grid search + learner.train(train_fs, grid_search=True) + + # get the predictions on the test featureset + predictions = learner.predict(test_fs) + predictions = concatenate([predictions, predictions]) + + # if we asked for probabilities, then use the threshold + # to convert them into binary predictions + if use_threshold: + threshold = 0.6 + predictions = [int(p[1] >= threshold) for p in predictions] + else: + predictions = predictions.tolist() + threshold = None + + # save the learner to a file + model_file = join(_my_dir, 'output', + 'test_generate_predictions_console.model') + learner.save(model_file) + + # now call main() from generate_predictions.py + generate_cmd = [] + if use_threshold: + generate_cmd.append('-t {}'.format(threshold)) + elif all_labels: + generate_cmd.append('-a') + + output_file_path = join(_my_dir, 'output', + 'output_test_{}_{}_MULTI.tsv' + .format(use_threshold, all_labels)) + generate_cmd.extend(["--output_file", output_file_path]) + + generate_cmd.extend([model_file, input_file, input_file]) + + gp.main(generate_cmd) + + with open(output_file_path) as saved_predictions_file: + predictions_after_saving = [] + reader = csv.reader(saved_predictions_file, delimiter=str("\t")) + next(reader) + if all_labels: + for row in reader: + predictions_after_saving.append([float(r) for r in row[1:]]) + else: + for row in reader: + predictions_after_saving.append(float(row[1])) + + assert_array_almost_equal(predictions, predictions_after_saving) + + +def test_generate_predictions_file_output_multi_infiles(): + """ + Test generate_predictions file output with/without a threshold + """ + + yield check_generate_predictions_file_output_multi_infiles, False, False + yield check_generate_predictions_file_output_multi_infiles, False, True + yield check_generate_predictions_file_output_multi_infiles, True, False + + + +def check_generate_predictions_file_output(use_threshold=False, + all_labels=False): + + # create some simple classification data without feature hashing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5) + + # save the test feature set to an NDJ file + input_file = join(_my_dir, 'test', 'test_generate_predictions.jsonlines') + writer = NDJWriter(input_file, test_fs) + writer.write() + + enable_probability = use_threshold or all_labels + # create a learner that uses an SGD classifier + learner = Learner('SGDClassifier', probability=enable_probability) + + # train the learner with grid search + learner.train(train_fs, grid_search=True) + + # get the predictions on the test featureset + predictions = learner.predict(test_fs) + + # if we asked for probabilities, then use the threshold + # to convert them into binary predictions + if use_threshold: + threshold = 0.6 + predictions = [int(p[1] >= threshold) for p in predictions] + else: + predictions = predictions.tolist() + threshold = None + + # save the learner to a file + model_file = join(_my_dir, 'output', + 'test_generate_predictions_console.model') + 
learner.save(model_file) + + # now call main() from generate_predictions.py + generate_cmd = [] + if use_threshold: + generate_cmd.append('-t {}'.format(threshold)) + elif all_labels: + generate_cmd.append('-a') + + output_file_path = join(_my_dir, 'output', + 'output_test_{}_{}.tsv' + .format(use_threshold, all_labels)) + generate_cmd.extend(["--output_file", output_file_path]) + + generate_cmd.extend([model_file, input_file]) + gp.main(generate_cmd) + + with open(output_file_path) as saved_predictions_file: + predictions_after_saving = [] + reader = csv.reader(saved_predictions_file, delimiter=str("\t")) + next(reader) + if all_labels: + for row in reader: + predictions_after_saving.append([float(r) for r in row[1:]]) + else: + for row in reader: + predictions_after_saving.append(float(row[1])) + + assert_array_almost_equal(predictions, predictions_after_saving) + + +def test_generate_predictions_file_output(): + """ + Test generate_predictions file output with/without a threshold + or all-labels output + """ + + yield check_generate_predictions_file_output, False, False + yield check_generate_predictions_file_output, False, True + yield check_generate_predictions_file_output, True, False + + + + +@raises(SystemExit) +def test_mutually_exclusive_generate_predictions_args(): + # create some simple classification data without feature hashing + train_fs, test_fs = make_classification_data(num_examples=1000, + num_features=5) + threshold = 0.6 + + # save the test feature set to an NDJ file + input_file = join(_my_dir, 'test', + 'test_generate_predictions.jsonlines') + writer = NDJWriter(input_file, test_fs) + writer.write() + + # create a learner that uses an SGD classifier + learner = Learner('SGDClassifier') + + # train the learner with grid search + learner.train(train_fs, grid_search=True) + + # save the learner to a file + model_file = join(_my_dir, 'output', + 'test_generate_predictions_console.model') + learner.save(model_file) + + # now call main() from generate_predictions.py + generate_cmd = ['-t {}'.format(threshold), '-a'] + generate_cmd.extend([model_file, input_file]) + gp.main(generate_cmd) def check_skll_convert(from_suffix, to_suffix): @@ -522,7 +848,7 @@ def check_print_model_weights(task='classification'): # create some simple classification or regression data if task == 'classification' or task == 'classification_no_intercept': train_fs, _ = make_classification_data(train_test_ratio=0.8) - elif task == 'multiclass_classification': + elif task in ['multiclass_classification', 'multiclass_classification_svc']: train_fs, _ = make_classification_data(train_test_ratio=0.8, num_labels=3) else: train_fs, _, _ = make_regression_data(num_features=4, @@ -532,9 +858,14 @@ def check_print_model_weights(task='classification'): if task == 'classification' or task == 'multiclass_classification': learner = Learner('LogisticRegression') learner.train(train_fs, grid_objective='f1_score_micro') + elif task == 'multiclass_classification_svc': + learner = Learner('SVC', model_kwargs={'kernel': 'linear'}) + learner.train(train_fs, grid_objective='f1_score_micro') elif task == 'classification_no_intercept': learner = Learner('LogisticRegression') - learner.train(train_fs, grid_objective='f1_score_micro', param_grid=[{'fit_intercept':[False]}]) + learner.train(train_fs, + grid_objective='f1_score_micro', + param_grid=[{'fit_intercept': [False]}]) elif task == 'regression': learner = Learner('LinearRegression') learner.train(train_fs, grid_objective='pearson') @@ -596,6 +927,49 @@ def
check_print_model_weights(task='classification'): assert_array_almost_equal(weights, feature_values[index]) assert_array_almost_equal(intercept, learner.model.intercept_) + elif task == 'multiclass_classification_svc': + # for multiple classes with an SVC with a linear kernel, + # we get an intercept for each class pair combination + # as well as a list of weights for each class pair + # combination + + # save the computed intercept values in a dictionary + # with the class pair label as the key + lines_to_parse = [l for l in out.split('\n')[1:] if l] + parsed_intercepts_dict = {} + for intercept_string in lines_to_parse[0:3]: + fields = intercept_string.split('\t') + parsed_intercepts_dict[fields[1]] = safe_float(fields[0]) + + # save the computed feature weights in a dictionary + # with the class pair label as the key and the value + # being a list; each feature weight for this class pair + # is stored at the index of the feature name as given + # by the feature vectorizer vocabulary dictionary + parsed_weights_dict = {} + for ltp in lines_to_parse[3:]: + (weight, class_pair, feature) = ltp.split('\t') + if class_pair not in parsed_weights_dict: + parsed_weights_dict[class_pair] = [0] * 10 + feature_index = learner.feat_vectorizer.vocabulary_[feature] + parsed_weights_dict[class_pair][feature_index] = safe_float(weight) + + # to validate that our coefficients are correct, we will + # get the coefficient array (for all features) from `coef_` + # for a particular class pair and then check that this array + # is equal to the list that we computed above. We will do + # the same for the intercepts, which are even easier to validate + # since they _only_ depend on the class pair + for idx, (class1, class2) in enumerate(itertools.combinations([0, 1, 2], 2)): + class_pair_label = '{}-vs-{}'.format(class1, class2) + computed_coefficients = parsed_weights_dict[class_pair_label] + expected_coefficients = learner.model.coef_[idx].toarray()[0] + assert_array_almost_equal(computed_coefficients, expected_coefficients) + + computed_intercept = parsed_intercepts_dict[class_pair_label] + expected_intercept = learner.model.intercept_[idx] + assert_almost_equal(computed_intercept, expected_intercept) + elif task == 'classification_no_intercept': lines_to_parse = [l for l in out.split('\n')[0:] if l] intercept = safe_float(lines_to_parse[0].split('=')[1]) @@ -637,6 +1011,7 @@ def check_print_model_weights(task='classification'): def test_print_model_weights(): yield check_print_model_weights, 'classification' yield check_print_model_weights, 'multiclass_classification' + yield check_print_model_weights, 'multiclass_classification_svc' yield check_print_model_weights, 'classification_no_intercept' yield check_print_model_weights, 'regression' yield check_print_model_weights, 'regression_linearSVR'
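
A note on the new `multiclass_classification_svc` check above: for a linear-kernel `SVC` trained on k classes, scikit-learn stores one row of `coef_` and one entry of `intercept_` per one-vs-one class pair, ordered as `itertools.combinations` over the sorted class labels, which is exactly the indexing the assertions above rely on. The following minimal sketch (not part of the diff; it trains directly on dense scikit-learn data rather than through a SKLL `Learner`, so `coef_` is dense and needs no `.toarray()` call) illustrates that correspondence:

    import itertools

    from sklearn.datasets import make_classification
    from sklearn.svm import SVC

    # a small three-class problem, mirroring the shape of the test above
    X, y = make_classification(n_samples=200, n_features=10, n_informative=5,
                               n_classes=3, random_state=42)
    clf = SVC(kernel='linear').fit(X, y)

    # one weight row and one intercept per class pair, ordered as
    # itertools.combinations(classes, 2): 0-vs-1, 0-vs-2, 1-vs-2
    for idx, (class1, class2) in enumerate(itertools.combinations(clf.classes_, 2)):
        print('{}-vs-{}'.format(class1, class2),
              clf.intercept_[idx], clf.coef_[idx][:3])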