Commit

…/skll into feature/skll-261-pandas-dataframe-helper
Diane Napolitano committed May 16, 2016
2 parents 89dc0ad + 5e40b4c commit 99012a9
Showing 30 changed files with 502 additions and 160 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -22,7 +22,7 @@ before_install:
- conda config --add channels desilinguist
- conda update --yes conda
install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION nomkl numpy scipy beautiful-soup six scikit-learn==0.17.0 joblib prettytable python-coveralls pyyaml
- conda install --yes python=$TRAVIS_PYTHON_VERSION nomkl numpy scipy beautiful-soup six scikit-learn==0.17.1 joblib prettytable python-coveralls pyyaml
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes configparser logutils mock; fi
- if [ ${WITH_PANDAS} == "true" ]; then conda install --yes pandas; fi
# Have to use pip for nose-cov because its entry points are not supported by conda yet
2 changes: 1 addition & 1 deletion README.rst
@@ -62,7 +62,7 @@ like:
# Should we tune parameters of all learners by searching provided parameter grids?
grid_search = true
# Function to maximize when performing grid search
objective = accuracy
objectives = ['accuracy']
[Output]
# again, these can/should be absolute paths
16 changes: 10 additions & 6 deletions conda.yaml → conda-recipe/unix/skll/meta.yaml
@@ -1,14 +1,16 @@
package:
name: skll
version: {{ environ.get('GIT_DESCRIBE_TAG', '').replace('v', '') }}
version: 1.2

source:
git_url: ./
path: ../../../../skll

build:
number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }}
number: 0
track_features:
- nomkl
script:
- cd $RECIPE_DIR
- cd $SRC_DIR
- $PYTHON setup.py install
entry_points:
- compute_eval_from_predictions = skll.utilities.compute_eval_from_predictions:main
@@ -39,9 +41,10 @@ build:
requirements:
build:
- python
- nomkl
- joblib
- setuptools
- scikit-learn 0.17.0
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
@@ -55,8 +58,9 @@ requirements:

run:
- python
- nomkl
- joblib
- scikit-learn 0.17.0
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
88 changes: 88 additions & 0 deletions conda-recipe/windows/skll/meta.yaml
@@ -0,0 +1,88 @@
package:
name: skll
version: 1.2

source:
path: ../../../../skll

build:
number: 0
script:
- cd $SRC_DIR
- $PYTHON setup.py install
entry_points:
- compute_eval_from_predictions = skll.utilities.compute_eval_from_predictions:main
- filter_features = skll.utilities.filter_features:main
- generate_predictions = skll.utilities.generate_predictions:main
- join_features = skll.utilities.join_features:main
- print_model_weights = skll.utilities.print_model_weights:main
- run_experiment = skll.utilities.run_experiment:main
- skll_convert = skll.utilities.skll_convert:main
- summarize_results = skll.utilities.summarize_results:main
- compute_eval_from_predictions2 = skll.utilities.compute_eval_from_predictions:main [py2k]
- filter_features2 = skll.utilities.filter_features:main [py2k]
- generate_predictions2 = skll.utilities.generate_predictions:main [py2k]
- join_features2 = skll.utilities.join_features:main [py2k]
- print_model_weights2 = skll.utilities.print_model_weights:main [py2k]
- run_experiment2 = skll.utilities.run_experiment:main [py2k]
- skll_convert2 = skll.utilities.skll_convert:main [py2k]
- summarize_results2 = skll.utilities.summarize_results:main [py2k]
- compute_eval_from_predictions3 = skll.utilities.compute_eval_from_predictions:main [py3k]
- filter_features3 = skll.utilities.filter_features:main [py3k]
- generate_predictions3 = skll.utilities.generate_predictions:main [py3k]
- join_features3 = skll.utilities.join_features:main [py3k]
- print_model_weights3 = skll.utilities.print_model_weights:main [py3k]
- run_experiment3 = skll.utilities.run_experiment:main [py3k]
- skll_convert3 = skll.utilities.skll_convert:main [py3k]
- summarize_results3 = skll.utilities.summarize_results:main [py3k]

requirements:
build:
- python
- joblib
- setuptools
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
- numpy
- scipy
- pyyaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
- mock [py2k]

run:
- python
- joblib
- scikit-learn 0.17.1
- six
- prettytable
- beautiful-soup
- numpy
- scipy
- pyyaml
- configparser [py2k]
- futures [py2k]
- logutils [py2k]
- mock [py2k]

test:
# Python imports
imports:
- skll

commands:
- compute_eval_from_predictions --help
- filter_features --help
- generate_predictions --help
- join_features --help
- print_model_weights --help
- run_experiment --help
- skll_convert --help
- summarize_results --help

about:
home: http://github.com/EducationalTestingService/skll
license: BSD 3-clause
31 changes: 20 additions & 11 deletions doc/run_experiment.rst
@@ -414,7 +414,7 @@ custom_learner_path *(Optional)*
""""""""""""""""""""""""""""""""

Path to a ``.py`` file that defines a custom learner. This file will be
imported dynamically. This is only required if a custom learner in specified
imported dynamically. This is only required if a custom learner is specified
in the list of :ref:`learners`.

All custom learners must implement the ``fit`` and
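
For illustration, a minimal custom learner sketch, assuming the usual
scikit-learn estimator interface (the class name and its internals are
hypothetical, not part of this commit):

.. code-block:: python

    import numpy as np
    from sklearn.base import BaseEstimator, ClassifierMixin

    class MajorityClassLearner(BaseEstimator, ClassifierMixin):
        """Hypothetical learner that always predicts the majority class."""

        def fit(self, X, y):
            # Remember the most frequent label in the training data.
            values, counts = np.unique(y, return_counts=True)
            self.majority_ = values[np.argmax(counts)]
            return self

        def predict(self, X):
            # Predict the stored majority label for every row.
            return np.full(X.shape[0], self.majority_)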
@@ -562,7 +562,7 @@ SVR
{'class_weight': 'balanced'}
The second option allows you to assign an specific weight per each
The second option allows you to assign a specific weight per each
class. The default weight per class is 1. For example:

.. code-block:: python
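
    # Hypothetical sketch: the commit's original example is collapsed in
    # this view. Here class 1 gets five times the weight of class 0.
    {'class_weight': {0: 1, 1: 5}}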
@@ -581,7 +581,7 @@ scale by mean, your data will automatically be converted to dense, so use
caution when you have a very large dataset. Valid options are:

none
perform no feature scaling at all.
Perform no feature scaling at all.

with_std
Scale feature values by their standard deviation.
@@ -590,7 +590,7 @@ with_mean
Center features by subtracting their mean.

both
perform both centering and scaling.
Perform both centering and scaling.

Defaults to ``none``.
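
In scikit-learn terms these options roughly map onto ``StandardScaler``
flags; the correspondence below is an illustrative assumption, not taken
from this commit:

.. code-block:: python

    from sklearn.preprocessing import StandardScaler

    # Rough equivalents of the feature_scaling options (illustrative):
    scalers = {
        'with_std': StandardScaler(with_mean=False, with_std=True),
        'with_mean': StandardScaler(with_mean=True, with_std=False),
        'both': StandardScaler(with_mean=True, with_std=True),
    }
    # 'none' applies no scaler; centering ('with_mean' or 'both') needs
    # dense data, hence the caution above about very large datasets.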

@@ -630,12 +630,12 @@ min_feature_count *(Optional)*
The minimum number of examples for which the value of a feature must be nonzero
to be included in the model. Defaults to 1.

.. _objective:
.. _objectives:

objective *(Optional)*
objectives *(Optional)*
""""""""""""""""""""""

The objective function to use for tuning. Valid options are:
The objective functions to use for tuning. This is a list of one or more objective functions. Valid options are:

.. _classification_obj:

@@ -694,7 +694,9 @@ Regression:
* **mean_squared_error**: `Mean squared error regression loss <http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html>`__


Defaults to ``f1_score_micro``.
Defaults to ``['f1_score_micro']``.

**Note**: Using ``objective=x`` instead of ``objectives=['x']`` is also acceptable, for backward-compatibility.

.. _param_grids:

@@ -839,7 +841,7 @@ Using run_experiment
--------------------
.. program:: run_experiment

Once you have create the :ref:`configuration file <create_config>` for your
Once you have created the :ref:`configuration file <create_config>` for your
experiment, you can usually just get your experiment started by running
``run_experiment CONFIGFILE``. That said, there are a few options that are
specified via command-line arguments instead of in the configuration file:
@@ -919,8 +921,8 @@ Output files
^^^^^^^^^^^^

The result, log, model, and prediction files generated by run_experiment will
all share the following automatically generated prefix
``EXPERIMENT_FEATURESET_LEARNER``, where the following definitions hold:
all share the automatically generated prefix
``EXPERIMENT_FEATURESET_LEARNER_OBJECTIVE``, where the following definitions hold:

``EXPERIMENT``
The name specified as :ref:`experiment_name` in the configuration file.
@@ -931,6 +933,13 @@ all share the following automatically generated prefix
``LEARNER``
The learner the current results/model/etc. was generated using.

``OBJECTIVE``
The objective function the current results/model/etc. was generated using.

However, if ``objectives`` contains only one objective function,
the result, log, model, and prediction files will share the prefix
``EXPERIMENT_FEATURESET_LEARNER``. For backward-compatibility, the same applies when a single objective is specified using ``objective=x``.
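
For example, with an experiment named ``titanic``, a featureset named
``family``, and the learner ``RandomForestClassifier`` tuned on
``accuracy`` (all hypothetical names), the files would share the prefix
``titanic_family_RandomForestClassifier_accuracy``.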

For every experiment you run, there will also be a result summary file
generated that is a tab-delimited file summarizing the results for each
learner-featureset combination you have in your configuration file. It is named
13 changes: 7 additions & 6 deletions doc/tutorial.rst
@@ -90,9 +90,10 @@ The :ref:`Tuning` section defines how we want our model to be tuned. Setting
`GridSearchCV <http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV>`_
class, which is an implementation of the
`standard, brute-force approach to hyperparameter optimization <http://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search>`_.
:ref:`objective <objective>` refers to the desired objective function; here,
``accuracy`` will optimize for overall accuracy. You can see a list of all of
all the available objective functions :ref:`here <objective>`.

:ref:`objectives <objectives>` refers to the desired objective functions; here,
``accuracy`` will optimize for overall accuracy. You can see a list of all the
available objective functions :ref:`here <objectives>`.
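
As a rough sketch of what this amounts to in scikit-learn 0.17 terms
(conceptual only, not SKLL's actual code; the learner and parameter grid
below are hypothetical):

.. code-block:: python

    from sklearn.grid_search import GridSearchCV
    from sklearn.linear_model import LogisticRegression

    # grid_search = true with objectives = ['accuracy'] conceptually
    # reduces to a brute-force search like this one.
    param_grid = {'C': [0.01, 0.1, 1.0, 10.0]}
    searcher = GridSearchCV(LogisticRegression(), param_grid,
                            scoring='accuracy')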

In the :ref:`Output` section, the arguments to each of these are directories
where you'd like all of the relevant output from your experiment to go.
@@ -156,9 +157,9 @@ That should produce output like::
We could squelch the warnings about shuffling by setting
:ref:`shuffle <shuffle>` to ``True`` in the :ref:`Input` section.

The reason we see the loading messages repeated is that we are running the
different learners sequentially, whereas SKLL is designed to take advantage
of a cluster to execute everything in parallel via GridMap.


Examine the results
@@ -255,7 +256,7 @@ Training and testing directories
""""""""""""""""""""""""""""""""

At minimum you will probably want to work with a training set and a testing
set. If you have multiple feature files that you would SKLL to join together
set. If you have multiple feature files that you would like SKLL to join together
for you automatically, you will need to create feature files with the exact
same names and store them in training and testing directories. You can
specify these directories in your config file using
2 changes: 1 addition & 1 deletion examples/boston/cross_val.cfg
@@ -13,7 +13,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = pearson
objectives = ['pearson']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/boston/evaluate.cfg
@@ -15,7 +15,7 @@ suffix = .jsonlines
[Tuning]
grid_search = true
#objective = f1_score_micro
objective = unweighted_kappa
objectives = ['unweighted_kappa']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/iris/cross_val.cfg
@@ -14,7 +14,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = f1_score_micro
objectives = ['f1_score_micro']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/iris/evaluate.cfg
@@ -15,7 +15,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
objective = f1_score_micro
objectives = ['f1_score_micro']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/cross_validate.cfg
@@ -12,7 +12,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/evaluate.cfg
@@ -13,7 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/evaluate_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train+dev.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train+dev_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train_only.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = false
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/predict_train_only_tuned.cfg
@@ -14,7 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion examples/titanic/train.cfg
@@ -13,7 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
objective = accuracy
objectives = ['accuracy']

[Output]
# again, these can be absolute paths
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
scikit-learn==0.17.0
scikit-learn==0.17.1
six
PrettyTable
beautifulsoup4
