Merge pull request #538 from EducationalTestingService/536-folds-file-for-evaluate-predict

Allow `folds_file` to be used for `evaluate` and `predict` tasks.
EducationalTestingService · Oct 2, 2019 · 0e629aa · 0e629aa
2 parents cf73da5 + fd48252
commit 0e629aa
Show file tree

Hide file tree

Showing 3 changed files with 83 additions and 30 deletions.
diff --git a/doc/run_experiment.rst b/doc/run_experiment.rst
@@ -447,31 +447,31 @@ folds_file *(Optional)*
 """"""""""""""""""""""""""""""
 
 Path to a csv file specifying the mapping of instances in the training data
-to folds. This can be specified when the :ref:`task` is either ``train`` or
-``cross_validate``. For the ``train`` task, if :ref:`grid_search <grid_search>`
-is ``True``, this file, if specified, will be used to define the
-cross-validation used for the grid search (leave one fold ID out at a time).
-Otherwise, it will be ignored.
+to folds. This can be specified for the ``train``, ``evaluate``, ``predict``,
+and ``cross_validate`` tasks. For the ``train``/``evaluate``/``predict`` tasks,
+if :ref:`grid_search <grid_search>` is ``True``, this file, if specified, will be 
+used to define the cross-validation used for the grid search (leave one fold ID out 
+at a time). Otherwise, it will be ignored.
 
 For the ``cross_validate`` task, this file will be used to define the outer
-cross-validation loop and, if :ref:`grid_search <grid_search>` is ``True``, also for the
-inner grid-search cross-validation loop. If the goal of specifiying the folds
-file is to ensure that the model does not learn to differentiate based on a confound:
-e.g. the data from the same person is always in the same fold, it makes sense to
-keep the same folds for both the outer and the inner cross-validation loops.
-
-However, sometimes the goal of specifying the folds file is simply for the
-purpose of comparison to another existing experiment or another context
-in which maintaining the constitution of the folds in the inner
-grid-search loop is not required. In this case, users may set the parameter
+cross-validation loop and also for the inner grid-search cross-validation loop. 
+If the goal of specifying the folds file is to ensure that the model does not
+learn to differentiate based on a confound (e.g., by keeping all the data from
+the same person in the same fold), it makes sense to keep the same folds for
+both the outer and the inner cross-validation loops.
+
+However, sometimes the goal of specifying the folds file is simply to allow
+comparison with another existing experiment, or with some other context, in
+which maintaining the constitution of the folds in the inner
+grid-search loop is not required. In this case, users may set the option
 :ref:`use_folds_file_for_grid_search <use_folds_file_for_grid_search>`
-to ``False`` which will then direct the inner grid-search cross-validation loop
-to simply use the number specified via :ref:`grid_search_folds <grid_search_folds>`
-instead of using the folds file. This will likely lead to shorter execution times as
-well depending on how many folds are in the folds file and the value
-of :ref:`grid_search_folds <grid_search_folds>`.
+in the configuration file to ``False`` which will then direct the inner 
+grid-search cross-validation loop to simply use the number specified via 
+:ref:`grid_search_folds <grid_search_folds>` instead of using the folds file. 
+This can also likely lead to shorter execution times depending on how many
+folds are in the folds file and the value of :ref:`grid_search_folds <grid_search_folds>`.
 
-The format of this file must be as follows: the first row must be a header.
+The format of this file should be as follows: the first row must be a header.
 This header row is ignored, so it doesn't matter what the header row contains,
 but it must be there. If there is no header row, whatever row is in its place
 will be ignored. The first column should consist of training set IDs and the

diff --git a/skll/config.py b/skll/config.py
@@ -847,15 +847,15 @@ def _parse_config_file(config_path, log_level=logging.INFO):
                          "to use `neg_log_loss` as the objective.")
 
     # set the folds appropriately based on the task:
-    #  (a) if the task is `train` and if an external fold mapping is specified
-    #      then use that mapping for grid search instead of the value
-    #      contained in `grid_search_folds`.
+    #  (a) if the task is `train`/`evaluate`/`predict` and if an external
+    #      fold mapping is specified then use that mapping for grid search
+    #      instead of the value contained in `grid_search_folds`.
     #  (b) if the task is `cross_validate` and an external fold mapping is specified
-    #      then use that mapping for the outer CV loop. Depending on the value of
-    #      `use_folds_file_for_grid_search`, use the fold mapping for the inner
-    #       grid-search loop as well.
+    #      then use that mapping for the outer CV loop and for the inner grid-search
+    #      loop. However, if `use_folds_file_for_grid_search` is `False`, do not
+    #      use the fold mapping for the inner loop.
     cv_folds = None
-    if task == 'train' and specified_folds_mapping:
+    if task in ['train', 'evaluate', 'predict'] and specified_folds_mapping:
         grid_search_folds = specified_folds_mapping
         # only print out the warning if the user actually wants to do grid search
         if do_grid_search:

diff --git a/tests/test_input.py b/tests/test_input.py
@@ -1242,7 +1242,7 @@ def test_config_parsing_metrics_and_objectives_overlap():
 def test_cv_folds_and_grid_search_folds():
 
     # we want to test all possible combinations of the following variables:
-    #  task = train, cross_validate
+    #  task = train, evaluate, predict, cross_validate
     #  cv_folds/folds_file = not specified, number, csv file
     #  grid_search_folds = not specified, number
     #  use_folds_file_for_grid_search = not specified, True, False
@@ -1269,6 +1269,42 @@ def test_cv_folds_and_grid_search_folds():
     # ('train', 'train/folds_file_test.csv', 7, None) ->  (None, fold_mapping)
     # ('train', 'train/folds_file_test.csv', 7, True) ->  (None, fold_mapping)
     # ('train', 'train/folds_file_test.csv', 7, False) ->  (None, fold_mapping)
+    # ('evaluate', None, None, None) ->  (None, 3)
+    # ('evaluate', None, None, True) ->  (None, 3)
+    # ('evaluate', None, None, False) ->  (None, 3)
+    # ('evaluate', None, 7, None) ->  (None, 7)
+    # ('evaluate', None, 7, True) ->  (None, 7)
+    # ('evaluate', None, 7, False) ->  (None, 7)
+    # ('evaluate', 5, None, None) ->  (None, 3)
+    # ('evaluate', 5, None, True) ->  (None, 3)
+    # ('evaluate', 5, None, False) ->   (None, 3)
+    # ('evaluate', 5, 7, None) ->  (None, 7)
+    # ('evaluate', 5, 7, True) ->  (None, 7)
+    # ('evaluate', 5, 7, False) ->  (None, 7)
+    # ('evaluate', 'train/folds_file_test.csv', None, None) ->  (None, fold_mapping)
+    # ('evaluate', 'train/folds_file_test.csv', None, True) ->  (None, fold_mapping)
+    # ('evaluate', 'train/folds_file_test.csv', None, False) ->  (None, fold_mapping)
+    # ('evaluate', 'train/folds_file_test.csv', 7, None) ->  (None, fold_mapping)
+    # ('evaluate', 'train/folds_file_test.csv', 7, True) ->  (None, fold_mapping)
+    # ('evaluate', 'train/folds_file_test.csv', 7, False) ->  (None, fold_mapping)
+    # ('predict', None, None, None) ->  (None, 3)
+    # ('predict', None, None, True) ->  (None, 3)
+    # ('predict', None, None, False) ->  (None, 3)
+    # ('predict', None, 7, None) ->  (None, 7)
+    # ('predict', None, 7, True) ->  (None, 7)
+    # ('predict', None, 7, False) ->  (None, 7)
+    # ('predict', 5, None, None) ->  (None, 3)
+    # ('predict', 5, None, True) ->  (None, 3)
+    # ('predict', 5, None, False) ->   (None, 3)
+    # ('predict', 5, 7, None) ->  (None, 7)
+    # ('predict', 5, 7, True) ->  (None, 7)
+    # ('predict', 5, 7, False) ->  (None, 7)
+    # ('predict', 'train/folds_file_test.csv', None, None) ->  (None, fold_mapping)
+    # ('predict', 'train/folds_file_test.csv', None, True) ->  (None, fold_mapping)
+    # ('predict', 'train/folds_file_test.csv', None, False) ->  (None, fold_mapping)
+    # ('predict', 'train/folds_file_test.csv', 7, None) ->  (None, fold_mapping)
+    # ('predict', 'train/folds_file_test.csv', 7, True) ->  (None, fold_mapping)
+    # ('predict', 'train/folds_file_test.csv', 7, False) ->  (None, fold_mapping)
     # ('cross_validate', None, None, None) ->  (10, 3)
     # ('cross_validate', None, None, True) ->  (10, 3)
     # ('cross_validate', None, None, False) ->  (10, 3)
@@ -1296,11 +1332,25 @@ def test_cv_folds_and_grid_search_folds():
           grid_search_folds,
           use_folds_file_for_grid_search),
          (chosen_cv_folds,
-          chosen_grid_search_folds)) in zip(product(['train', 'cross_validate'],
+          chosen_grid_search_folds)) in zip(product(['train', 'evaluate', 'predict', 'cross_validate'],
                                                     [None, 5, join(_my_dir, 'train/folds_file_test.csv')],
                                                     [None, 7],
                                                     [None, True, False]),
                                             [(None, 3), (None, 3), (None, 3),
+                                             (None, 7), (None, 7), (None, 7),
+                                             (None, 3), (None, 3), (None, 3),
+                                             (None, 7), (None, 7), (None, 7),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 3), (None, 3), (None, 3),
+                                             (None, 7), (None, 7), (None, 7),
+                                             (None, 3), (None, 3), (None, 3),
+                                             (None, 7), (None, 7), (None, 7),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 'fold_mapping'), (None, 'fold_mapping'),
+                                             (None, 3), (None, 3), (None, 3),
                                              (None, 7), (None, 7), (None, 7),
                                              (None, 3), (None, 3), (None, 3),
                                              (None, 7), (None, 7), (None, 7),
@@ -1330,6 +1380,7 @@ def check_cv_folds_and_grid_search_folds(task,
                                          chosen_grid_search_folds):
 
     train_dir = join(_my_dir, 'train')
+    test_dir = join(_my_dir, 'test')
     output_dir = join(_my_dir, 'output')
 
     # read in the folds file into a dictionary and replace the string
@@ -1354,6 +1405,8 @@ def check_cv_folds_and_grid_search_folds(task,
     # when cross-validating
     if task == 'train':
         values_to_fill_dict['models'] = output_dir
+    elif task in ['evaluate', 'predict']:
+        values_to_fill_dict['test_directory'] = test_dir
     elif task == 'cross_validate':
         values_to_fill_dict['results'] = output_dir