Merge pull request #667 from EducationalTestingService/use-5-folds-for-grid-search

Use 5 folds for grid search
desilinguist committed Feb 23, 2021
2 parents dc5ed05 + ea4966d commit 86781f1
Showing 43 changed files with 101 additions and 56 deletions.
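In practice, this means Learner.train and Learner.cross_validate now use 5-fold cross-validation during grid search unless the caller overrides grid_search_folds; the example and test configurations changed below explicitly pin the value to 3, keeping their previous behavior. A minimal sketch of the new default, assuming SKLL is installed; the feature file path and objective are placeholders, not part of this commit:

    from skll.data import Reader
    from skll.learner import Learner

    # load a training FeatureSet (the path here is hypothetical)
    train_fs = Reader.for_path("train/example.jsonlines").read()

    learner = Learner("LogisticRegression")

    # after this commit, grid search runs with 5 folds by default ...
    grid_score, grid_results = learner.train(train_fs, grid_objective="accuracy")

    # ... and the old 3-fold behavior can still be requested explicitly
    grid_score, grid_results = learner.train(train_fs,
                                             grid_objective="accuracy",
                                             grid_search_folds=3)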
2 changes: 1 addition & 1 deletion doc/run_experiment.rst
@@ -1012,7 +1012,7 @@ grid_search_folds *(Optional)*
""""""""""""""""""""""""""""""
-The number of folds to use for grid search. Defaults to 3.
+The number of folds to use for grid search. Defaults to 5.
.. _grid_search_jobs:
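For configuration-driven experiments, the same option lives in the [Tuning] section of an experiment config. A config that wants to keep the previous 3-fold behavior can set it explicitly, which is exactly the pattern applied to the example and test configs in this commit (the objective below is only an illustration):

    [Tuning]
    grid_search = true
    grid_search_folds = 3
    objectives = ['accuracy']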
1 change: 1 addition & 0 deletions examples/boston/cross_val.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['pearson']

[Output]
1 change: 1 addition & 0 deletions examples/boston/evaluate.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['unweighted_kappa']

[Output]
1 change: 1 addition & 0 deletions examples/boston/voting.cfg
@@ -19,6 +19,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['pearson']

[Output]
1 change: 1 addition & 0 deletions examples/iris/cross_val.cfg
@@ -14,6 +14,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['f1_score_micro']

[Output]
1 change: 1 addition & 0 deletions examples/iris/evaluate.cfg
@@ -15,6 +15,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['f1_score_micro']

[Output]
1 change: 1 addition & 0 deletions examples/iris/voting.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ["accuracy"]

[Output]
1 change: 1 addition & 0 deletions examples/titanic/cross_validate.cfg
@@ -12,6 +12,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/evaluate_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/predict_train+dev_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/predict_train_only_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/train.cfg
@@ -13,6 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
2 changes: 1 addition & 1 deletion skll/config/__init__.py
@@ -62,7 +62,7 @@ def __init__(self):
'featureset_names': '[]',
'fixed_parameters': '[]',
'grid_search': 'True',
-'grid_search_folds': '3',
+'grid_search_folds': '5',
'grid_search_jobs': '0',
'hasher_features': '0',
'id_col': 'id',
4 changes: 2 additions & 2 deletions skll/learner/__init__.py
@@ -748,7 +748,7 @@ def _train_setup(self, examples):
def train(self, # noqa: C901
examples,
param_grid=None,
-grid_search_folds=3,
+grid_search_folds=5,
grid_search=True,
grid_objective=None,
grid_jobs=None,

@@ -1404,7 +1404,7 @@ def cross_validate(self,
stratified=True,
cv_folds=10,
grid_search=True,
-grid_search_folds=3,
+grid_search_folds=5,
grid_jobs=None,
grid_objective=None,
output_metrics=[],
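The same default applies when tuning happens inside cross-validation. A short sketch of calling the updated cross_validate signature, reusing the learner and feature set from the earlier sketch; grid_search_folds=5 is written out only to make the new default visible:

    # `learner` and `train_fs` are assumed from the earlier sketch
    results = learner.cross_validate(train_fs,
                                     cv_folds=10,
                                     grid_search=True,
                                     grid_search_folds=5,  # the new default, written out
                                     grid_objective="accuracy")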
4 changes: 2 additions & 2 deletions skll/learner/voting.py
@@ -276,7 +276,7 @@ def from_file(cls, learner_path, logger=None):
def train(self,
examples,
param_grid_list=None,
-grid_search_folds=3,
+grid_search_folds=5,
grid_search=True,
grid_objective=None,
grid_jobs=None,

@@ -618,7 +618,7 @@ def cross_validate(self,
stratified=True,
cv_folds=10,
grid_search=True,
-grid_search_folds=3,
+grid_search_folds=5,
grid_jobs=None,
grid_objective=None,
output_metrics=[],
2 changes: 1 addition & 1 deletion skll/utils/constants.py
@@ -122,7 +122,7 @@

KNOWN_REQUIRES_DENSE = (BayesianRidge, Lars, TheilSenRegressor)

-MAX_CONCURRENT_PROCESSES = int(os.getenv('SKLL_MAX_CONCURRENT_PROCESSES', '3'))
+MAX_CONCURRENT_PROCESSES = int(os.getenv('SKLL_MAX_CONCURRENT_PROCESSES', '5'))

VALID_FEATURE_SCALING_OPTIONS = frozenset(['both',
'none',
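MAX_CONCURRENT_PROCESSES is read from the environment when the module is first imported, so the bumped default of 5 only applies when SKLL_MAX_CONCURRENT_PROCESSES is unset. A sketch of overriding it (the value 2 is arbitrary):

    import os

    # must be set before skll.utils.constants is first imported
    os.environ["SKLL_MAX_CONCURRENT_PROCESSES"] = "2"

    from skll.utils.constants import MAX_CONCURRENT_PROCESSES
    print(MAX_CONCURRENT_PROCESSES)  # prints 2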
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f075_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_bad.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f075_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kappa.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["unweighted_kappa"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs1.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["one_minus_f1_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs2.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f1_score_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs3.template.cfg
@@ -8,6 +8,7 @@ learners=["LinearSVC"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["fake_prob_metric_multiclass"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_folds_file_grid.template.cfg
@@ -11,6 +11,7 @@ folds_file = ../train/folds_file_test.csv
[Tuning]
grid_search=True
objectives = ['f1_score_micro']
+grid_search_folds=3
use_folds_file_for_grid_search=False

[Output]
@@ -10,6 +10,7 @@ custom_learner_path=other/majority_class_learner.py

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_regression_fancy_output.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=True
+grid_search_folds=3
objectives=['pearson']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_relative_paths.template.cfg
@@ -9,6 +9,7 @@ custom_learner_path=../other/majority_class_learner.py

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_single_file.template.cfg
@@ -7,6 +7,7 @@ learners=["RandomForestClassifier"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["accuracy", "f1", "f05"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary_feature_hasher.template.cfg
@@ -11,6 +11,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
@@ -11,6 +11,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary_with_metrics.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
23 changes: 17 additions & 6 deletions tests/test_custom_metrics.py
@@ -275,7 +275,9 @@ def test_custom_metric_api_experiment():
# set up a learner to tune using one of the custom metrics
# and evaluate it using the other one
learner = Learner("LogisticRegression")
-_ = learner.train(train_fs, grid_objective="f075_macro")
+_ = learner.train(train_fs,
+grid_objective="f075_macro",
+grid_search_folds=3)
results = learner.evaluate(test_fs,
grid_objective="f075_macro",
output_metrics=["balanced_accuracy", "f06_micro"])

@@ -344,7 +346,9 @@ def test_custom_metric_api_experiment_with_kappa_filename():
# this should work as there should be no confict between
# the two "kappa" names
learner = Learner("LogisticRegression")
-_ = learner.train(train_fs, grid_objective="unweighted_kappa")
+_ = learner.train(train_fs,
+grid_objective="unweighted_kappa",
+grid_search_folds=3)
results = learner.evaluate(test_fs,
grid_objective="unweighted_kappa",
output_metrics=["balanced_accuracy",

@@ -431,14 +435,16 @@ def test_api_with_inverted_custom_metric():
learner1 = Learner("LogisticRegression")
(grid_score1,
grid_results_dict1) = learner1.train(train_fs,
-grid_objective="one_minus_precision")
+grid_objective="one_minus_precision",
+grid_search_folds=3)

# now setup another learner that uses the complementary version
# of our custom metric (regular precision) for grid search
learner2 = Learner("LogisticRegression")
(grid_score2,
grid_results_dict2) = learner2.train(train_fs,
-grid_objective="precision")
+grid_objective="precision",
+grid_search_folds=3)

# for both learners the ranking of the C hyperparameter should be
# should be the identical since when we defined one_minus_precision

@@ -530,12 +536,17 @@ def test_api_with_custom_prob_metric():
learner1 = Learner("LinearSVC")
assert_raises_regex(AttributeError,
r"has no attribute 'predict_proba'",
-learner1.train, train_fs, grid_objective="fake_prob_metric")
+learner1.train,
+train_fs,
+grid_objective="fake_prob_metric",
+grid_search_folds=3)

# set up another learner with explicit probability support
# this should work just fine with our custom metric
learner2 = Learner("SVC", probability=True)
-grid_score, _ = learner2.train(train_fs, grid_objective="fake_prob_metric")
+grid_score, _ = learner2.train(train_fs,
+grid_objective="fake_prob_metric",
+grid_search_folds=3)
ok_(grid_score > 0.95)
