Merge pull request #667 from EducationalTestingService/use-5-folds-for-grid-search

Use 5 folds for grid search
desilinguist committed Feb 23, 2021
2 parents dc5ed05 + ea4966d commit 86781f1
Showing 43 changed files with 101 additions and 56 deletions.
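In practice, this means Learner.train and Learner.cross_validate now use 5-fold cross-validation during grid search unless the caller overrides grid_search_folds; the example and test configurations changed below explicitly pin the value to 3, keeping their previous behavior. A minimal sketch of the new default, assuming SKLL is installed; the feature file path and objective are placeholders, not part of this commit:

    from skll.data import Reader
    from skll.learner import Learner

    # load a training FeatureSet (the path here is hypothetical)
    train_fs = Reader.for_path("train/example.jsonlines").read()

    learner = Learner("LogisticRegression")

    # after this commit, grid search runs with 5 folds by default ...
    grid_score, grid_results = learner.train(train_fs, grid_objective="accuracy")

    # ... and the old 3-fold behavior can still be requested explicitly
    grid_score, grid_results = learner.train(train_fs,
                                             grid_objective="accuracy",
                                             grid_search_folds=3)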
2 changes: 1 addition & 1 deletion doc/run_experiment.rst
@@ -1012,7 +1012,7 @@ grid_search_folds *(Optional)*
""""""""""""""""""""""""""""""
-The number of folds to use for grid search. Defaults to 3.
+The number of folds to use for grid search. Defaults to 5.
.. _grid_search_jobs:
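For configuration-driven experiments, the same option lives in the [Tuning] section of an experiment config. A config that wants to keep the previous 3-fold behavior can set it explicitly, which is exactly the pattern applied to the example and test configs in this commit (the objective below is only an illustration):

    [Tuning]
    grid_search = true
    grid_search_folds = 3
    objectives = ['accuracy']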
1 change: 1 addition & 0 deletions examples/boston/cross_val.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['pearson']

[Output]
1 change: 1 addition & 0 deletions examples/boston/evaluate.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['unweighted_kappa']

[Output]
1 change: 1 addition & 0 deletions examples/boston/voting.cfg
@@ -19,6 +19,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['pearson']

[Output]
1 change: 1 addition & 0 deletions examples/iris/cross_val.cfg
@@ -14,6 +14,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['f1_score_micro']

[Output]
1 change: 1 addition & 0 deletions examples/iris/evaluate.cfg
@@ -15,6 +15,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['f1_score_micro']

[Output]
1 change: 1 addition & 0 deletions examples/iris/voting.cfg
@@ -17,6 +17,7 @@ suffix = .jsonlines

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ["accuracy"]

[Output]
1 change: 1 addition & 0 deletions examples/titanic/cross_validate.cfg
@@ -12,6 +12,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/evaluate_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/predict_train+dev_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/predict_train_only_tuned.cfg
@@ -14,6 +14,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
1 change: 1 addition & 0 deletions examples/titanic/train.cfg
@@ -13,6 +13,7 @@ id_col = PassengerId

[Tuning]
grid_search = true
+grid_search_folds = 3
objectives = ['accuracy']

[Output]
2 changes: 1 addition & 1 deletion skll/config/__init__.py
@@ -62,7 +62,7 @@ def __init__(self):
'featureset_names': '[]',
'fixed_parameters': '[]',
'grid_search': 'True',
-'grid_search_folds': '3',
+'grid_search_folds': '5',
'grid_search_jobs': '0',
'hasher_features': '0',
'id_col': 'id',
4 changes: 2 additions & 2 deletions skll/learner/__init__.py
@@ -748,7 +748,7 @@ def _train_setup(self, examples):
def train(self, # noqa: C901
examples,
param_grid=None,
-grid_search_folds=3,
+grid_search_folds=5,
grid_search=True,
grid_objective=None,
grid_jobs=None,

@@ -1404,7 +1404,7 @@ def cross_validate(self,
stratified=True,
cv_folds=10,
grid_search=True,
-grid_search_folds=3,
+grid_search_folds=5,
grid_jobs=None,
grid_objective=None,
output_metrics=[],
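The same default applies when tuning happens inside cross-validation. A short sketch of calling the updated cross_validate signature, reusing the learner and feature set from the earlier sketch; grid_search_folds=5 is written out only to make the new default visible:

    # `learner` and `train_fs` are assumed from the earlier sketch
    results = learner.cross_validate(train_fs,
                                     cv_folds=10,
                                     grid_search=True,
                                     grid_search_folds=5,  # the new default, written out
                                     grid_objective="accuracy")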
4 changes: 2 additions & 2 deletions skll/learner/voting.py
@@ -276,7 +276,7 @@ def from_file(cls, learner_path, logger=None):
def train(self,
examples,
param_grid_list=None,
-grid_search_folds=3,
+grid_search_folds=5,
grid_search=True,
grid_objective=None,
grid_jobs=None,

@@ -618,7 +618,7 @@ def cross_validate(self,
stratified=True,
cv_folds=10,
grid_search=True,
-grid_search_folds=3,
+grid_search_folds=5,
grid_jobs=None,
grid_objective=None,
output_metrics=[],
2 changes: 1 addition & 1 deletion skll/utils/constants.py
@@ -122,7 +122,7 @@

KNOWN_REQUIRES_DENSE = (BayesianRidge, Lars, TheilSenRegressor)

-MAX_CONCURRENT_PROCESSES = int(os.getenv('SKLL_MAX_CONCURRENT_PROCESSES', '3'))
+MAX_CONCURRENT_PROCESSES = int(os.getenv('SKLL_MAX_CONCURRENT_PROCESSES', '5'))

VALID_FEATURE_SCALING_OPTIONS = frozenset(['both',
'none',
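MAX_CONCURRENT_PROCESSES is read from the environment when the module is first imported, so the bumped default of 5 only applies when SKLL_MAX_CONCURRENT_PROCESSES is unset. A sketch of overriding it (the value 2 is arbitrary):

    import os

    # must be set before skll.utils.constants is first imported
    os.environ["SKLL_MAX_CONCURRENT_PROCESSES"] = "2"

    from skll.utils.constants import MAX_CONCURRENT_PROCESSES
    print(MAX_CONCURRENT_PROCESSES)  # prints 2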
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f075_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_bad.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f075_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kappa.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["unweighted_kappa"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs1.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["one_minus_f1_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs2.template.cfg
@@ -8,6 +8,7 @@ learners=["LogisticRegression"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["f1_score_macro"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_custom_metrics_kwargs3.template.cfg
@@ -8,6 +8,7 @@ learners=["LinearSVC"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["fake_prob_metric_multiclass"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_folds_file_grid.template.cfg
@@ -11,6 +11,7 @@ folds_file = ../train/folds_file_test.csv
[Tuning]
grid_search=True
objectives = ['f1_score_micro']
+grid_search_folds=3
use_folds_file_for_grid_search=False

[Output]
@@ -10,6 +10,7 @@ custom_learner_path=other/majority_class_learner.py

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_regression_fancy_output.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=True
+grid_search_folds=3
objectives=['pearson']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_relative_paths.template.cfg
@@ -9,6 +9,7 @@ custom_learner_path=../other/majority_class_learner.py

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_single_file.template.cfg
@@ -7,6 +7,7 @@ learners=["RandomForestClassifier"]

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=["accuracy", "f1", "f05"]

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary_feature_hasher.template.cfg
@@ -11,6 +11,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
@@ -11,6 +11,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
1 change: 1 addition & 0 deletions tests/configs/test_summary_with_metrics.template.cfg
@@ -9,6 +9,7 @@ suffix=.jsonlines

[Tuning]
grid_search=true
+grid_search_folds=3
objectives=['accuracy']

[Output]
23 changes: 17 additions & 6 deletions tests/test_custom_metrics.py
@@ -275,7 +275,9 @@ def test_custom_metric_api_experiment():
# set up a learner to tune using one of the custom metrics
# and evaluate it using the other one
learner = Learner("LogisticRegression")
-_ = learner.train(train_fs, grid_objective="f075_macro")
+_ = learner.train(train_fs,
+grid_objective="f075_macro",
+grid_search_folds=3)
results = learner.evaluate(test_fs,
grid_objective="f075_macro",
output_metrics=["balanced_accuracy", "f06_micro"])

@@ -344,7 +346,9 @@ def test_custom_metric_api_experiment_with_kappa_filename():
# this should work as there should be no confict between
# the two "kappa" names
learner = Learner("LogisticRegression")
-_ = learner.train(train_fs, grid_objective="unweighted_kappa")
+_ = learner.train(train_fs,
+grid_objective="unweighted_kappa",
+grid_search_folds=3)
results = learner.evaluate(test_fs,
grid_objective="unweighted_kappa",
output_metrics=["balanced_accuracy",

@@ -431,14 +435,16 @@ def test_api_with_inverted_custom_metric():
learner1 = Learner("LogisticRegression")
(grid_score1,
grid_results_dict1) = learner1.train(train_fs,
-grid_objective="one_minus_precision")
+grid_objective="one_minus_precision",
+grid_search_folds=3)

# now setup another learner that uses the complementary version
# of our custom metric (regular precision) for grid search
learner2 = Learner("LogisticRegression")
(grid_score2,
grid_results_dict2) = learner2.train(train_fs,
-grid_objective="precision")
+grid_objective="precision",
+grid_search_folds=3)

# for both learners the ranking of the C hyperparameter should be
# should be the identical since when we defined one_minus_precision

@@ -530,12 +536,17 @@ def test_api_with_custom_prob_metric():
learner1 = Learner("LinearSVC")
assert_raises_regex(AttributeError,
r"has no attribute 'predict_proba'",
-learner1.train, train_fs, grid_objective="fake_prob_metric")
+learner1.train,
+train_fs,
+grid_objective="fake_prob_metric",
+grid_search_folds=3)

# set up another learner with explicit probability support
# this should work just fine with our custom metric
learner2 = Learner("SVC", probability=True)
-grid_score, _ = learner2.train(train_fs, grid_objective="fake_prob_metric")
+grid_score, _ = learner2.train(train_fs,
+grid_objective="fake_prob_metric",
+grid_search_folds=3)
ok_(grid_score > 0.95)
