Merge pull request #462 from EducationalTestingService/increase-coverage
Write new tests to increase test coverage
desilinguist committed Feb 14, 2019
2 parents 1067873 + 021d250 commit b2b5b22
Showing 7 changed files with 126 additions and 14 deletions.
4 changes: 2 additions & 2 deletions skll/learner.py
@@ -1385,8 +1385,8 @@ def train(self, examples, param_grid=None, grid_search_folds=3,
# selected learner
if grid_search:
if not grid_objective:
raise ValueError("You must specify a grid objective "
"if doing grid search.")
raise ValueError("Grid search is on by default. You must either "
"specify a grid objective or turn off grid search.")
if self.model_type._estimator_type == 'regressor':
# types 2-4 are valid for all regression models
if grid_objective in _CLASSIFICATION_ONLY_OBJ_FUNCS:
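
With grid search on by default, the reworded error fires whenever train() is called without an objective, which the new test_learner_api_grid_search_no_objective test below verifies. A minimal sketch of the two ways to avoid it, assuming a training FeatureSet named train_fs is already in hand; the keyword names follow the variables shown in the hunk above, and the objective name is only an example:

from skll.learner import Learner

learner = Learner('LogisticRegression')

# either keep grid search on and name an objective explicitly ...
learner.train(train_fs, grid_objective='accuracy')

# ... or turn grid search off, as the new tests in this commit do
learner.train(train_fs, grid_search=False)
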
13 changes: 13 additions & 0 deletions tests/other/custom_learner.txt
@@ -0,0 +1,13 @@
# License: BSD 3 clause
"""
A simple wrapper around the existing LogisticRegression class, for testing
custom learners functionality.

:author: Michael Heilman (mheilman@ets.org)
"""

from sklearn.linear_model import LogisticRegression


class CustomLogisticRegressionWrapper(LogisticRegression):
pass
Binary file added tests/other/test_load_saved_model.2.model
Binary file added tests/other/test_load_saved_model.3.model
82 changes: 80 additions & 2 deletions tests/test_classification.py
@@ -17,6 +17,7 @@
import json
import os
import re
+ import sys
import warnings

from io import open
@@ -26,15 +27,14 @@
from nose.tools import eq_, assert_almost_equal, raises

from sklearn.exceptions import ConvergenceWarning
- from sklearn.feature_extraction import FeatureHasher
from sklearn.metrics import accuracy_score

from skll.data import FeatureSet
from skll.data.readers import NDJReader
from skll.data.writers import NDJWriter
from skll.config import _parse_config_file
from skll.experiments import run_configuration
- from skll.learner import Learner
+ from skll.learner import Learner, _train_and_score
from skll.learner import _DEFAULT_PARAM_GRIDS

from utils import (make_classification_data, make_regression_data,
@@ -671,3 +671,81 @@ def test_bad_xval_float_classes():

yield check_bad_xval_float_classes, True
yield check_bad_xval_float_classes, False


def check_train_and_score_function(model_type):
"""
Check that the _train_and_score() function works as expected
"""

# create train and test data
(train_fs,
test_fs) = make_classification_data(num_examples=500,
train_test_ratio=0.7,
num_features=5,
use_feature_hashing=False,
non_negative=True)

# call _train_and_score() on this data
estimator_name = 'LogisticRegression' if model_type == 'classifier' else 'Ridge'
metric = 'accuracy' if model_type == 'classifier' else 'pearson'
learner1 = Learner(estimator_name)
train_score1, test_score1 = _train_and_score(learner1, train_fs, test_fs, metric)

# this should yield identical results when training another instance
# of the same learner without grid search and shuffling and evaluating
# that instance on the train and the test set
learner2 = Learner(estimator_name)
learner2.train(train_fs, grid_search=False, shuffle=False)
train_score2 = learner2.evaluate(train_fs, output_metrics=[metric])[-1][metric]
test_score2 = learner2.evaluate(test_fs, output_metrics=[metric])[-1][metric]

eq_(train_score1, train_score2)
eq_(test_score1, test_score2)


def test_train_and_score_function():
yield check_train_and_score_function, 'classifier'
yield check_train_and_score_function, 'regressor'


@raises(ValueError)
def test_learner_api_grid_search_no_objective():

(train_fs,
test_fs) = make_classification_data(num_examples=500,
train_test_ratio=0.7,
num_features=5,
use_feature_hashing=False,
non_negative=True)
learner = Learner('LogisticRegression')
_ = learner.train(train_fs)


def test_learner_api_load_into_existing_instance():
"""
Check that `Learner.load()` works as expected
"""

# create a LinearSVC instance and train it on some data
learner1 = Learner('LinearSVC')
(train_fs,
test_fs) = make_classification_data(num_examples=200,
num_features=5,
use_feature_hashing=False,
non_negative=True)
learner1.train(train_fs, grid_search=False)

# now use `load()` to replace the existing instance with a
# different saved learner
other_model_file = join(_my_dir, 'other', 'test_load_saved_model.{}.model'.format(sys.version_info[0]))
learner1.load(other_model_file)

# now load the saved model into another instance using the class method
# `from_file()`
learner2 = Learner.from_file(other_model_file)

# check that the two instances are now basically the same
eq_(learner1.model_type, learner2.model_type)
eq_(learner1.model_params, learner2.model_params)
eq_(learner1.model_kwargs, learner2.model_kwargs)
17 changes: 14 additions & 3 deletions tests/test_custom_learner.py
@@ -19,10 +19,11 @@
from os.path import abspath, dirname, exists, join

import numpy as np
+ from nose.tools import raises
from numpy.testing import assert_array_equal
from skll.data import NDJWriter
from skll.experiments import run_configuration
- from skll.learner import _DEFAULT_PARAM_GRIDS
+ from skll.learner import _DEFAULT_PARAM_GRIDS, Learner

from utils import fill_in_config_paths, make_classification_data

@@ -204,8 +205,7 @@ def test_custom_learner_model_loading():
outprefix = 'test_model_custom_learner'
pred_file = join(_my_dir, 'output',
'{}_{}_CustomLogisticRegressionWrapper'
-                  '_predictions.tsv'.format(outprefix,
-                                            outprefix))
+                  '_predictions.tsv'.format(outprefix, outprefix))
preds1 = read_predictions(pred_file)
os.unlink(pred_file)

@@ -222,3 +222,14 @@ def test_custom_learner_model_loading():

# make sure that they are the same as before
assert_array_equal(preds1, preds2)


@raises(ValueError)
def test_custom_learner_api_missing_file():
_ = Learner('CustomType')


@raises(ValueError)
def test_custom_learner_api_bad_extension():
other_dir = join(_my_dir, 'other')
_ = Learner('_CustomLogisticRegressionWrapper', custom_learner_path=join(other_dir, 'custom_learner.txt'))
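
The two tests above cover the failure paths of the custom-learner API: a custom learner name given without a custom_learner_path, and a path whose extension is not .py. For contrast, a minimal sketch of the successful path, assuming the wrapper class from custom_learner.txt has been saved to a hypothetical custom_learner.py (file name and location are illustrative, not part of this commit):

from skll.learner import Learner

# load CustomLogisticRegressionWrapper from a .py file on disk
learner = Learner('CustomLogisticRegressionWrapper',
                  custom_learner_path='tests/other/custom_learner.py')
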
24 changes: 17 additions & 7 deletions tests/test_regression.py
@@ -21,18 +21,19 @@
from itertools import product
from os.path import abspath, dirname, join, exists

- from nose.tools import eq_, assert_almost_equal
+ from nose.tools import eq_, assert_almost_equal, raises

import numpy as np
from numpy.testing import assert_allclose
from scipy.stats import pearsonr
from sklearn.exceptions import ConvergenceWarning
+ from sklearn.linear_model import LogisticRegression
from sklearn.utils.testing import assert_greater, assert_less

from skll.data import FeatureSet, NDJWriter
from skll.config import _setup_config_parser
from skll.experiments import run_configuration
- from skll.learner import Learner
+ from skll.learner import Learner, rescaled
from skll.learner import _DEFAULT_PARAM_GRIDS

from utils import make_regression_data, fill_in_config_paths_for_fancy_output
@@ -676,10 +677,19 @@ def test_dummy_regressor_predict():
{"strategy": "quantile", "quantile": 0.0},
{"strategy": "quantile", "quantile": 1.0},
{"strategy": "constant", "constant": 1}],
- [np.ones(10)*np.mean(train_labels),
-  np.ones(10)*np.median(train_labels),
-  np.ones(10)*np.median(train_labels),
-  np.ones(10)*np.min(train_labels),
-  np.ones(10)*np.max(train_labels),
+ [np.ones(10) * np.mean(train_labels),
+  np.ones(10) * np.median(train_labels),
+  np.ones(10) * np.median(train_labels),
+  np.ones(10) * np.min(train_labels),
+  np.ones(10) * np.max(train_labels),
np.ones(10)]):
yield check_dummy_regressor_predict, model_args, train_labels, expected_output


@raises(ValueError)
def test_learner_api_rescaling_classifier():
"""
Check that rescaling fails for classifiers
"""

_ = rescaled(LogisticRegression)
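
The test above confirms that rescaled() refuses classifier classes. For contrast, a minimal sketch of the intended use with a regressor class (the Ridge import is an assumption for illustration, not part of this diff):

from sklearn.linear_model import Ridge
from skll.learner import rescaled

# decorating a regressor class is accepted; the returned class produces
# predictions rescaled against the distribution of the training labels
RescaledRidge = rescaled(Ridge)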
