Merge pull request #462 from EducationalTestingService/increase-coverage
Write new tests to increase test coverage
desilinguist committed Feb 14, 2019
2 parents 1067873 + 021d250 commit b2b5b22
Showing 7 changed files with 126 additions and 14 deletions.
4 changes: 2 additions & 2 deletions skll/learner.py
@@ -1385,8 +1385,8 @@ def train(self, examples, param_grid=None, grid_search_folds=3,
# selected learner
if grid_search:
if not grid_objective:
raise ValueError("You must specify a grid objective "
"if doing grid search.")
raise ValueError("Grid search is on by default. You must either "
"specify a grid objective or turn off grid search.")
if self.model_type._estimator_type == 'regressor':
# types 2-4 are valid for all regression models
if grid_objective in _CLASSIFICATION_ONLY_OBJ_FUNCS:
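
With grid search on by default, the reworded error fires whenever train() is called without an objective, which the new test_learner_api_grid_search_no_objective test below verifies. A minimal sketch of the two ways to avoid it, assuming a training FeatureSet named train_fs is already in hand; the keyword names follow the variables shown in the hunk above, and the objective name is only an example:

from skll.learner import Learner

learner = Learner('LogisticRegression')

# either keep grid search on and name an objective explicitly ...
learner.train(train_fs, grid_objective='accuracy')

# ... or turn grid search off, as the new tests in this commit do
learner.train(train_fs, grid_search=False)
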
13 changes: 13 additions & 0 deletions tests/other/custom_learner.txt
@@ -0,0 +1,13 @@
# License: BSD 3 clause
"""
A simple wrapper around the existing LogisticRegression class, for testing
custom learners functionality.

:author: Michael Heilman (mheilman@ets.org)
"""

from sklearn.linear_model import LogisticRegression


class CustomLogisticRegressionWrapper(LogisticRegression):
pass
Binary file added tests/other/test_load_saved_model.2.model
Binary file added tests/other/test_load_saved_model.3.model
82 changes: 80 additions & 2 deletions tests/test_classification.py
@@ -17,6 +17,7 @@
import json
import os
import re
+ import sys
import warnings

from io import open
@@ -26,15 +27,14 @@
from nose.tools import eq_, assert_almost_equal, raises

from sklearn.exceptions import ConvergenceWarning
- from sklearn.feature_extraction import FeatureHasher
from sklearn.metrics import accuracy_score

from skll.data import FeatureSet
from skll.data.readers import NDJReader
from skll.data.writers import NDJWriter
from skll.config import _parse_config_file
from skll.experiments import run_configuration
- from skll.learner import Learner
+ from skll.learner import Learner, _train_and_score
from skll.learner import _DEFAULT_PARAM_GRIDS

from utils import (make_classification_data, make_regression_data,
@@ -671,3 +671,81 @@ def test_bad_xval_float_classes():

yield check_bad_xval_float_classes, True
yield check_bad_xval_float_classes, False


def check_train_and_score_function(model_type):
"""
Check that the _train_and_score() function works as expected
"""

# create train and test data
(train_fs,
test_fs) = make_classification_data(num_examples=500,
train_test_ratio=0.7,
num_features=5,
use_feature_hashing=False,
non_negative=True)

# call _train_and_score() on this data
estimator_name = 'LogisticRegression' if model_type == 'classifier' else 'Ridge'
metric = 'accuracy' if model_type == 'classifier' else 'pearson'
learner1 = Learner(estimator_name)
train_score1, test_score1 = _train_and_score(learner1, train_fs, test_fs, metric)

# this should yield identical results when training another instance
# of the same learner without grid search and shuffling and evaluating
# that instance on the train and the test set
learner2 = Learner(estimator_name)
learner2.train(train_fs, grid_search=False, shuffle=False)
train_score2 = learner2.evaluate(train_fs, output_metrics=[metric])[-1][metric]
test_score2 = learner2.evaluate(test_fs, output_metrics=[metric])[-1][metric]

eq_(train_score1, train_score2)
eq_(test_score1, test_score2)


def test_train_and_score_function():
yield check_train_and_score_function, 'classifier'
yield check_train_and_score_function, 'regressor'


@raises(ValueError)
def test_learner_api_grid_search_no_objective():

(train_fs,
test_fs) = make_classification_data(num_examples=500,
train_test_ratio=0.7,
num_features=5,
use_feature_hashing=False,
non_negative=True)
learner = Learner('LogisticRegression')
_ = learner.train(train_fs)


def test_learner_api_load_into_existing_instance():
"""
Check that `Learner.load()` works as expected
"""

# create a LinearSVC instance and train it on some data
learner1 = Learner('LinearSVC')
(train_fs,
test_fs) = make_classification_data(num_examples=200,
num_features=5,
use_feature_hashing=False,
non_negative=True)
learner1.train(train_fs, grid_search=False)

# now use `load()` to replace the existing instance with a
# different saved learner
other_model_file = join(_my_dir, 'other', 'test_load_saved_model.{}.model'.format(sys.version_info[0]))
learner1.load(other_model_file)

# now load the saved model into another instance using the class method
# `from_file()`
learner2 = Learner.from_file(other_model_file)

# check that the two instances are now basically the same
eq_(learner1.model_type, learner2.model_type)
eq_(learner1.model_params, learner2.model_params)
eq_(learner1.model_kwargs, learner2.model_kwargs)
17 changes: 14 additions & 3 deletions tests/test_custom_learner.py
@@ -19,10 +19,11 @@
from os.path import abspath, dirname, exists, join

import numpy as np
+ from nose.tools import raises
from numpy.testing import assert_array_equal
from skll.data import NDJWriter
from skll.experiments import run_configuration
- from skll.learner import _DEFAULT_PARAM_GRIDS
+ from skll.learner import _DEFAULT_PARAM_GRIDS, Learner

from utils import fill_in_config_paths, make_classification_data

@@ -204,8 +205,7 @@ def test_custom_learner_model_loading():
outprefix = 'test_model_custom_learner'
pred_file = join(_my_dir, 'output',
'{}_{}_CustomLogisticRegressionWrapper'
-                  '_predictions.tsv'.format(outprefix,
-                                            outprefix))
+                  '_predictions.tsv'.format(outprefix, outprefix))
preds1 = read_predictions(pred_file)
os.unlink(pred_file)

@@ -222,3 +222,14 @@ def test_custom_learner_model_loading():

# make sure that they are the same as before
assert_array_equal(preds1, preds2)


@raises(ValueError)
def test_custom_learner_api_missing_file():
_ = Learner('CustomType')


@raises(ValueError)
def test_custom_learner_api_bad_extension():
other_dir = join(_my_dir, 'other')
_ = Learner('_CustomLogisticRegressionWrapper', custom_learner_path=join(other_dir, 'custom_learner.txt'))
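
The two tests above cover the failure paths of the custom-learner API: a custom learner name given without a custom_learner_path, and a path whose extension is not .py. For contrast, a minimal sketch of the successful path, assuming the wrapper class from custom_learner.txt has been saved to a hypothetical custom_learner.py (file name and location are illustrative, not part of this commit):

from skll.learner import Learner

# load CustomLogisticRegressionWrapper from a .py file on disk
learner = Learner('CustomLogisticRegressionWrapper',
                  custom_learner_path='tests/other/custom_learner.py')
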
24 changes: 17 additions & 7 deletions tests/test_regression.py
@@ -21,18 +21,19 @@
from itertools import product
from os.path import abspath, dirname, join, exists

- from nose.tools import eq_, assert_almost_equal
+ from nose.tools import eq_, assert_almost_equal, raises

import numpy as np
from numpy.testing import assert_allclose
from scipy.stats import pearsonr
from sklearn.exceptions import ConvergenceWarning
+ from sklearn.linear_model import LogisticRegression
from sklearn.utils.testing import assert_greater, assert_less

from skll.data import FeatureSet, NDJWriter
from skll.config import _setup_config_parser
from skll.experiments import run_configuration
- from skll.learner import Learner
+ from skll.learner import Learner, rescaled
from skll.learner import _DEFAULT_PARAM_GRIDS

from utils import make_regression_data, fill_in_config_paths_for_fancy_output
@@ -676,10 +677,19 @@ def test_dummy_regressor_predict():
{"strategy": "quantile", "quantile": 0.0},
{"strategy": "quantile", "quantile": 1.0},
{"strategy": "constant", "constant": 1}],
- [np.ones(10)*np.mean(train_labels),
-  np.ones(10)*np.median(train_labels),
-  np.ones(10)*np.median(train_labels),
-  np.ones(10)*np.min(train_labels),
-  np.ones(10)*np.max(train_labels),
+ [np.ones(10) * np.mean(train_labels),
+  np.ones(10) * np.median(train_labels),
+  np.ones(10) * np.median(train_labels),
+  np.ones(10) * np.min(train_labels),
+  np.ones(10) * np.max(train_labels),
np.ones(10)]):
yield check_dummy_regressor_predict, model_args, train_labels, expected_output


@raises(ValueError)
def test_learner_api_rescaling_classifier():
"""
Check that rescaling fails for classifiers
"""

_ = rescaled(LogisticRegression)
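
The test above confirms that rescaled() refuses classifier classes. For contrast, a minimal sketch of the intended use with a regressor class (the Ridge import is an assumption for illustration, not part of this diff):

from sklearn.linear_model import Ridge
from skll.learner import rescaled

# decorating a regressor class is accepted; the returned class produces
# predictions rescaled against the distribution of the training labels
RescaledRidge = rescaled(Ridge)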
