Merge pull request #439 from EducationalTestingService/update-scikit-learn

Update scikit-learn to v0.20.1
desilinguist committed Dec 4, 2018
2 parents fc37bc7 + e78fb28 commit 48e5f1f
Showing 10 changed files with 41 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -25,7 +25,7 @@ before_install:
 - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then export PATH=/home/travis/miniconda2/bin:$PATH; else export PATH=/home/travis/miniconda3/bin:$PATH; fi
 - conda update --yes conda
 install:
-- conda install --yes --channel defaults --channel conda-forge python=$TRAVIS_PYTHON_VERSION numpy scipy beautifulsoup4 six scikit-learn==0.19.1 joblib prettytable python-coveralls ruamel.yaml
+- conda install --yes --channel defaults --channel conda-forge python=$TRAVIS_PYTHON_VERSION numpy scipy beautifulsoup4 six scikit-learn==0.20.1 joblib prettytable python-coveralls ruamel.yaml
 - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes --channel defaults configparser mock; fi
 - if [ ${WITH_PANDAS_AND_SEABORN} == "true" ]; then conda install --yes --channel defaults pandas seaborn; fi
 # Have to use pip for nose-cov because its entry points are not supported by conda yet
4 changes: 2 additions & 2 deletions conda-recipe/skll/meta.yaml
@@ -42,7 +42,7 @@ build:
 requirements:
   build:
     - python
-    - scikit-learn ==0.19.1
+    - scikit-learn ==0.20.1
     - joblib >=0.8
     - setuptools
     - six
@@ -57,7 +57,7 @@ requirements:
 
   run:
     - python
-    - scikit-learn ==0.19.1
+    - scikit-learn ==0.20.1
     - joblib >=0.8
     - six
     - prettytable
2 changes: 1 addition & 1 deletion conda_requirements.txt
@@ -1,4 +1,4 @@
-scikit-learn==0.19.1
+scikit-learn==0.20.1
 six
 PrettyTable
 beautifulsoup4
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-scikit-learn==0.19.1
+scikit-learn==0.20.1
 six
 PrettyTable
 beautifulsoup4
2 changes: 1 addition & 1 deletion requirements_rtd.txt
@@ -1,7 +1,7 @@
 configparser==3.5.0b2
 logutils
 mock
-scikit-learn==0.19.1
+scikit-learn==0.20.1
 six
 PrettyTable
 beautifulsoup4
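The dependency files above all pin the same scikit-learn version. A quick runtime check like the following (illustrative, not part of the commit) confirms that the installed package matches the pin:

    import sklearn

    # Version string pinned across the requirements files; adjust if the pin moves.
    assert sklearn.__version__ == "0.20.1", sklearn.__version__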
2 changes: 1 addition & 1 deletion skll/data/featureset.py
@@ -414,7 +414,7 @@ def has_labels(self):
         Whether or not this FeatureSet has any finite labels.
         """
         if self.labels is not None:
-            return not (np.issubdtype(self.labels.dtype, float) and
+            return not (np.issubdtype(self.labels.dtype, np.floating) and
                         np.isnan(np.min(self.labels)))
         else:
             return False
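For context: newer NumPy releases deprecated passing the builtin float to np.issubdtype, and np.floating is the abstract type that matches all float dtypes. A minimal sketch of the behavior the updated check relies on (the array here is illustrative, not from the commit):

    import numpy as np

    labels = np.array([1.0, 2.5, np.nan], dtype=np.float32)

    # np.floating matches float16/32/64 alike, unlike the builtin float,
    # which triggers a FutureWarning here under NumPy 1.14+.
    print(np.issubdtype(labels.dtype, np.floating))  # True
    print(np.isnan(np.min(labels)))                  # True, so has_labels is False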
4 changes: 2 additions & 2 deletions skll/experiments.py
@@ -1374,13 +1374,13 @@ def _generate_learning_curve_plots(experiment_name,
     # each of the featuresets
     for fs_name, df_fs in df_melted.groupby('featureset_name'):
         fig = plt.figure();
-        fig.set_size_inches(2.5*num_learners, 2.5*num_metrics);
+        fig.set_size_inches(2.5 * num_learners, 2.5 * num_metrics);
 
         # compute ylimits for this feature set for each objective
         with sns.axes_style('whitegrid', {"grid.linestyle": ':',
                                           "xtick.major.size": 3.0}):
             g = sns.FacetGrid(df_fs, row="metric", col="learner_name",
-                              hue="variable", size=2.5, aspect=1,
+                              hue="variable", height=2.5, aspect=1,
                               margin_titles=True, despine=True, sharex=False,
                               sharey=False, legend_out=False, palette="Set1")
             colors = train_color, test_color = sns.color_palette("Set1")[:2]
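For context: seaborn 0.9 renamed FacetGrid's size parameter to height, which is the API change this hunk tracks. A minimal sketch with illustrative data (the column names mirror the melted data frame above; the values are made up):

    import pandas as pd
    import seaborn as sns

    df = pd.DataFrame({"metric": ["accuracy"] * 4,
                       "learner_name": ["SVC"] * 2 + ["LogisticRegression"] * 2,
                       "variable": ["train", "test"] * 2,
                       "value": [0.91, 0.84, 0.88, 0.83]})

    # seaborn >= 0.9 expects height= for the per-facet size in inches;
    # older releases called the same parameter size=.
    g = sns.FacetGrid(df, row="metric", col="learner_name", hue="variable",
                      height=2.5, aspect=1, margin_titles=True)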
26 changes: 22 additions & 4 deletions skll/learner.py
@@ -854,6 +854,7 @@ def __init__(self, model_type, probability=False, feature_scaling='none',
         if issubclass(self._model_type, SVC):
             self._model_kwargs['cache_size'] = 1000
             self._model_kwargs['probability'] = self.probability
+            self._model_kwargs['gamma'] = 'auto'
             if self.probability:
                 self.logger.warning('Because LibSVM does an internal '
                                     'cross-validation to produce probabilities, '
@@ -866,14 +867,22 @@ def __init__(self, model_type, probability=False, feature_scaling='none',
             self._model_kwargs['n_estimators'] = 500
         elif issubclass(self._model_type, SVR):
             self._model_kwargs['cache_size'] = 1000
+            self._model_kwargs['gamma'] = 'auto'
         elif issubclass(self._model_type, SGDClassifier):
             self._model_kwargs['loss'] = 'log'
+            self._model_kwargs['max_iter'] = None
+            self._model_kwargs['tol'] = None
+        elif issubclass(self._model_type, SGDRegressor):
+            self._model_kwargs['max_iter'] = None
+            self._model_kwargs['tol'] = None
         elif issubclass(self._model_type, RANSACRegressor):
             self._model_kwargs['loss'] = 'squared_loss'
         elif issubclass(self._model_type, (MLPClassifier, MLPRegressor)):
             self._model_kwargs['learning_rate'] = 'invscaling'
             self._model_kwargs['max_iter'] = 500
 
+        elif issubclass(self._model_type, LogisticRegression):
+            self._model_kwargs['solver'] = 'liblinear'
+            self._model_kwargs['multi_class'] = 'auto'
 
         if issubclass(self._model_type,
                       (AdaBoostClassifier, AdaBoostRegressor,
@@ -911,9 +920,18 @@ def __init__(self, model_type, probability=False, feature_scaling='none',
                                    AdaBoostClassifier,
                                    RANSACRegressor)) and ('base_estimator' in model_kwargs):
             base_estimator_name = model_kwargs['base_estimator']
-            base_estimator_kwargs = {} if base_estimator_name in ['LinearRegression',
-                                                                  'MultinomialNB',
-                                                                  'SVR'] else {'random_state': 123456789}
+            if base_estimator_name in ['LinearRegression', 'MultinomialNB']:
+                base_estimator_kwargs = {}
+            elif base_estimator_name in ['SGDClassifier', 'SGDRegressor']:
+                base_estimator_kwargs = {'max_iter': None,
+                                         'tol': None,
+                                         'random_state': 123456789}
+            elif base_estimator_name == 'SVR':
+                base_estimator_kwargs = {'gamma': 'auto'}
+            elif base_estimator_name == 'SVC':
+                base_estimator_kwargs = {'gamma': 'auto', 'random_state': 123456789}
+            else:
+                base_estimator_kwargs = {'random_state': 123456789}
             base_estimator = globals()[base_estimator_name](**base_estimator_kwargs)
             model_kwargs['base_estimator'] = base_estimator
         self._model_kwargs.update(model_kwargs)
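For context: each keyword pinned in the hunks above freezes a default that scikit-learn 0.20 began warning would change in a later release, so SKLL keeps its 0.19-era behavior without FutureWarnings. A condensed sketch of the same pins applied directly under scikit-learn 0.20 (illustrative usage, not SKLL code):

    from sklearn.linear_model import LogisticRegression, SGDClassifier
    from sklearn.svm import SVC, SVR

    # gamma was scheduled to move from 'auto' to 'scale'; pinning 'auto'
    # keeps the old RBF kernel coefficient.
    svc = SVC(gamma='auto', cache_size=1000)
    svr = SVR(gamma='auto', cache_size=1000)

    # 'liblinear' was the pre-0.22 default solver, and multi_class='auto'
    # silences the multinomial-versus-ovr FutureWarning.
    logreg = LogisticRegression(solver='liblinear', multi_class='auto')

    # max_iter=None together with tol=None keeps the legacy stopping rule
    # from 0.19 (later releases removed this combination entirely).
    sgd = SGDClassifier(loss='log', max_iter=None, tol=None)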
2 changes: 1 addition & 1 deletion tests/test_classification.py
@@ -193,7 +193,7 @@ def test_sparse_predict():
                          [(0.45, 0.52), (0.52, 0.5),
                           (0.48, 0.5), (0.49, 0.5),
                           (0.43, 0), (0.53, 0.57),
-                          (0.49, 0.49), (0.48, 0.5)]):
+                          (0.49, 0.49), (0.5, 0.49)]):
         yield check_sparse_predict, learner_name, expected_scores[0], False
         if learner_name != 'MultinomialNB':
             yield check_sparse_predict, learner_name, expected_scores[1], True
18 changes: 9 additions & 9 deletions tests/test_regression.py
@@ -135,7 +135,7 @@ def check_rescaling(name, grid_search=False):
     train_p_std = np.std(train_predictions)
     rescaled_train_p_std = np.std(rescaled_train_predictions)
     assert_less(abs(rescaled_train_p_std - train_y_std),
-               abs(train_p_std - train_y_std))
+                abs(train_p_std - train_y_std))
 
 
 def test_rescaling():
@@ -403,14 +403,14 @@ def check_ensemble_models(name,
         else:
             expected_feature_importances = [0.10266744, 0.18681777, 0.71051479]
     else:
-        expected_feature_importances = ([0.204,
-                                         0.172,
-                                         0.178,
-                                         0.212,
-                                         0.234] if use_feature_hashing else
-                                        [0.262,
-                                         0.288,
-                                         0.45])
+        expected_feature_importances = ([0.471714,
+                                         0.022797,
+                                         0.283377,
+                                         0.170823,
+                                         0.051288] if use_feature_hashing else
+                                        [0.082621,
+                                         0.166652,
+                                         0.750726])
 
     feature_importances = learner.model.feature_importances_
     assert_allclose(feature_importances, expected_feature_importances,
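For context: the expected feature importances changed because the underlying tree ensembles changed between scikit-learn 0.19 and 0.20; regenerating fixtures like these amounts to refitting the same seeded model and reading feature_importances_. A rough sketch (the regressor, data, and seed are illustrative assumptions, not the test's fixtures):

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    rng = np.random.RandomState(123456789)
    X = rng.rand(200, 3)
    y = X @ np.array([0.1, 0.2, 0.7]) + 0.01 * rng.randn(200)

    # n_estimators pinned explicitly, matching the SKLL default shown above.
    model = RandomForestRegressor(n_estimators=500, random_state=123456789)
    model.fit(X, y)
    print(model.feature_importances_)  # one weight per column, sums to 1.0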
