Commit 9bfa6b3
Merge pull request #574 from EducationalTestingService/fix-multinomialnb-loading

Fix loading issue with `MultinomialNB` model files.
desilinguist committed Oct 22, 2019
2 parents 9a81b14 + 1bd88f0 commit 9bfa6b3
Showing 2 changed files with 41 additions and 17 deletions.
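
The diffs below point to the root cause: `_train_setup` previously created `self.scaler` only for non-`MultinomialNB` learners, so a `MultinomialNB` model saved to disk carried no scaler at all, and anything touching that attribute after reloading the model would fail. A minimal, hypothetical sketch of this general failure mode (the `Model` class and its names are illustrative, not SKLL's own code):

    class Model:
        def __init__(self, needs_scaler):
            # The bug pattern: an attribute created only conditionally during
            # training is silently absent on instances that skipped the branch.
            if needs_scaler:
                self.scaler = 'a real scaler would go here'

    model = Model(needs_scaler=False)
    try:
        model.scaler
    except AttributeError as err:
        print(err)  # 'Model' object has no attribute 'scaler'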
34 changes: 17 additions & 17 deletions skll/learner.py
@@ -1464,21 +1464,22 @@ def _train_setup(self, examples):
         self.feat_selector = SelectByMinCount(
             min_count=self._min_feature_count)
 
-        # Create scaler if we weren't passed one and it's necessary
-        if not issubclass(self._model_type, MultinomialNB):
-            if self._feature_scaling != 'none':
-                scale_with_mean = self._feature_scaling in {
-                    'with_mean', 'both'}
-                scale_with_std = self._feature_scaling in {'with_std', 'both'}
-                self.scaler = StandardScaler(copy=True,
-                                             with_mean=scale_with_mean,
-                                             with_std=scale_with_std)
-            else:
-                # Doing this is to prevent any modification of feature values
-                # using a dummy transformation
-                self.scaler = StandardScaler(copy=False,
-                                             with_mean=False,
-                                             with_std=False)
+        # Create a scaler if we weren't passed one and we are asked
+        # to do feature scaling; note that we do not support feature
+        # scaling for `MultinomialNB` learners
+        if (not issubclass(self._model_type, MultinomialNB) and
+                self._feature_scaling != 'none'):
+            scale_with_mean = self._feature_scaling in {'with_mean', 'both'}
+            scale_with_std = self._feature_scaling in {'with_std', 'both'}
+            self.scaler = StandardScaler(copy=True,
+                                         with_mean=scale_with_mean,
+                                         with_std=scale_with_std)
+        else:
+            # Use a dummy transformation to prevent any modification
+            # of the feature values
+            self.scaler = StandardScaler(copy=False,
+                                         with_mean=False,
+                                         with_std=False)
 
     def train(self, examples, param_grid=None, grid_search_folds=3,
               grid_search=True, grid_objective=None,
@@ -1647,8 +1648,7 @@ def train(self, examples, param_grid=None, grid_search_folds=3,
                              'feature values.')
 
         # Scale features if necessary
-        if not issubclass(self._model_type, MultinomialNB):
-            xtrain = self.scaler.fit_transform(xtrain)
+        xtrain = self.scaler.fit_transform(xtrain)
 
         # check whether any feature values are too large
         self._check_max_feature_value(xtrain)
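
The fix hinges on `StandardScaler` being a no-op when both centering and scaling are disabled: `_train_setup` now always assigns a scaler (a dummy one for `MultinomialNB`, whose features must stay non-negative), so `train` can call `fit_transform` unconditionally and saved models always carry a `scaler` attribute. A quick standalone sketch of that no-op behavior (plain scikit-learn, outside SKLL):

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    # With with_mean=False and with_std=False, fit_transform() neither
    # centers nor rescales, so the data comes back unchanged and the call
    # is safe to apply unconditionally.
    X = np.array([[1.0, 2.0], [3.0, 4.0]])
    dummy = StandardScaler(copy=False, with_mean=False, with_std=False)
    assert np.array_equal(dummy.fit_transform(X), X)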
24 changes: 24 additions & 0 deletions tests/test_classification.py
@@ -104,6 +104,9 @@ def tearDown():
     for output_file in glob(join(output_dir, 'clf_metrics_objective_overlap*')):
         os.unlink(output_file)
 
+    for output_file in glob(join(output_dir, 'test_multinomialnb_loading*')):
+        os.unlink(output_file)
+
     config_files = [join(config_dir,
                          cfgname) for cfgname in ['test_single_file.cfg',
                                                   'test_single_file_saved_subset.cfg']]
@@ -1739,3 +1742,24 @@ def test_metrics_and_objectives_overlap():
[["f1_score_weighted", "unweighted_kappa", "accuracy"]],
[[], ["accuracy"], ["accuracy", "unweighted_kappa"]]):
yield (check_metrics_and_objectives_overlap, task, metrics, objectives)


def test_multinomialnb_loading():
"""
Make sure we can load MultnomialNB models from disk
"""

output_dir = join(_my_dir, 'output')

learner = Learner('MultinomialNB')
train_fs, test_fs = make_classification_data(num_examples=100, non_negative=True)
learner.train(train_fs, grid_search=False)
model_file = join(output_dir, 'test_multinomialnb_loading.model')
learner.save(model_file)
predictions1 = learner.predict(test_fs)
del learner

learner2 = Learner.from_file(model_file)
predictions2 = learner2.predict(test_fs)

assert_array_equal(predictions1, predictions2)
