Skip to content

Commit

Permalink
[MRG+1] Added 'average' option to passive aggressive classifier/regressor
Browse files Browse the repository at this point in the history
  • Loading branch information
aesuli authored and TomDLT committed Oct 3, 2016
1 parent 625c824 commit eb8fe5d
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 30 deletions.
6 changes: 5 additions & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ Changelog
New features
............


Enhancements
............

Expand All @@ -27,6 +26,11 @@ Enhancements
used. (`#7441 <https://github.com/scikit-learn/scikit-learn/pull/7441>`_)
by `Nelson Liu`_.

- Added ``average`` parameter to perform weights averaging in
:class:`linear_model.PassiveAggressiveClassifier`. (`#4939
<https://github.com/scikit-learn/scikit-learn/pull/4939>`_) by `Andrea
Esuli`_.

Bug fixes
.........

Expand Down
28 changes: 25 additions & 3 deletions sklearn/linear_model/passive_aggressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
.. versionadded:: 0.17
parameter *class_weight* to automatically weight samples.
average : bool or int, optional
When set to True, computes the averaged SGD weights and stores the
result in the ``coef_`` attribute. If set to an int greater than 1,
averaging will begin once the total number of samples seen reaches
average. So average=10 will begin averaging after seeing 10 samples.
.. versionadded:: 0.19
parameter *average* to use weights averaging in SGD
Attributes
----------
coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
Expand All @@ -84,9 +93,10 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
"""

def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
verbose=0, loss="hinge", n_jobs=1, random_state=None,
warm_start=False, class_weight=None):
warm_start=False, class_weight=None, average=False):
super(PassiveAggressiveClassifier, self).__init__(
penalty=None,
fit_intercept=fit_intercept,
Expand All @@ -97,6 +107,7 @@ def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
eta0=1.0,
warm_start=warm_start,
class_weight=class_weight,
average=average,
n_jobs=n_jobs)
self.C = C
self.loss = loss
Expand Down Expand Up @@ -210,6 +221,15 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
When set to True, reuse the solution of the previous call to fit as
initialization, otherwise, just erase the previous solution.
average : bool or int, optional
When set to True, computes the averaged SGD weights and stores the
result in the ``coef_`` attribute. If set to an int greater than 1,
averaging will begin once the total number of samples seen reaches
average. So average=10 will begin averaging after seeing 10 samples.
.. versionadded:: 0.19
parameter *average* to use weights averaging in SGD
Attributes
----------
coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
Expand All @@ -233,7 +253,8 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
"""
def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
verbose=0, loss="epsilon_insensitive",
epsilon=DEFAULT_EPSILON, random_state=None, warm_start=False):
epsilon=DEFAULT_EPSILON, random_state=None, warm_start=False,
average=False):
super(PassiveAggressiveRegressor, self).__init__(
penalty=None,
l1_ratio=0,
Expand All @@ -244,7 +265,8 @@ def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
shuffle=shuffle,
verbose=verbose,
random_state=random_state,
warm_start=warm_start)
warm_start=warm_start,
average=average)
self.C = C
self.loss = loss

Expand Down
81 changes: 55 additions & 26 deletions sklearn/linear_model/tests/test_passive_aggressive.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sklearn.utils.testing import assert_true
import numpy as np
import scipy.sparse as sp

Expand Down Expand Up @@ -69,24 +70,38 @@ def project(self, X):
def test_classifier_accuracy():
    """Fit PassiveAggressiveClassifier and check it reaches a sane accuracy.

    Covers dense and sparse input, with and without intercept fitting, and
    with plain vs. averaged weights.  When ``average`` is enabled the
    estimator must expose the SGD weight-averaging attributes.
    """
    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                clf = PassiveAggressiveClassifier(C=1.0, n_iter=30,
                                                  fit_intercept=fit_intercept,
                                                  random_state=0,
                                                  average=average)
                clf.fit(data, y)
                score = clf.score(data, y)
                assert_greater(score, 0.79)
                if average:
                    # Averaging keeps both the averaged and the plain
                    # (standard) coefficients/intercepts around.
                    assert_true(hasattr(clf, 'average_coef_'))
                    assert_true(hasattr(clf, 'average_intercept_'))
                    assert_true(hasattr(clf, 'standard_intercept_'))
                    assert_true(hasattr(clf, 'standard_coef_'))


def test_classifier_partial_fit():
    """Train PassiveAggressiveClassifier via repeated partial_fit calls.

    Verifies incremental training reaches a sane accuracy on dense and
    sparse data, with plain vs. averaged weights; averaged runs must
    expose the SGD weight-averaging attributes.
    """
    classes = np.unique(y)
    for data in (X, X_csr):
        for average in (False, True):
            clf = PassiveAggressiveClassifier(C=1.0,
                                              fit_intercept=True,
                                              random_state=0,
                                              average=average)
            # 30 passes over the data, one partial_fit per pass.
            for t in range(30):
                clf.partial_fit(data, y, classes)
            score = clf.score(data, y)
            assert_greater(score, 0.79)
            if average:
                # Averaging keeps both the averaged and the plain
                # (standard) coefficients/intercepts around.
                assert_true(hasattr(clf, 'average_coef_'))
                assert_true(hasattr(clf, 'average_intercept_'))
                assert_true(hasattr(clf, 'standard_intercept_'))
                assert_true(hasattr(clf, 'standard_coef_'))


def test_classifier_refit():
Expand Down Expand Up @@ -203,26 +218,40 @@ def test_regressor_mse():

for data in (X, X_csr):
for fit_intercept in (True, False):
reg = PassiveAggressiveRegressor(C=1.0, n_iter=50,
fit_intercept=fit_intercept,
random_state=0)
reg.fit(data, y_bin)
pred = reg.predict(data)
assert_less(np.mean((pred - y_bin) ** 2), 1.7)
for average in (False, True):
reg = PassiveAggressiveRegressor(C=1.0, n_iter=50,
fit_intercept=fit_intercept,
random_state=0,
average=average)
reg.fit(data, y_bin)
pred = reg.predict(data)
assert_less(np.mean((pred - y_bin) ** 2), 1.7)
if average:
assert_true(hasattr(reg, 'average_coef_'))
assert_true(hasattr(reg, 'average_intercept_'))
assert_true(hasattr(reg, 'standard_intercept_'))
assert_true(hasattr(reg, 'standard_coef_'))


def test_regressor_partial_fit():
    """Train PassiveAggressiveRegressor via repeated partial_fit calls.

    Targets are remapped to {-1, 1}; incremental training must reach a
    low MSE on dense and sparse data, with plain vs. averaged weights.
    Averaged runs must expose the SGD weight-averaging attributes.
    """
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for average in (False, True):
            reg = PassiveAggressiveRegressor(C=1.0,
                                             fit_intercept=True,
                                             random_state=0,
                                             average=average)
            # 50 passes over the data, one partial_fit per pass.
            for t in range(50):
                reg.partial_fit(data, y_bin)
            pred = reg.predict(data)
            assert_less(np.mean((pred - y_bin) ** 2), 1.7)
            if average:
                # Averaging keeps both the averaged and the plain
                # (standard) coefficients/intercepts around.
                assert_true(hasattr(reg, 'average_coef_'))
                assert_true(hasattr(reg, 'average_intercept_'))
                assert_true(hasattr(reg, 'standard_intercept_'))
                assert_true(hasattr(reg, 'standard_coef_'))


def test_regressor_correctness():
Expand Down

0 comments on commit eb8fe5d

Please sign in to comment.