In [1]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from scipy.stats import norm
from sklearn.utils import shuffle
from math import sqrt
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import scale, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split

In [None]:
class CW(BaseEstimator, ClassifierMixin):
    def __init__(self, eta=0.90, n_iter=5, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.phi = norm.cdf(self.eta)**(-1)
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.mu = np.ones(n_features).reshape(n_features, 1)
        self.sigma = np.diag([1.0] * n_features)
        
        for epoch in range(self.n_iter):
            if self.shuffle:
                X, y = shuffle(X, y, random_state=self.random_state)
            
            for i in range(n_samples):
                self._update(X[i:i + 1], y[i:i + 1])

    def _update(self, X, y):
        m = int(y * X @ self.mu)
        v = int(X @ (self.sigma @ X.T))
        gamma = (- (1 + 2 * self.phi * m) + sqrt((1 + 2 * self.phi * m) ** 2 - 8 * self.phi * (m - self.phi * v))) / (4 * self.phi * v)
        alpha = max(0, gamma)
        self.mu = self.mu + alpha * y * self.sigma @ X.T
        self.sigma = np.linalg.inv(np.linalg.inv(self.sigma) + 2 * alpha * self.phi * np.diag(X ** 2))
        
    def predict(self, X):
        return np.sign(np.dot(X, self.mu))

In [None]:
usps = fetch_mldata('usps')
X = scale(usps.data)
y = usps.target
y_bin = np.array([1 if i >= 5 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y_bin, test_size=0.3, random_state=0)

'''
estimator = CW(eta = 0.90, n_iter=5, shuffle=True, random_state=0)
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_test)
print('CW : {0:.4f}'.format(estimator.score(X_test, y_test)))
'''

score = GridSearchCV(
    estimator = CW(n_iter=5, shuffle=True, random_state=0),
    param_grid = {'eta': (0.001, 0.01, 0.1, 1.0)},
    cv = 5,
    verbose = 2
).fit(X_train, y_train).score(X_test, y_test)
print('CW : {0:.4f}'.format(score))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] eta=0.001 .......................................................
[CV] .............................................. eta=0.001 -   0.0s
[CV] eta=0.001 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.3min remaining:    0.0s


[CV] .............................................. eta=0.001 -   0.0s
[CV] eta=0.001 .......................................................


## Confidence Weighted Learning

\begin{align}
    \textbf{$\mu$}_{i + 1} &= \textbf{$\mu$}_i + \alpha y_i \Sigma_i \mathbf{x}_i \\
    \Sigma_{i + 1} &= \Sigma_i - \Sigma_i \mathbf{x}_i \frac{2 \alpha \phi}{1 + 2 \alpha \phi \mathbf{x}^{\mathrm{T}}_i \Sigma_i \mathbf{x}_i} \mathbf{x}^{\mathrm{T}}_i \Sigma_i \\
    \alpha &=
\end{align}