In [1]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from scipy.stats import norm
from sklearn.utils import shuffle
from math import sqrt
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import scale, LabelEncoder
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import GridSearchCV



In [2]:
class CW(BaseEstimator, ClassifierMixin):
    def __init__(self, eta=0.90, n_iter=5, shuffle=True, random_state=100):
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.phi = norm.cdf(self.eta)**(-1)

    def fit(self, X, y):
        n_samples, n_features = X.shape
        n_samples, n_classes = y.shape
        self.mu = np.ones(n_features)
        self.sigma = np.diag([1.0] * n_features)

        for epoch in range(self.n_iter):
            if self.shuffle:
                X, y = shuffle(X, y, random_state=self.random_state)

            for i in range(n_samples):
                self._update(X[i:i + 1], y[i:i + 1])

    def _update(self, X, y):
        m = y * (np.dot(X, self.mu.T))
        v = np.dot(np.dot(X, self.sigma), X.T)
        gamma = (- (1 + 2 * self.phi * m) + sqrt((1 + 2 * self.phi * m) ** 2 - 8 * self.phi * (m - self.phi * v))) / (4 * self.phi * v)
        alpha = max(0, gamma)
        self.mu = self.mu + alpha * y * np.dot(self.sigma, X.T).T
        self.sigma = (self.sigma ** (-1) + 2 * alpha * self.phi * np.diag(X)) ** (-1)

    def predict(self, X):
        return np.sign(np.dot(X, self.mu))

In [3]:
usps = fetch_mldata('usps')
X = scale(usps.data)
y = usps.target
y_bin = np.array([1 if i >= 5 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y_bin, test_size=0.3, random_state=0)

estimator = CW(eta = 0.90, n_iter=5, shuffle=True, random_state=100)
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_test)
print('CW : ', score(X_test, y_test))

X: [[ 1.89832846  3.93973756  5.02935469  4.22067368  3.14537848  2.39935706
   1.721294    0.92928689  0.38336785  0.10912692 -0.01847927 -0.02587636
  -0.13210366 -0.25080196 -0.25802783 -0.19972376  0.75371523  1.76872159
   2.29584811  1.93228592  1.46723753  1.19533565  0.97597303  0.6800755
   0.67776716  0.85810625  0.92632773  0.92125929  0.67552493  0.29844456
  -0.05023886 -0.18466953 -0.0392709   0.08985379  0.06797936 -0.18396028
  -0.44915605 -0.58365676 -0.60524676 -0.47726225 -0.03321159  0.59370786
   1.0836427   1.45188028  1.74633993  1.58018228  0.74685465  0.08933998
  -0.26108193 -0.39260735 -0.60572738 -0.87691823 -1.12883264 -1.27914439
  -1.30884232 -1.11897582 -0.92936024 -0.64259408 -0.02320734  0.74875597
   1.80602596  2.49342742  1.93110986  0.79630272 -0.30099472 -0.45152664
  -0.66604798 -0.91915517 -1.13474548 -1.24194563 -1.25656979 -1.14842083
  -1.14980324 -1.20655143 -0.95432604 -0.28177997  1.19392276  2.51652861
   2.47108309  1.30864866 -0.3247002

ValueError: shapes (256,256) and (1,256) not aligned: 256 (dim 1) != 1 (dim 0)

## Confidence Weighted Learning

\begin{align}
    \textbf{$\mu$}_{i + 1} &= \textbf{$\mu$}_i + \alpha y_i \Sigma_i \mathbf{x}_i \\
    \Sigma_{i + 1} &= \Sigma_i - \Sigma_i \mathbf{x}_i \frac{2 \alpha \phi}{1 + 2 \alpha \phi \mathbf{x}^{\mathrm{T}}_i \Sigma_i \mathbf{x}_i} \mathbf{x}^{\mathrm{T}}_i \Sigma_i \\
    \alpha &=
\end{align}