In [1]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from scipy.stats import norm
from sklearn.utils import shuffle
from math import sqrt
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import scale, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
class SCW1(BaseEstimator, ClassifierMixin):
    def __init__(self, C=1.0, eta=0.90, n_iter=5, shuffle=True, random_state=0):
        self.C = C
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.phi = norm.cdf(self.eta)**(-1)
        self.psi = 1 + self.phi**2 / 2
        self.zeta = 1 + self.phi**2
        
    def fit(self, X, Y):
        n_samples, n_features = X.shape
        self.mu = np.zeros(n_features).reshape(n_features, 1)
        self.sigma = np.diag([1.0] * n_features)
        
        for epoch in range(self.n_iter):
            if self.shuffle:
                X, Y = shuffle(X, Y, random_state=self.random_state)
            
            for i in range(n_samples):
                self._update(X[i:i + 1].T, Y[i:i + 1])

    def _update(self, X, y):
        m = float(y * self.mu.T @ X)
        v = float(X.T @ (self.sigma @ X))
        alpha = min(self.C, max(0, (-m * self.psi + sqrt(m**2 * self.phi**4 / 4 + v * self.phi**2 * self.zeta)) / (v * self.zeta)))
        u = (-alpha * v * self.phi + sqrt(alpha**2 * v**2 * self.phi ** 2 + 4 * v))**2 / 4
        beta = alpha * self.phi / (sqrt(u) + v * alpha * self.phi)

        self.mu = self.mu + alpha * y * self.sigma @ X
        self.sigma = self.sigma - beta * self.sigma @ X @ X.T @ self.sigma
        
    def predict(self, X):
        return np.sign(X @ self.mu)

In [5]:
dataset = fetch_mldata('svmguide3')
X = scale(dataset.data)
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   42.4s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   42.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   42.2s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


the best parameter:  {'eta': 0.5, 'C': 2}
score: 0.7902
Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ......

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   42.3s finished


[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.5, C=0.0625 ...............................................
[CV] ...................................... eta=0.5, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.55, C=0.0625 ..............................................
[CV] ..................................... eta=0.55, C=0.0625 -   0.0s
[CV] eta=0.6, C=0.0625 ...............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   42.3s finished


In [None]:
dataset = fetch_mldata('usps')
X = scale(dataset.data)
y = dataset.target
y_bin = np.array([1 if i >= 5  else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y_bin, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

In [None]:
dataset = fetch_mldata('ijcnn1')
X = scale(dataset.data)
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

In [None]:
from scipy import sparse

dataset = fetch_mldata('mushrooms')
X = scale(dataset.data.todense())
y = dataset.target
y_bin = np.array([1 if i == 1 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y_bin, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )

    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

In [None]:
from scipy import sparse

dataset = fetch_mldata('covtype.binary')
X = scale(dataset.data.todense())
y = dataset.target
y_bin = np.array([1 if i == 1 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y_bin, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))