In [1]:
import numpy as np
from scipy.stats import norm
from scipy import sparse
from math import sqrt
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import shuffle
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
class SCW1(BaseEstimator, ClassifierMixin):
    def __init__(self, C=1.0, eta=0.90, n_iter=5, shuffle=True, random_state=0, n_samples=3000):
        self.C = C
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.n_samples = n_samples
        self.phi = norm.ppf(self.eta)
        self.psi = 1 + self.phi**2 / 2
        self.zeta = 1 + self.phi**2
        
    def fit(self, X, Y):
        n_samples, n_features = X.shape
        self.mu = np.zeros(n_features).reshape(n_features, 1)
        self.sigma = np.diag([1.0] * n_features)
        
        for epoch in range(self.n_iter):
            if self.shuffle:
                X, Y = shuffle(X, Y, random_state=self.random_state)
            if n_samples >= self.n_samples:
                for i in range(self.n_samples):
                    self._update(X[i:i + 1].T, Y[i:i + 1])
            else:
                 for i in range(n_samples):
                    self._update(X[i:i + 1].T, Y[i:i + 1])

    def _update(self, X, y):
        m = float(y * self.mu.T @ X)
        v = float(X.T @ (self.sigma @ X))
        alpha = min(self.C, max(0, (-m * self.psi + sqrt(m**2 * self.phi**4 / 4 + v * self.phi**2 * self.zeta)) / (v * self.zeta)))
        u = (-alpha * v * self.phi + sqrt(alpha**2 * v**2 * self.phi** 2 + 4 * v))**2 / 4
        beta = alpha * self.phi / (sqrt(u) + v * alpha * self.phi)

        self.mu = self.mu + alpha * y * self.sigma @ X
        self.sigma = self.sigma - beta * self.sigma @ X @ X.T @ self.sigma
        
    def predict(self, X):
        return np.sign(X @ self.mu)

In [3]:
X, Y = load_svmlight_file('datasets/splice')
X = scale(X.todense())

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 1
    )
    
    estimator.fit(X_train, Y_train)
    score = estimator.score(X_test, Y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  1.0min finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.8033
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   57.6s finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.8033
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   57.6s finished


the best parameter:  {'C': 2, 'eta': 0.5}
score: 0.8167
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   56.7s finished


the best parameter:  {'C': 2, 'eta': 0.5}
score: 0.7867
Fitting 5 folds for each of 90 candidates, totalling 450 fits
the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.8067
average_score: 0.8033
std_score: 0.0097


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   56.4s finished


In [4]:
X, y = load_svmlight_file('datasets/svmguide3')
X = scale(X.todense())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 1
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   38.0s finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.7453
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   37.4s finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 0.7373
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   37.6s finished


the best parameter:  {'C': 4, 'eta': 0.5}
score: 0.7480
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   37.5s finished


the best parameter:  {'C': 4, 'eta': 0.5}
score: 0.7614
Fitting 5 folds for each of 90 candidates, totalling 450 fits
the best parameter:  {'C': 2, 'eta': 0.5}
score: 0.7212
average_score: 0.7426
std_score: 0.0132


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:   37.5s finished


In [5]:
X, y = load_svmlight_file('datasets/mushrooms')
X = scale(X.todense())
y = np.array([1 if i == 2 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 1
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed: 11.9min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 1.0000
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed: 12.4min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 1.0000
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed: 12.2min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 1.0000
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed: 12.5min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 1.0000
Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed: 12.1min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 1.0000
average_score: 1.0000
std_score: 0.0000


In [6]:
X, y = make_classification(n_samples=4000, n_features=65, n_informative=65, n_redundant=0, n_classes=2, n_clusters_per_class=1, 
                          weights=None, flip_y=0, shuffle=True, random_state=0)
X = scale(X)
y = y = np.array([1 if i == 1 else -1 for i in y])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

iter = 5
result = np.array([])

for i in range(iter):
    estimator = GridSearchCV(
        estimator = SCW1(n_iter=5, shuffle=True, random_state=i),
        param_grid = {'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16], 'eta': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]},
        cv = 5,
        verbose = 2
    )
    
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test)
    
    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    result = np.append(result, score)
    
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  4.4min finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.9575
Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  4.4min finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.9650
Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  4.5min finished


the best parameter:  {'C': 0.25, 'eta': 0.5}
score: 0.9558
Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s


[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  4.4min finished


the best parameter:  {'C': 0.125, 'eta': 0.5}
score: 0.9575
Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.5 ...............................................
[CV] ...................................... C=0.0625, eta=0.5 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] ..................................... C=0.0625, eta=0.55 -   0.0s
[CV] C=0.0625, eta=0.55 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done 450 out of 450 | elapsed:  4.3min finished


the best parameter:  {'C': 0.0625, 'eta': 0.5}
score: 0.9608
average_score: 0.9593
std_score: 0.0033
