In [1]:
import numpy as np
from scipy.stats import norm
from scipy import sparse
from math import sqrt
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import shuffle
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

class CW(BaseEstimator, ClassifierMixin):
    """Online linear binary classifier with a Gaussian weight distribution.

    The model keeps a mean weight vector ``mu`` and a covariance matrix
    ``Sigma`` and updates both one sample at a time.  The update rule looks
    like the "variance" form of confidence-weighted learning (closed-form
    step size ``gamma`` and a diagonal precision correction).

    Parameters
    ----------
    ETA : float, default=0.75
        Confidence level; converted to ``phi = norm.ppf(ETA)``.  Must lie
        strictly between 0.5 and 1 so that ``phi`` is positive and finite.
    n_iter : int, default=5
        Number of passes over the training data per ``fit`` call.
    shuffle : bool, default=True
        Whether to permute the samples at the start of every pass.
    random_state : int, default=0
        Seed forwarded to ``sklearn.utils.shuffle``.
    n_samples_limit : int or None, default=3000
        Maximum number of samples consumed per pass; ``None`` means no limit.

    Attributes
    ----------
    phi : float
        ``norm.ppf(ETA)``; refreshed at the start of every ``fit``.
    mu : ndarray of shape (n_features, 1) or None
        Mean weight vector (``None`` until the first ``fit``).
    Sigma : ndarray of shape (n_features, n_features) or None
        Weight covariance (``None`` until the first ``fit``).
    has_fitted : bool
        True once ``mu``/``Sigma`` are initialised.  Later ``fit`` calls
        continue from the current state instead of resetting it.
    """

    def __init__(self, ETA=0.75, n_iter=5, shuffle=True, random_state=0, n_samples_limit=3000):
        # Keep the raw constructor argument: BaseEstimator.get_params(),
        # clone() and repr() look up an attribute named after every
        # __init__ parameter and fail if it is missing.
        self.ETA = ETA
        self.phi = norm.ppf(ETA)
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.n_samples_limit = n_samples_limit

        self.mu = None
        self.Sigma = None

        self.has_fitted = False

    def fit(self, X, Y):
        """Run ``n_iter`` online passes over ``(X, Y)``.

        Parameters
        ----------
        X : 2-D array of shape (n_samples, n_features)
        Y : 1-D array of labels, each equal to 1 or -1

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``ETA`` is outside (0.5, 1), if the number
            of features differs from an earlier ``fit``, or if a label is
            neither 1 nor -1.
        """
        if np.ndim(X) != 2:
            raise ValueError('Estimator expects 2 dim array.')

        # Recompute phi so that set_params(ETA=...) after construction is honoured.
        self.phi = norm.ppf(self.ETA)
        if not (np.isfinite(self.phi) and self.phi > 0):
            raise ValueError('ETA must lie strictly between 0.5 and 1.')

        # The shape is needed on every call, not only the first one; binding
        # it inside the initialisation branch broke repeated fit() calls.
        n_samples, n_features = X.shape

        if not self.has_fitted:
            self.mu = np.zeros((n_features, 1))
            self.Sigma = np.eye(n_features)
            self.has_fitted = True
        elif self.mu.shape[0] != n_features:
            raise ValueError('X has {0:d} features, but the estimator was fitted with {1:d}.'
                             .format(n_features, self.mu.shape[0]))

        if self.n_samples_limit is None:
            n_updates = n_samples
        else:
            n_updates = min(n_samples, self.n_samples_limit)

        for _ in range(self.n_iter):
            if self.shuffle:
                # With an int random_state the same permutation is applied
                # to the already-permuted data on every pass.
                X, Y = shuffle(X, Y, random_state=self.random_state)

            for i in range(n_updates):
                # Slice (rather than index) so the row stays 2-D before it is
                # turned into a column vector.
                self._update(X[i:i + 1].reshape(-1, 1), Y[i:i + 1].item(0))
        return self

    def _update(self, x, y):
        """Apply one online update for column vector ``x`` with label ``y``.

        Raises ``ValueError`` when ``y`` is neither 1 nor -1.
        """
        if y != 1 and y != -1:
            raise ValueError('Data label must be 1 or -1.')

        sigma_x = self.Sigma @ x
        m = (y * self.mu.T @ x).item(0)   # signed margin under the mean weights
        v = (x.T @ sigma_x).item(0)       # margin variance x^T Sigma x

        if v <= 0:
            # An all-zero sample carries no information and would make the
            # step-size denominator below zero.
            return

        a = 1 + 2 * self.phi * m
        # Algebraically this equals (1 - 2*phi*m)^2 + 8*phi^2*v >= 0; clamp to
        # protect sqrt() from a tiny negative value caused by rounding.
        disc = max(a**2 - 8 * self.phi * (m - self.phi * v), 0.0)
        gamma = (-a + sqrt(disc)) / (4 * self.phi * v)
        alpha = max(0, gamma)

        if alpha == 0:
            # Nothing to change: mu gets a zero step and the Sigma update
            # reduces to inv(inv(Sigma)).  Skipping it avoids two dense
            # inversions and the rounding noise they would add.
            return

        self.mu = self.mu + alpha * y * sigma_x
        self.Sigma = np.linalg.inv(
            np.linalg.inv(self.Sigma) + 2 * alpha * self.phi * np.diag((x**2).reshape(-1))
        )

    def predict(self, X):
        """Return ``sign(X @ mu)`` as an array of shape (n_samples, 1).

        A sample whose score is exactly zero is reported as 0.
        """
        return np.sign(X @ self.mu)

In [4]:
def GridSearchCV_CW(X, Y, n_repeats=10, cv=5,
                    etas=(0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95),
                    test_size=0.3):
    """Tune ``CW``'s ``ETA`` by k-fold CV and report hold-out accuracy.

    The data is split once into train/test (``random_state=0``).  For each
    of ``n_repeats`` repetitions, every candidate in ``etas`` is scored with
    ``cv`` contiguous folds of the training part, the best candidate is
    refitted on the whole training part and scored on the test part.  The
    repetition index is used as the ``random_state`` of every ``CW`` model.
    Progress and the mean / standard deviation of the test scores are
    printed; nothing is returned.

    Parameters
    ----------
    X : 2-D array of shape (n_samples, n_features)
    Y : 1-D array of labels (1 or -1)
    n_repeats : int, default=10
        Number of search-and-evaluate repetitions.
    cv : int, default=5
        Number of folds.  Folds have ``n_train // cv`` rows each, so the last
        ``n_train % cv`` training rows are never used for validation.
    etas : iterable of float
        Candidate ``ETA`` values, tried in order.
    test_size : float, default=0.3
        Fraction of the data held out for the final score.

    Raises
    ------
    ValueError
        If ``etas`` is empty or the training part has fewer rows than ``cv``.
    """
    etas = tuple(etas)
    if not etas:
        raise ValueError('etas must contain at least one candidate.')

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=0)

    n_train = X_train.shape[0]
    fold_size = n_train // cv
    if fold_size == 0:
        raise ValueError('Not enough training samples for {0:d} folds.'.format(cv))

    test_scores = []

    for repeat in range(n_repeats):
        # Start below any reachable score so the first candidate is always
        # accepted.  Starting at 0 with a strict '>' left the fallback
        # ETA=0.5 in place when every fold scored 0, and ETA=0.5 gives
        # phi=0, which CW cannot handle.
        best_score = -np.inf
        best_eta = None

        for ETA in etas:
            fold_scores = []

            for fold in range(cv):
                # True marks training rows; the current fold is held out.
                train_mask = np.ones(n_train, dtype=bool)
                train_mask[fold_size * fold:fold_size * (fold + 1)] = False

                estimator_cv = CW(ETA=ETA, n_iter=5, shuffle=True, random_state=repeat, n_samples_limit=3000)
                estimator_cv.fit(X_train[train_mask], Y_train[train_mask])
                fold_scores.append(estimator_cv.score(X_train[~train_mask], Y_train[~train_mask]))

            score_sum = np.sum(fold_scores)

            if score_sum > best_score:
                best_score = score_sum
                best_eta = ETA

            print('Current parameters:', best_eta)

        estimator = CW(ETA=best_eta, n_iter=5, shuffle=True, random_state=repeat, n_samples_limit=3000)
        test_scores.append(estimator.fit(X_train, Y_train).score(X_test, Y_test))

        print('iter:{0:d} end'.format(repeat))
        print()

    print('Average_score: {0:.4f}'.format(np.mean(test_scores)))
    print('Std_score: {0:.4f}'.format(np.std(test_scores)))

In [3]:
# Evaluate CW on the svmlight-format file 'datasets/splice'.
# Assumes the labels in that file are already +1/-1 (CW rejects anything else).
X_raw, Y = load_svmlight_file('datasets/splice')
# Use .toarray() (plain ndarray) instead of .todense(): the latter returns
# np.matrix, which recent scikit-learn input validation refuses.
X = scale(X_raw.toarray())

GridSearchCV_CW(X, Y)

Current parameters: 0.55
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
iter:0 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.65
Current parameters: 0.65
Current parameters: 0.75
Current parameters: 0.8
Current parameters: 0.8
Current parameters: 0.8
Current parameters: 0.8
iter:1 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:2 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:3 end

Current parameters: 0.55
Current parameters: 0.55
Current parame

In [4]:
# Evaluate CW on the svmlight-format file 'datasets/svmguide3'.
# Assumes the labels in that file are already +1/-1 (CW rejects anything else).
X_raw, Y = load_svmlight_file('datasets/svmguide3')
# Use .toarray() (plain ndarray) instead of .todense(): the latter returns
# np.matrix, which recent scikit-learn input validation refuses.
X = scale(X_raw.toarray())

GridSearchCV_CW(X, Y)

Current parameters: 0.55
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
iter:0 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:1 end

Current parameters: 0.55
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.7
Current parameters: 0.75
Current parameters: 0.8
Current parameters: 0.85
Current parameters: 0.85
Current parameters: 0.95
iter:2 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.8
Current parameters: 0.85
Current parameters: 0.9
Current parameters: 0.95
iter:3 end

Current parameters: 0.55
Current parameters: 0.6
Current parameter

In [5]:
# Evaluate CW on a synthetic two-class problem: 3000 samples, 50 features,
# all of them informative, one cluster per class, no label noise.
X, Y = make_classification(
    n_samples=3000,
    n_features=50,
    n_informative=50,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
X = scale(X)
# Relabel for CW: class 1 stays +1, everything else becomes -1.
Y = np.where(Y == 1, 1, -1)

GridSearchCV_CW(X, Y)

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:0 end

Current parameters: 0.55
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
Current parameters: 0.6
iter:1 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:2 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.7
Current parameters: 0.7
Current parameters: 0.7
Current parameters: 0.7
Current parameters: 0.7
Current parameters: 0.95
iter:3 end

Current parameters: 0.55
Current parameters: 0.6
Current paramete

In [8]:
# Evaluate CW on the svmlight-format file 'datasets/usps', turned into a
# binary task: labels >= 5 become +1, all others -1.
X_raw, Y_raw = load_svmlight_file('datasets/usps')
# Use .toarray() (plain ndarray) instead of .todense(): the latter returns
# np.matrix, which recent scikit-learn input validation refuses.
X = scale(X_raw.toarray())
Y = np.where(Y_raw >= 5, 1, -1)

GridSearchCV_CW(X, Y)

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:0 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:1 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:2 end

Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
Current parameters: 0.55
iter:3 end

Current parameters: 0.55
Current parameters: 0.55
Cu