In [1]:
from abc import ABCMeta, abstractmethod
import numpy as np
from scipy.stats import norm
from scipy import sparse
from math import sqrt
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import shuffle
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

class BaseSCW(BaseEstimator, metaclass=ABCMeta):
    """Shared machinery for the SCW-I / SCW-II online binary classifiers.

    The model is a mean weight vector ``mu`` of shape ``(n_features, 1)`` and
    a matrix ``Sigma`` of shape ``(n_features, n_features)`` that every update
    keeps diagonal.  Subclasses only provide the step size via
    :meth:`_calc_alpha`.

    Parameters
    ----------
    C : float
        Trade-off parameter; stored here and consumed by the subclasses'
        ``_calc_alpha``.
    ETA : float
        Confidence level.  It is converted once, at construction time, into
        ``phi = norm.ppf(ETA)``.
    n_iter : int
        Number of passes over the data performed by one ``fit`` call.
    shuffle : bool
        Whether the data is reshuffled before every pass.
    random_state : int
        Seed forwarded to ``sklearn.utils.shuffle``.
    n_samples_limit : int
        Upper bound on the number of samples consumed in each pass.
    """

    @abstractmethod
    def __init__(self, C=1.0, ETA=0.75, n_iter=5, shuffle=True, random_state=0, n_samples_limit=3000):
        self.C = C
        # Keep the raw constructor argument: BaseEstimator.get_params() (and
        # therefore clone() and repr()) looks up every __init__ parameter as
        # an attribute of the same name.
        self.ETA = ETA
        # NOTE: phi is derived here only; changing ETA afterwards (e.g. via
        # set_params) does not refresh it.
        self.phi = norm.ppf(ETA)
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.n_samples_limit = n_samples_limit

        self.mu = None
        self.Sigma = None

        self.has_fitted = False

    def fit(self, X, Y):
        """Run ``n_iter`` online passes over ``(X, Y)``.

        The first call initialises ``mu`` to zeros and ``Sigma`` to the
        identity; later calls continue from the current state.

        Parameters
        ----------
        X : 2-D array of shape (n_samples, n_features)
        Y : array of labels, each of which must be ``1`` or ``-1``

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or a label is not ``1``/``-1``.
        """
        if np.ndim(X) != 2:
            raise ValueError('Estimator expects 2 dim array.')

        # Read the shape on every call: the per-pass loop below needs
        # n_samples even when the model state already exists.
        n_samples, n_features = X.shape

        if not self.has_fitted:
            self.mu = np.zeros((n_features, 1))
            self.Sigma = np.eye(n_features)
            self.has_fitted = True

        # Each pass consumes at most n_samples_limit rows.
        n_used = min(n_samples, self.n_samples_limit)

        for _ in range(self.n_iter):
            if self.shuffle:
                # `shuffle` is the module-level sklearn.utils.shuffle, not the
                # boolean attribute tested just above.
                X, Y = shuffle(X, Y, random_state=self.random_state)

            for i in range(n_used):
                self._update(X[i:i + 1].reshape(-1, 1), Y[i:i + 1].item(0))
        return self

    def _update(self, x, y):
        """Apply one online update for the sample ``(x, y)``.

        Parameters
        ----------
        x : column vector of shape (n_features, 1)
        y : label, must be ``1`` or ``-1``

        Raises
        ------
        ValueError
            If ``y`` is neither ``1`` nor ``-1``.
        """
        if y != 1 and y != -1:
            raise ValueError('Data label must be 1 or -1.')

        # Sigma @ x is needed for v, for the mean update and for the
        # covariance update, so compute it once.
        sigma_x = self.Sigma @ x

        m = (y * self.mu.T @ x).item(0)   # signed margin
        v = (x.T @ sigma_x).item(0)       # x' Sigma x

        # Update only when the confidence-adjusted margin is violated.
        if self.phi * sqrt(v) - m > 0:
            alpha = self._calc_alpha(m, v)
            beta = self._calc_beta(m, v, alpha)

            self.mu = self.mu + alpha * y * sigma_x
            # Only the diagonal of Sigma - beta * Sigma x x' Sigma is kept.
            # Sigma is symmetric (it is diagonal from initialisation onward),
            # so that diagonal equals diag(Sigma) - beta * (Sigma x)**2
            # element-wise; this avoids building the full matrix product and
            # matches it up to floating-point rounding.
            shrink = np.square(np.asarray(sigma_x)).ravel()
            self.Sigma = np.diag(np.diag(self.Sigma) - beta * shrink)

    @abstractmethod
    def _calc_alpha(self, m, v):
        """Return the step size for margin ``m`` and variance ``v`` (subclass hook)."""
        # calc in a child class
        pass

    def _calc_beta(self, m, v, alpha):
        """Return the covariance shrink factor for the given step size.

        ``m`` is accepted for signature symmetry with ``_calc_alpha`` but is
        not used in the computation.
        """
        a = alpha * v * self.phi
        b = sqrt(a**2 + 4 * v)
        u = (- a + b)**2 / 4
        return (alpha * self.phi) / (sqrt(u) + a)

    def predict(self, X):
        """Return ``sign(X @ mu)`` as an array of shape ``(n_samples, 1)``.

        Entries are ``0`` for rows whose score is exactly zero.
        """
        return np.sign(X @ self.mu)


class SCW1(ClassifierMixin, BaseSCW):
    """SCW-I classifier: the step size ``alpha`` is clipped to ``[0, C]``.

    ``ClassifierMixin`` is listed before the estimator base because
    scikit-learn expects mixins on the left of ``BaseEstimator`` in the MRO;
    ``score`` is still inherited from the mixin.

    Parameters are those of ``BaseSCW``: ``C``, ``ETA``, ``n_iter``,
    ``shuffle``, ``random_state`` and ``n_samples_limit``.
    """

    def __init__(self, C=1.0, ETA=0.75, n_iter=5, shuffle=True, random_state=0, n_samples_limit=3000):
        super().__init__(C=C, ETA=ETA, n_iter=n_iter, shuffle=shuffle, random_state=random_state, n_samples_limit=n_samples_limit)
        # Constants of the closed-form step size; they depend only on phi,
        # which the base constructor has just derived from ETA.
        self.psi = 1 + self.phi**2 / 2
        self.zeta = 1 + self.phi**2

    def _calc_alpha(self, m, v):
        """Return the SCW-I step size for margin ``m`` and variance ``v``.

        The unconstrained value is
        ``(-m * psi + sqrt(m**2 * phi**4 / 4 + v * phi**2 * zeta)) / (v * zeta)``,
        which is then clamped into ``[0, C]``.
        """
        phi2 = self.phi**2
        a = (m * phi2)**2 / 4
        b = v * phi2 * self.zeta
        c = (- m * self.psi + sqrt(a + b)) / (v * self.zeta)
        return min(self.C, max(0, c))

class SCW2(ClassifierMixin, BaseSCW):
    """SCW-II classifier: ``C`` enters the step size through ``v + 1 / (2 * C)``.

    ``ClassifierMixin`` is listed before the estimator base because
    scikit-learn expects mixins on the left of ``BaseEstimator`` in the MRO;
    ``score`` is still inherited from the mixin.

    Parameters are those of ``BaseSCW``: ``C``, ``ETA``, ``n_iter``,
    ``shuffle``, ``random_state`` and ``n_samples_limit``.
    """

    def __init__(self, C=1.0, ETA=0.75, n_iter=5, shuffle=True, random_state=0, n_samples_limit=3000):
        super().__init__(C=C, ETA=ETA, n_iter=n_iter, shuffle=shuffle, random_state=random_state, n_samples_limit=n_samples_limit)

    def _calc_alpha(self, m, v):
        """Return the SCW-II step size for margin ``m`` and variance ``v``.

        With ``n = v + 1 / (2 * C)`` and
        ``gamma = phi * sqrt(phi**2 * m**2 * v**2 + 4 * n * v * (n + v * phi**2))``
        the result is
        ``max(0, (-(2 * m * n + phi**2 * m * v) + gamma) / (2 * (n**2 + n * v * phi**2)))``.
        Unlike SCW-I there is no upper clamp.
        """
        phi2 = self.phi**2
        n = v + 1 / (2 * self.C)
        a = phi2 * (m * v)**2
        b = 4 * n * v * (n + v * phi2)
        gamma = self.phi * sqrt(a + b)

        c = - (2 * m * n + phi2 * m * v)
        d = n**2 + n * v * phi2
        e = (c + gamma) / (2 * d)
        return max(0, e)

# SCW-I

In [2]:
def GridSearchCV_SCW1(X, Y, n_repeats=10, cv=5, C_grid=None, ETA_grid=None, verbose=True):
    """Repeated grid search over ``(C, ETA)`` for ``SCW1`` with a held-out test score.

    The data is split once into 70 % train / 30 % test (``random_state=0``).
    For each repeat ``i`` in ``range(n_repeats)``, every ``(C, ETA)`` pair is
    scored by ``cv``-fold validation on the training part using contiguous
    folds of ``n_train // cv`` rows; the pair with the highest summed fold
    accuracy is refit on the whole training part and scored on the test part.
    Because the split is fixed, repeats differ only in the estimator's
    shuffle seed (``random_state=i``).

    Rows beyond ``cv * (n_train // cv)`` are never used for validation; they
    always stay in the fitting part of each fold.

    Parameters
    ----------
    X : 2-D numpy array of features (must support boolean-mask row indexing)
    Y : 1-D numpy array of labels in ``{1, -1}``
    n_repeats : int, default 10
        Number of search-and-evaluate repeats.
    cv : int, default 5
        Number of validation folds.
    C_grid, ETA_grid : sequences or None
        Candidate values; ``None`` selects the built-in grids.
    verbose : bool, default True
        Print the best pair found so far after every grid point.

    Returns
    -------
    None
        Results are printed: a line per repeat, then the mean and standard
        deviation of the test accuracies.
    """
    if C_grid is None:
        C_grid = [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]
    if ETA_grid is None:
        ETA_grid = [0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    n_train = X_train.shape[0]
    fold_size = n_train // cv

    # The fold layout depends on neither the hyper-parameters nor the seed,
    # so the validation masks are built once up front.
    held_out_masks = []
    for j in range(cv):
        held_out = np.zeros(n_train, dtype=bool)
        held_out[fold_size * j:fold_size * (j + 1)] = True
        held_out_masks.append(held_out)

    test_scores = []

    for i in range(n_repeats):
        # Fallback values, used only if no grid point scores above zero.
        bestscore = 0
        bestC = 2**-4
        bestETA = 0.5

        for C in C_grid:
            for ETA in ETA_grid:
                fold_scores = []

                for held_out in held_out_masks:
                    estimator_cv = SCW1(C=C, ETA=ETA, n_iter=5, shuffle=True, random_state=i, n_samples_limit=3000)
                    estimator_cv.fit(X_train[~held_out], Y_train[~held_out])
                    fold_scores.append(estimator_cv.score(X_train[held_out], Y_train[held_out]))

                score_cv_sum = np.sum(fold_scores)

                # Strict comparison: ties keep the earlier grid point.
                if score_cv_sum > bestscore:
                    bestscore = score_cv_sum
                    bestC = C
                    bestETA = ETA

                if verbose:
                    print('Current parameters:', bestC, bestETA)

        estimator = SCW1(C=bestC, ETA=bestETA, n_iter=5, shuffle=True, random_state=i, n_samples_limit=3000)
        test_scores.append(estimator.fit(X_train, Y_train).score(X_test, Y_test))

        print('iter:{0:d} end'.format(i))
        print()

    result = np.array(test_scores)
    print('Average_score: {0:.4f}'.format(result.mean()))
    print('Std_score: {0:.4f}'.format(result.std()))

In [3]:
# Load the splice dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/splice')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())

GridSearchCV_SCW1(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Cu

In [4]:
# Load the svmguide3 dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/svmguide3')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())

GridSearchCV_SCW1(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current para

In [3]:
# Synthetic two-class problem: 3000 samples, 50 features, all informative,
# one cluster per class, no label noise.
X, Y = make_classification(
    n_samples=3000,
    n_features=50,
    n_informative=50,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
X = scale(X)
# The SCW estimators require labels in {1, -1}: keep 1, map everything else to -1.
Y = np.where(Y == 1, 1, -1)

GridSearchCV_SCW1(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.125 0.65
Current parameters: 0.25 0.7
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current parameters: 0.25 0.75
Current pa

In [4]:
# Load the usps dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/usps')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())
# Binarise the labels: values >= 5 become 1, all others -1.
Y = np.where(Y >= 5, 1, -1)

GridSearchCV_SCW1(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.75
Current parameters: 0.0625 0.75
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Current parameters: 0.0625 0.9
Cur

# SCW-II

In [15]:
def GridSearchCV_SCW2(X, Y, n_repeats=10, cv=5, C_grid=None, ETA_grid=None, verbose=True):
    """Repeated grid search over ``(C, ETA)`` for ``SCW2`` with a held-out test score.

    The data is split once into 70 % train / 30 % test (``random_state=0``).
    For each repeat ``i`` in ``range(n_repeats)``, every ``(C, ETA)`` pair is
    scored by ``cv``-fold validation on the training part using contiguous
    folds of ``n_train // cv`` rows; the pair with the highest summed fold
    accuracy is refit on the whole training part and scored on the test part.
    Because the split is fixed, repeats differ only in the estimator's
    shuffle seed (``random_state=i``).

    Rows beyond ``cv * (n_train // cv)`` are never used for validation; they
    always stay in the fitting part of each fold.

    Parameters
    ----------
    X : 2-D numpy array of features (must support boolean-mask row indexing)
    Y : 1-D numpy array of labels in ``{1, -1}``
    n_repeats : int, default 10
        Number of search-and-evaluate repeats.
    cv : int, default 5
        Number of validation folds.
    C_grid, ETA_grid : sequences or None
        Candidate values; ``None`` selects the built-in grids.
    verbose : bool, default True
        Print the best pair found so far after every grid point.

    Returns
    -------
    None
        Results are printed: a line per repeat, then the mean and standard
        deviation of the test accuracies.
    """
    if C_grid is None:
        C_grid = [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]
    if ETA_grid is None:
        ETA_grid = [0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    n_train = X_train.shape[0]
    fold_size = n_train // cv

    # The fold layout depends on neither the hyper-parameters nor the seed,
    # so the validation masks are built once up front.
    held_out_masks = []
    for j in range(cv):
        held_out = np.zeros(n_train, dtype=bool)
        held_out[fold_size * j:fold_size * (j + 1)] = True
        held_out_masks.append(held_out)

    test_scores = []

    for i in range(n_repeats):
        # Fallback values, used only if no grid point scores above zero.
        bestscore = 0
        bestC = 2**-4
        bestETA = 0.5

        for C in C_grid:
            for ETA in ETA_grid:
                fold_scores = []

                for held_out in held_out_masks:
                    estimator_cv = SCW2(C=C, ETA=ETA, n_iter=5, shuffle=True, random_state=i, n_samples_limit=3000)
                    estimator_cv.fit(X_train[~held_out], Y_train[~held_out])
                    fold_scores.append(estimator_cv.score(X_train[held_out], Y_train[held_out]))

                score_cv_sum = np.sum(fold_scores)

                # Strict comparison: ties keep the earlier grid point.
                if score_cv_sum > bestscore:
                    bestscore = score_cv_sum
                    bestC = C
                    bestETA = ETA

                if verbose:
                    print('Current parameters:', bestC, bestETA)

        estimator = SCW2(C=bestC, ETA=bestETA, n_iter=5, shuffle=True, random_state=i, n_samples_limit=3000)
        test_scores.append(estimator.fit(X_train, Y_train).score(X_test, Y_test))

        print('iter:{0:d} end'.format(i))
        print()

    result = np.array(test_scores)
    print('Average_score: {0:.4f}'.format(result.mean()))
    print('Std_score: {0:.4f}'.format(result.std()))

In [8]:
# Load the splice dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/splice')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())

GridSearchCV_SCW2(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.75
Current parameters: 0.0625 0.75
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.85
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current parameters: 0.0625 0.95
Current pa

In [9]:
# Load the svmguide3 dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/svmguide3')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())

GridSearchCV_SCW2(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.125 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.25 0.6
Current parameters: 0.5 0.6
Current parameters: 0.5 0.6
Current parameters: 0.5 0.6
Current parameters: 0.5 0.6
Current parameters: 0.5 0.6
Current parameters

In [11]:
# Synthetic two-class problem: 3000 samples, 50 features, all informative,
# one cluster per class, no label noise.
X, Y = make_classification(
    n_samples=3000,
    n_features=50,
    n_informative=50,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
X = scale(X)
# The SCW estimators require labels in {1, -1}: keep 1, map everything else to -1.
Y = np.where(Y == 1, 1, -1)

GridSearchCV_SCW2(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.65
Current p

In [16]:
# Load the usps dataset (svmlight format) and standardise each feature column.
X, Y = load_svmlight_file('datasets/usps')
# toarray() gives a plain ndarray; todense() returns np.matrix, which current
# scikit-learn input validation rejects.
X = scale(X.toarray())
# Binarise the labels: values >= 5 become 1, all others -1.
Y = np.where(Y >= 5, 1, -1)

GridSearchCV_SCW2(X, Y)

Current parameters: 0.0625 0.55
Current parameters: 0.0625 0.6
Current parameters: 0.0625 0.65
Current parameters: 0.0625 0.7
Current parameters: 0.0625 0.75
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Current parameters: 0.0625 0.8
Curre