In [None]:
from abc import ABCMeta, abstractmethod
import numpy as np
from scipy.stats import norm
from scipy import sparse
from math import sqrt
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import shuffle
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV, train_test_split

# Public API: only the two concrete classifiers are exported; the abstract
# BaseSCW class is intentionally left out of ``from module import *``.
__all__ = ['SCW1', 'SCW2']


class BaseSCW(BaseEstimator, metaclass=ABCMeta):
    """Shared training and prediction logic for the SCW1 / SCW2 classifiers.

    The model keeps a weight mean ``mu`` (column vector) and a square matrix
    ``Sigma`` and updates both one sample at a time.  Subclasses provide the
    step size through :meth:`calc_alpha`.
    NOTE(review): "SCW" presumably stands for Soft Confidence-Weighted
    learning -- confirm against the reference the formulas were taken from.

    Parameters
    ----------
    C : float
        Parameter consumed by ``calc_alpha`` in the subclasses.
    ETA : float
        Probability passed to ``scipy.stats.norm.ppf`` to obtain ``phi``.
    n_iter : int
        Number of passes over the data performed by each ``fit`` call.
    shuffle : bool
        Whether the data is reshuffled before every pass.
    random_state : int
        Seed forwarded to ``sklearn.utils.shuffle``.
    n_samples : int
        Upper bound on the number of samples consumed per pass.
    """

    @abstractmethod
    def __init__(self, C=1.0, ETA=0.9, n_iter=5, shuffle=True, random_state=0, n_samples=3000):
        self.C = C
        self.ETA = ETA
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.n_samples = n_samples

        self.mu = None
        self.Sigma = None
        self.has_fitted = False

        # Kept so that ``phi`` is readable right after construction, as before.
        self._refresh_confidence()

    def _refresh_confidence(self):
        """Recompute every quantity derived from ``ETA``.

        ``set_params`` (used by ``clone`` / ``GridSearchCV``) only assigns
        ``self.ETA``; calling this at the start of ``fit`` guarantees the
        derived values match the current hyper-parameter.
        """
        self.phi = norm.ppf(self.ETA)

    def fit(self, X, Y):
        """Run ``n_iter`` passes of per-sample updates over ``(X, Y)``.

        The first call initialises ``mu`` to zeros and ``Sigma`` to the
        identity; later calls continue from the current state.

        Parameters
        ----------
        X : 2-D array, one sample per row.
        Y : 1-D array of labels, each of which must be 1 or -1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if its feature count differs from the one
            seen on the first call, or if a label is neither 1 nor -1.
        """
        if np.ndim(X) != 2:
            raise ValueError("Estimator expects 2 dim array.")

        # Always bind the shape: the original only did so on the first call,
        # which made every subsequent ``fit`` fail with UnboundLocalError.
        n_samples, n_features = X.shape
        self._refresh_confidence()

        if not self.has_fitted:
            self.mu = np.zeros((n_features, 1))
            self.Sigma = np.eye(n_features)
            self.has_fitted = True
        elif self.mu.shape[0] != n_features:
            raise ValueError(
                "X has {0} features, but the estimator was fitted with {1}.".format(
                    n_features, self.mu.shape[0]))

        # Never consume more than ``self.n_samples`` rows per pass.
        n_used = min(n_samples, self.n_samples)

        for _ in range(self.n_iter):
            if self.shuffle:
                # ``shuffle`` here is the module-level sklearn helper, not the flag.
                X, Y = shuffle(X, Y, random_state=self.random_state)
            for i in range(n_used):
                self._update(X[i:i + 1].T, Y[i:i + 1])
        return self

    def _update(self, x, y):
        """Apply one update for the column vector ``x`` with label ``y``.

        Raises
        ------
        ValueError
            If ``y`` is neither 1 nor -1.
        """
        if y != 1 and y != -1:
            raise ValueError("Data label must be 1 or -1.")

        sigma_x = self.Sigma @ x
        m = (y * self.mu.T @ x).item(0)
        v = (x.T @ sigma_x).item(0)

        # Update only when the loss ``phi * sqrt(v) - m`` is strictly positive.
        if self.phi * sqrt(v) - m > 0:
            alpha = self.calc_alpha(m, v)
            beta = self.calc_beta(m, v, alpha)

            self.mu = self.mu + alpha * y * sigma_x
            # (Sigma x)(x^T Sigma) is an outer product: O(d^2) instead of the
            # O(d^3) matrix-matrix product the left-to-right chain performed.
            self.Sigma = self.Sigma - beta * (sigma_x @ (x.T @ self.Sigma))

    @abstractmethod
    def calc_alpha(self, m, v):
        """Return the step size for margin ``m`` and variance ``v``."""

    def calc_beta(self, m, v, alpha):
        """Return the coefficient of the ``Sigma`` update for step ``alpha``."""
        a = alpha * v * self.phi
        b = sqrt(a**2 + 4 * v)
        u = (- a + b)**2 / 4
        return (alpha * self.phi) / (sqrt(u) + a)

    def predict(self, X):
        """Return ``sign(X @ mu)``; the result has one column per ``mu`` column,
        i.e. shape ``(n_rows, 1)``, and contains 0 where the score is exactly 0."""
        return np.sign(X @ self.mu)


class SCW1(BaseSCW, ClassifierMixin):
    """SCW variant whose step size is clipped to the interval ``[0, C]``."""

    def __init__(self, C=1.0, ETA=0.9, n_iter=5, shuffle=True, random_state=0, n_samples=3000):
        # Forward the caller's n_samples (it used to be hard-coded to 3000).
        super().__init__(C=C, ETA=ETA, n_iter=n_iter, shuffle=shuffle,
                         random_state=random_state, n_samples=n_samples)

    def _refresh_confidence(self):
        """Recompute ``phi`` and the two constants derived from it."""
        super()._refresh_confidence()
        self.psi = 1 + self.phi**2 / 2
        self.zeta = 1 + self.phi**2

    def calc_alpha(self, m, v):
        """Return ``min(C, max(0, c))`` for margin ``m`` and variance ``v``."""
        phi2 = self.phi**2
        a = (m * phi2)**2 / 4
        b = v * phi2 * self.zeta
        c = (- m * self.psi + sqrt(a + b)) / (v * self.zeta)
        return min(self.C, max(0, c))


class SCW2(BaseSCW, ClassifierMixin):
    """SCW variant in which ``C`` enters the step size through ``1 / (2 * C)``."""

    def __init__(self, C=1.0, ETA=0.9, n_iter=5, shuffle=True, random_state=0, n_samples=3000):
        # Forward the caller's n_samples (it used to be hard-coded to 3000).
        super().__init__(C=C, ETA=ETA, n_iter=n_iter, shuffle=shuffle,
                         random_state=random_state, n_samples=n_samples)

    def calc_alpha(self, m, v):
        """Return the non-negative step size for margin ``m`` and variance ``v``."""
        phi2 = self.phi**2
        n = v + 1 / (2 * self.C)
        a = phi2 * (m * v)**2
        b = 4 * n * v * (n + v * phi2)
        gamma = self.phi * sqrt(a + b)

        c = - (2 * m * n + phi2 * m * v)
        d = n**2 + n * v * phi2
        e = (c + gamma) / (2 * d)
        return max(0, e)

In [None]:
# Benchmark SCW1 and SCW2 on the svmlight-format file 'datasets/splice':
# 10 grid-searched runs per classifier, each with a different shuffle seed,
# scored on a fixed 30% hold-out split.
X, Y = load_svmlight_file('datasets/splice')
# toarray() yields a plain ndarray; todense() returns np.matrix, which recent
# scikit-learn releases refuse as input.
X = scale(X.toarray())

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

n_runs = 10  # named n_runs so the builtin iter() is not shadowed
param_grid = {
    'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16],
    'ETA': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
}
results = {}

for name, scw_cls in (('SCW1', SCW1), ('SCW2', SCW2)):
    scores = []
    for seed in range(n_runs):
        estimator = GridSearchCV(
            estimator=scw_cls(n_iter=5, shuffle=True, random_state=seed, n_samples=3000),
            param_grid=param_grid,
            cv=5,
            verbose=1,
        )

        estimator.fit(X_train, Y_train)
        score = estimator.score(X_test, Y_test)

        print(name + ': the best parameter: ', estimator.best_params_)
        print('{0}: score: {1:.4f}'.format(name, score))
        scores.append(score)

    results[name] = np.array(scores)
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

# Keep the per-classifier arrays available under their original names.
result1 = results['SCW1']
result2 = results['SCW2']

print('total:')
for name in ('SCW1', 'SCW2'):
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

In [None]:
# Benchmark SCW1 and SCW2 on the svmlight-format file 'datasets/svmguide3':
# 10 grid-searched runs per classifier, each with a different shuffle seed,
# scored on a fixed 30% hold-out split.
X, Y = load_svmlight_file('datasets/svmguide3')
# toarray() yields a plain ndarray; todense() returns np.matrix, which recent
# scikit-learn releases refuse as input.
X = scale(X.toarray())

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

n_runs = 10  # named n_runs so the builtin iter() is not shadowed
param_grid = {
    'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16],
    'ETA': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
}
results = {}

for name, scw_cls in (('SCW1', SCW1), ('SCW2', SCW2)):
    scores = []
    for seed in range(n_runs):
        estimator = GridSearchCV(
            estimator=scw_cls(n_iter=5, shuffle=True, random_state=seed, n_samples=3000),
            param_grid=param_grid,
            cv=5,
            verbose=1,
        )

        estimator.fit(X_train, Y_train)
        score = estimator.score(X_test, Y_test)

        print(name + ': the best parameter: ', estimator.best_params_)
        print('{0}: score: {1:.4f}'.format(name, score))
        scores.append(score)

    results[name] = np.array(scores)
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

# Keep the per-classifier arrays available under their original names.
result1 = results['SCW1']
result2 = results['SCW2']

print('total:')
for name in ('SCW1', 'SCW2'):
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

In [None]:
# Benchmark SCW1 and SCW2 on the svmlight-format file 'datasets/mushrooms':
# 10 grid-searched runs per classifier, each with a different shuffle seed,
# scored on a fixed 30% hold-out split.
X, Y = load_svmlight_file('datasets/mushrooms')
# toarray() yields a plain ndarray; todense() returns np.matrix, which recent
# scikit-learn releases refuse as input.
X = scale(X.toarray())
# The classifiers only accept labels 1 / -1: label 2 becomes 1, the rest -1.
Y = np.where(Y == 2, 1, -1)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

n_runs = 10  # named n_runs so the builtin iter() is not shadowed
param_grid = {
    'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16],
    'ETA': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
}
results = {}

for name, scw_cls in (('SCW1', SCW1), ('SCW2', SCW2)):
    scores = []
    for seed in range(n_runs):
        estimator = GridSearchCV(
            estimator=scw_cls(n_iter=5, shuffle=True, random_state=seed, n_samples=3000),
            param_grid=param_grid,
            cv=5,
            verbose=1,
        )

        estimator.fit(X_train, Y_train)
        score = estimator.score(X_test, Y_test)

        print(name + ': the best parameter: ', estimator.best_params_)
        print('{0}: score: {1:.4f}'.format(name, score))
        scores.append(score)

    results[name] = np.array(scores)
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

# Keep the per-classifier arrays available under their original names.
result1 = results['SCW1']
result2 = results['SCW2']

print('total:')
for name in ('SCW1', 'SCW2'):
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

In [None]:
# Benchmark SCW1 and SCW2 on a synthetic two-class problem generated by
# make_classification (4000 samples, 65 informative features, no label noise):
# 10 grid-searched runs per classifier, each with a different shuffle seed,
# scored on a fixed 30% hold-out split.
X, Y = make_classification(n_samples=4000, n_features=65, n_informative=65, n_redundant=0, n_classes=2, n_clusters_per_class=1,
                           weights=None, flip_y=0, shuffle=True, random_state=0)
X = scale(X)
# The classifiers only accept labels 1 / -1: label 1 stays 1, the rest become -1.
Y = np.where(Y == 1, 1, -1)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

n_runs = 10  # named n_runs so the builtin iter() is not shadowed
param_grid = {
    'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16],
    'ETA': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
}
results = {}

for name, scw_cls in (('SCW1', SCW1), ('SCW2', SCW2)):
    scores = []
    for seed in range(n_runs):
        estimator = GridSearchCV(
            estimator=scw_cls(n_iter=5, shuffle=True, random_state=seed, n_samples=3000),
            param_grid=param_grid,
            cv=5,
            verbose=1,
        )

        estimator.fit(X_train, Y_train)
        score = estimator.score(X_test, Y_test)

        print(name + ': the best parameter: ', estimator.best_params_)
        print('{0}: score: {1:.4f}'.format(name, score))
        scores.append(score)

    results[name] = np.array(scores)
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))

# Keep the per-classifier arrays available under their original names.
result1 = results['SCW1']
result2 = results['SCW2']

print('total:')
for name in ('SCW1', 'SCW2'):
    print('{0}: average_score: {1:.4f}'.format(name, results[name].mean()))
    print('{0}: std_score: {1:.4f}'.format(name, results[name].std()))