In [1]:
import numpy as np

In [21]:
class ScratchSVMClassifier():
    """
  Scratch implementation of SVM classifier

     Parameters
     ----------
     num_iter : int
       number of iterations
     lr: float
       learning rate
     kernel :str
       Kernel type. Linear kernel (linear) or polynomial kernel (polly)
     threshold : float
       Threshold for choosing support vectors
     verbose : bool
       True to output the learning process

     Attributes
     ----------
     self.n_support_vectors : int
       Number of support vectors
     self.index_support_vectors : ndarray, shape (n_support_vectors,)
       Support vector index
     self.X_sv : ndarray of the following shape, shape(n_support_vectors, n_features)
       Support vector features
     self.lam_sv : ndarray of the following shape, shape(n_support_vectors, 1)
       undetermined multiplier for support vectors
     self.y_sv : ndarray of the following shape, shape(n_support_vectors, 1)
       Support vector labels

    """

    def __init__(self, num_iter, lr, kernel='linear', hit_vector_cnt_threshold = 2, 
                 threshold=1e-5, verbose=False):
        # Record hyperparameters as attributes
        self.iter = num_iter
        self.lr = lr
        self.kernel = kernel
        self.threshold = threshold
        self.verbose = verbose
        self.hit_vector_cnt_threshold = hit_vector_cnt_threshold

    def fit(self, x, y, x_val=None, y_val=None):
        
        """
       Learn an SVM classifier. If validation data is input, the accuracy for that data is also calculated for each iteration.

         Parameters
         ----------
         X : ndarray of the following shape, shape (n_samples, n_features)
             Training data features
         y : ndarray of the following shape, shape (n_samples, )
             Ground truth value of training data
         X_val : ndarray of the following shape, shape (n_samples, n_features)
             Features of validation data
         y_val : ndarray of the following shape, shape (n_samples, )
             Correct value of validation data
        """


        y = y.reshape(len(y), 1)

        if x_val is not None:
             y_val = y_val.reshape(len(y_val), 1)

        bias = np.array([1 for _ in range(x.shape[0])]).reshape(x.shape[0], 1)
        x = np.concatenate((x, bias), axis=1)

        if x_val is not None:
            bias = np.array([1 for _ in range(x_val.shape[0])]).reshape(x_val.shape[0], 1)
            x_val = np.concatenate((x_val, bias), axis=1)

        self.num_of_feature = x.shape[1]
        self.num_of_samples = x.shape[0]

        self.label1_val = np.max(y)
        self.label0_val = np.min(y)
        
        train_data = np.concatenate((x, y), axis=1)
        
        LAMBDA_INIT_MIN = 1
        LAMBDA_INIT_MAX = 10
        LAMBDA_INIT_SCALE = 1e-07
        self.lam = np.random.randint(LAMBDA_INIT_MIN, LAMBDA_INIT_MAX, X.shape[0]) * LAMBDA_INIT_SCALE
        self.lam = np.reshape(self.lam, (len(self.lam), 1))
        self.lam_cal_log = np.zeros((len(self.lam), 1))
        #print("Initial lambda:\n{}".format(self.lam))


        for i in range(0, self.iter):
            self.lam = self._gradient_descent(x, y)
            if self.hit_vector_cnt_threshold <= np.sum(self.lam > self.threshold):
                
                selector = self.lam[:, 0] * np.ones(train_data.shape[0])
                sp_vector = train_data[selector > self.threshold, :]
                label_p_cnt = np.sum([sp_vector[:, :-1] == 1])
                label_n_cnt = np.sum([sp_vector[:, :-1] == -1])

        if self.verbose:
            #verboseをTrueにした際は学習過程を出力
            print()


    def predict(self, x):
        """
        Estimate labels using SVM classifier.

         Parameters
         ----------
         X : ndarray of the following shape, shape (n_samples, n_features)
             sample

         Returns
         -------
             ndarray of the following shape, shape (n_samples, 1)
             Estimation results by SVM classifier
        """

        pred = self._svm_kernel_function(x, x)
        
        return

    def _svm_kernel_function(self, X1, X2):
    
        if self.kernel == 'linear':
            ans = np.dot(X1, X2.T)
        # elif self.kernel == 'rbf':
        #     ans = self.gamma * (np.dot(X1.T, X2) + self.theta0)**self.pow_d
        else:
            ans = 0

        return ans

    def _gradient_descent(self, x, y):

        print(y.shape, self.lam.shape, x.shape)
        tmp1 = y.T * y * self.lam * self._svm_kernel_function(x, x)
        delta = 1 - (np.sum(tmp1, axis=0))
        delta = delta.reshape(len(delta), 1)
        result = self.lam + self.lr * delta.T
        result[result < 0] = 0

        return result

Test

In [22]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import svm

x, y = datasets.make_blobs( n_samples=50, n_features=2, centers=2, 
                           cluster_std=1.05, random_state=40)
y = np.where(y == 0, -1, 1)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123
)

In [15]:
clf = ScratchSVMClassifier(num_iter=20, lr=0.05)
clf.fit(x_train, y_train)

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 50 and the array at index 1 has size 40

In [16]:
predictions = clf.predict(x_test)

Decision Boundry

In [18]:
x = iris.data[:100,:2]
y = iris.target[:100]
 (x_train, x_test, y_train, y_test) = train_test_split(x, y, test_size=0.2)
slr = ScratchLogisticRegression(num_iter=1000, lr=0.005, no_bias=True,verbose=False, lam = 0.5)
slr.fit(x_train, y_train, x_test, y_test)

IndentationError: unexpected indent (3028181523.py, line 3)