In [10]:
import numpy as np

class BaseSVM:
    """
    Binary SVM classifier trained with per-sample (stochastic) updates.

    Two solvers are selected by ``kernel``:

    * ``"linear"`` -- primal hinge-loss sub-gradient descent on ``w`` and ``b``
      with an L2 penalty weighted by ``lambda_param``.
    * ``"poly"``, ``"rbf"``, ``"sigmoid"`` -- a simplified, perceptron-style
      update of the dual coefficients ``alpha``. This is a demonstration
      solver, not SMO: ``lambda_param`` is not used and the bias ``b`` is
      never updated (it stays at 0.0).
    """

    # Kernel names understood by `_kernel_function`.
    _KERNELS = ("linear", "poly", "rbf", "sigmoid")

    def __init__(self, kernel="linear", degree=3, gamma=0.1, coef0=1, 
                 learning_rate=0.001, lambda_param=0.01, iterations=1000):
        """
        Parameters:
        -----------
        kernel : str
            Kernel type: "linear", "poly", "rbf", or "sigmoid".
        degree : int
            Degree for polynomial kernel.
        gamma : float
            Scale factor used by the poly, RBF and sigmoid kernels.
        coef0 : float
            Independent term in poly/sigmoid kernel.
        learning_rate : float
            Step size applied to every per-sample update.
        lambda_param : float
            Weight of the L2 penalty in the linear solver. Ignored by the
            kernel solver.
        iterations : int
            Number of full passes (epochs) over the training set.
        """
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.iterations = iterations
        # Learned state; everything stays None until `fit` has run.
        self.w = None        # primal weights (linear kernel only)
        self.b = None        # bias term
        self.alpha = None    # dual coefficients (non-linear kernels only)
        self.X_train = None
        self.y_train = None

    def _kernel_function(self, X, Y=None):
        """
        Compute the kernel (Gram) matrix between the rows of X and the rows of Y.

        Parameters:
        -----------
        X : array-like, shape (n_x, n_features)
        Y : array-like, shape (n_y, n_features), optional
            Defaults to X.

        Returns:
        --------
        numpy.ndarray of shape (n_x, n_y).

        Raises:
        -------
        ValueError
            If ``self.kernel`` is not one of the supported kernel names.
        """
        X = np.asarray(X, dtype=float)
        Y = X if Y is None else np.asarray(Y, dtype=float)
        if self.kernel == "linear":
            return np.dot(X, Y.T)
        elif self.kernel == "poly":
            return (self.gamma * np.dot(X, Y.T) + self.coef0) ** self.degree
        elif self.kernel == "rbf":
            # ||x - y||^2 expanded as ||x||^2 + ||y||^2 - 2 x.y
            X_norm = np.sum(X ** 2, axis=-1)
            Y_norm = np.sum(Y ** 2, axis=-1)
            K = -2 * np.dot(X, Y.T) + X_norm[:, None] + Y_norm[None, :]
            # Round-off in the expansion can give tiny negative "distances",
            # which would make the kernel exceed 1; clamp them to zero.
            K = np.maximum(K, 0.0)
            return np.exp(-self.gamma * K)
        elif self.kernel == "sigmoid":
            return np.tanh(self.gamma * np.dot(X, Y.T) + self.coef0)
        else:
            raise ValueError("Unknown kernel")

    def fit(self, X, y):
        """
        Fit the SVM model to training data.

        Parameters:
        -----------
        X : array-like
            Training features (n_samples, n_features).
        y : array-like
            Target values (n_samples,). Values <= 0 are treated as class -1,
            everything else as class +1.

        Raises:
        -------
        ValueError
            If the kernel name is unknown, X is not 2-D, or X and y disagree
            on the number of samples.
        """
        # Fail before touching any state if the kernel cannot be evaluated.
        if self.kernel not in self._KERNELS:
            raise ValueError("Unknown kernel")

        X = np.asarray(X, dtype=float)
        y = np.asarray(y).reshape(-1)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                "X and y have inconsistent numbers of samples: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        y_ = np.where(y <= 0, -1, 1)
        self.X_train = X
        self.y_train = y_

        if self.kernel == "linear":
            self._fit_linear(X, y_)
        else:
            self._fit_kernel(X, y_)

    def _fit_linear(self, X, y_):
        """Primal hinge-loss sub-gradient descent, one update per sample."""
        self.w = np.zeros(X.shape[1])
        self.b = 0.0
        self.alpha = None  # Not used in linear case
        for _ in range(self.iterations):
            for xi, yi in zip(X, y_):
                # The penalty gradient is applied on every step.
                grad_w = 2 * self.lambda_param * self.w
                if yi * (np.dot(xi, self.w) + self.b) >= 1:
                    grad_b = 0
                else:
                    # Margin violated: add the hinge-loss sub-gradient.
                    grad_w = grad_w - yi * xi
                    grad_b = -yi
                self.w -= self.learning_rate * grad_w
                self.b -= self.learning_rate * grad_b

    def _fit_kernel(self, X, y_):
        """Simplified dual update on a precomputed Gram matrix (not SMO)."""
        n_samples = X.shape[0]
        self.alpha = np.zeros(n_samples)
        self.b = 0.0       # never updated by this solver
        self.w = None      # Not used in kernel case
        K = self._kernel_function(X)
        for _ in range(self.iterations):
            for i in range(n_samples):
                # Decision value of the i-th training sample under current alpha
                decision = np.sum(self.alpha * y_ * K[:, i]) + self.b
                margin = y_[i] * decision
                if margin < 1:
                    # Grow alpha_i proportionally to the margin shortfall.
                    self.alpha[i] += self.learning_rate * (1 - margin)

    def decision_function(self, X):
        """
        Signed score of each row of X; positive values favour class +1.

        Raises:
        -------
        RuntimeError
            If the model has not been fitted for the current kernel.
        """
        X = np.asarray(X, dtype=float)
        if self.kernel == "linear":
            if self.w is None or self.b is None:
                raise RuntimeError(
                    "BaseSVM is not fitted yet; call fit() before predicting"
                )
            return np.dot(X, self.w) + self.b

        # getattr keeps this safe for instances created without __init__.
        alpha = getattr(self, "alpha", None)
        if alpha is None or self.X_train is None or self.y_train is None:
            raise RuntimeError(
                "BaseSVM is not fitted yet; call fit() before predicting"
            )
        K = self._kernel_function(self.X_train, X)
        return np.sum(alpha[:, None] * self.y_train[:, None] * K, axis=0) + self.b

    def predict(self, X):
        """Predict class labels (+1 or -1) for the rows of X."""
        return np.where(self.decision_function(X) >= 0, 1, -1)

> ## Example usage:

In [11]:
# Toy binary problem: two unit-variance Gaussian blobs of 50 points each,
# centred at (2, 2) for class +1 and at (-2, -2) for class -1.
np.random.seed(42)
X1 = np.array([2, 2]) + np.random.randn(50, 2)
X2 = np.array([-2, -2]) + np.random.randn(50, 2)
X = np.concatenate((X1, X2), axis=0)
y = np.concatenate((np.ones(50), -np.ones(50)))

> ### Linear SVM

In [12]:
# Train the linear-kernel model on the toy data and report training accuracy.
print("--- Linear SVM ---")
svm_linear = BaseSVM(
    kernel="linear",
    learning_rate=0.001,
    lambda_param=0.01,
    iterations=1000,
)
svm_linear.fit(X, y)
y_pred_linear = svm_linear.predict(X)
print("Accuracy (Linear):", (y_pred_linear == y).mean())

--- Linear SVM ---
Accuracy (Linear): 1.0


> ### Polynomial SVM

In [13]:
# Train the cubic polynomial-kernel model and report training accuracy.
print("--- Polynomial SVM ---")
svm_poly = BaseSVM(
    kernel="poly",
    degree=3,
    gamma=1,
    coef0=1,
    learning_rate=0.001,
    iterations=1000,
)
svm_poly.fit(X, y)
y_pred_poly = svm_poly.predict(X)
print("Accuracy (Poly):", (y_pred_poly == y).mean())

--- Polynomial SVM ---
Accuracy (Poly): 1.0


> ### RBF SVM

In [14]:
# Train the RBF-kernel model and report training accuracy.
print("--- RBF SVM ---")
svm_rbf = BaseSVM(
    kernel="rbf",
    gamma=0.5,
    learning_rate=0.001,
    iterations=1000,
)
svm_rbf.fit(X, y)
y_pred_rbf = svm_rbf.predict(X)
print("Accuracy (RBF):", (y_pred_rbf == y).mean())

--- RBF SVM ---
Accuracy (RBF): 1.0


> ### Sigmoid SVM

In [15]:
# Train the sigmoid-kernel model and report training accuracy.
print("--- Sigmoid SVM ---")
svm_sigmoid = BaseSVM(
    kernel="sigmoid",
    gamma=0.01,
    coef0=0.0,
    learning_rate=0.001,
    iterations=1000,
)
svm_sigmoid.fit(X, y)
y_pred_sigmoid = svm_sigmoid.predict(X)
print("Accuracy (Sigmoid):", (y_pred_sigmoid == y).mean())

--- Sigmoid SVM ---
Accuracy (Sigmoid): 1.0
