In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
from sklearn.inspection import DecisionBoundaryDisplay

In [2]:
class LinearSVM:
    def __init__(self, learning_rate=0.01, lambda_reg=0.01, epochs=1000, tol=1e-3, patience=10, batch_size=None):
        """
        Initialize the Linear SVM model.

        Parameters:
            learning_rate (float): Learning rate for gradient updates.
            lambda_reg (float): L2 regularization strength.
            epochs (int): Number of training iterations.
            tol (float): Tolerance for early stopping.
            patience (int): Number of epochs to wait for improvement before stopping.
            batch_size (int or None): Size of mini-batches for SGD. If None, use full-batch.
        """
        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        self.epochs = epochs
        self.tol = tol
        self.patience = patience
        self.batch_size = batch_size
        self.weights = None
        self.bias = None
        self.loss_history = []

    def _initialize_weights(self, n_features):
        """
        Initialize weights and bias to small random values.

        Parameters:
            n_features (int): Number of features in the input data.
        """
        self.weights = np.random.randn(n_features) * 0.01
        self.bias = 0.0

    def _compute_hinge_loss(self, X, y):
        """
        Compute the hinge loss function.

        Parameters:
            X (np.array): Feature matrix (n_samples, n_features).
            y (np.array): Target labels (+1 or -1).

        Returns:
            float: The hinge loss value.
        """
        margins = 1 - y * (np.dot(X, self.weights) + self.bias)
        hinge_loss = np.maximum(0, margins)  # Hinge loss for each sample
        l2_regularization = (self.lambda_reg / 2) * np.sum(self.weights**2)
        return np.mean(hinge_loss) + l2_regularization

    def _validate_inputs(self, X, y):
        """
        Validate the input data and labels.

        Parameters:
            X (np.array): Feature matrix.
            y (np.array): Target labels.

        Raises:
            ValueError: If input shapes are inconsistent or labels are not +1/-1.
        """
        if len(X) != len(y):
            raise ValueError("Number of samples in X and y must match.")
        if not np.all(np.isin(y, [-1, 1])):
            raise ValueError("Labels must be either +1 or -1.")

    def fit(self, X, y, verbose=False, verbose_interval=5):
        """
        Train the SVM model using SGD.

        Parameters:
            X (np.array): Feature matrix (n_samples, n_features).
            y (np.array): Target labels (+1 or -1).
            verbose (bool): If True, print loss progress.
            verbose_interval (int): Interval (in epochs) for printing progress.
        """
        self._validate_inputs(X, y)
        n_samples, n_features = X.shape
        self._initialize_weights(n_features)

        best_loss = float("inf")
        no_improve_count = 0

        for epoch in range(self.epochs):
            # Shuffle data for each epoch
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X, y = X[indices], y[indices]

            # Mini-batch processing
            if self.batch_size:
                num_batches = int(np.ceil(n_samples / self.batch_size))
            else:
                num_batches = 1
                self.batch_size = n_samples

            for batch_idx in range(num_batches):
                start = batch_idx * self.batch_size
                end = min(start + self.batch_size, n_samples)
                X_batch, y_batch = X[start:end], y[start:end]

                # Compute margins
                margins = y_batch * (np.dot(X_batch, self.weights) + self.bias)

                # Gradient update
                mask = margins < 1  # Only update for samples violating margin
                dW = self.lambda_reg * self.weights - np.dot(X_batch[mask].T, y_batch[mask]) / len(X_batch)
                db = -np.sum(y_batch[mask]) / len(X_batch)

                # Apply SGD update
                self.weights -= self.learning_rate * dW
                self.bias -= self.learning_rate * db

            # Compute loss at the end of the epoch
            loss = self._compute_hinge_loss(X, y)
            self.loss_history.append(loss)

            # Verbose logging
            if verbose and epoch % verbose_interval == 0:
                print(f"Epoch {epoch}: Loss = {loss:.6f}")

            # Early stopping
            if loss < best_loss - self.tol:
                best_loss = loss
                no_improve_count = 0
            else:
                no_improve_count += 1
                if no_improve_count >= self.patience:
                    if verbose:
                        print(f"Early stopping at epoch {epoch}. Best loss: {best_loss:.6f}")
                    break

    def predict(self, X):
        """
        Predict class labels.

        Parameters:
            X (np.array): Feature matrix.

        Returns:
            np.array: Predicted class labels (+1 or -1).
        """
        return np.sign(np.dot(X, self.weights) + self.bias)

    def decision_function(self, X):
        """
        Compute the decision function values.

        Parameters:
            X (np.array): Feature matrix.

        Returns:
            np.array: Decision function values.
        """
        return np.dot(X, self.weights) + self.bias

    def score(self, X, y):
        """
        Compute the accuracy of the model.

        Parameters:
            X (np.array): Feature matrix.
            y (np.array): True labels (+1 or -1).

        Returns:
            float: Accuracy score.
        """
        predictions = self.predict(X)
        return np.mean(predictions == y)

In [3]:
cancer = load_breast_cancer()
X = cancer.data[:, :2]
y = cancer.target
y = 2 * y - 1
    
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)
    
# Split data
X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=282025
)
    
# Train model
model = LinearSVM(learning_rate=0.01, batch_size=32, epochs=1000)
model.fit(X_train, y_train)
    
# Evaluate
y_pred = model.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print(f"\nTest accuracy: {accuracy:.4f}")


Test accuracy: 0.8772
