# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

class ScratchSVMClassifier():
    """Binary SVM classifier trained from scratch on the dual problem.

    The Lagrange multipliers are optimised by coordinate-wise gradient
    ascent on ``sum(lam) - 0.5 * sum_ij lam_i lam_j y_i y_j K(x_i, x_j)``.
    Only the constraint ``lam >= 0`` is enforced; no upper bound is applied
    to the multipliers.

    Parameters
    ----------
    num_iter : int
        Number of full passes over the training samples.
    lr : float
        Step size of each gradient-ascent update.
    kernel : {'linear', 'poly'}
        Kernel used to compare samples.
    gamma, theta0, degree : float
        Polynomial kernel parameters:
        ``(gamma * <x1, x2> + theta0) ** degree``.
    threshold : float
        Samples whose multiplier exceeds this value are kept as support
        vectors.
    verbose : bool
        If True, print a short summary once training has finished.

    Attributes
    ----------
    n_support_vectors : int or None
        Number of support vectors found by ``fit`` (None before fitting).
    X_sv : ndarray or None
        Support vectors, one per row.
    lam_sv : ndarray or None
        Multipliers of the support vectors, shape ``(n_support_vectors, 1)``.
    y_sv : ndarray or None
        Labels (-1 / +1) of the support vectors,
        shape ``(n_support_vectors, 1)``.
    b : float
        Bias term of the decision function.
    """

    def __init__(self, num_iter=1000, lr=0.01, kernel='linear',
                 gamma=1, theta0=0, degree=2, threshold=1e-5, verbose=False):
        self.iter = num_iter
        self.lr = lr
        self.kernel = kernel
        self.gamma = gamma
        self.theta0 = theta0
        self.degree = degree
        self.threshold = threshold
        self.verbose = verbose
        self.n_support_vectors = None
        self.X_sv = None
        self.lam_sv = None
        self.y_sv = None
        self.b = 0

    def _kernel_function(self, x1, x2):
        """Evaluate the configured kernel on ``x1`` and ``x2``.

        Both arguments are passed straight to ``np.dot``, so two 1-D vectors
        give a scalar, while ``x1`` of shape ``(n, d)`` and ``x2`` of shape
        ``(d, m)`` give an ``(n, m)`` matrix of kernel values.

        Raises
        ------
        ValueError
            If ``self.kernel`` is neither ``'linear'`` nor ``'poly'``.
        """
        if self.kernel == 'linear':
            return np.dot(x1, x2)
        elif self.kernel == 'poly':
            return (self.gamma * np.dot(x1, x2) + self.theta0) ** self.degree
        else:
            raise ValueError("Unsupported kernel")

    def _gram_matrix(self, A, B):
        """Return the matrix of kernel values between rows of A and rows of B."""
        return self._kernel_function(A, B.T)

    @staticmethod
    def _as_2d(X):
        """Convert X to an ndarray; a 1-D input becomes one feature per sample."""
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X, y):
        """Train the SVM by gradient ascent on the dual problem.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Class labels. Values ``<= 0`` are treated as the negative class
            (-1), everything else as the positive class (+1).

        Raises
        ------
        ValueError
            If X and y disagree on the number of samples, or the kernel
            name is not supported.
        """
        X = self._as_2d(X)
        y = np.asarray(y).ravel()
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        y_ = np.where(y <= 0, -1, 1)  # Convert to {-1, 1}

        # The kernel values never change during training, so compute them
        # once instead of once per (iteration, i, j) triple.
        gram = self._gram_matrix(X, X)
        # q[i, j] = y_i * y_j * K(x_i, x_j)
        q = np.outer(y_, y_) * gram

        lam = np.zeros(n_samples)
        for _ in range(self.iter):
            # Multipliers are updated one at a time so that each update
            # already sees the new values of the preceding ones.
            for i in range(n_samples):
                grad = 1.0 - np.dot(q[i], lam)
                lam[i] = max(lam[i] + self.lr * grad, 0.0)  # lambda >= 0

        # Store support vectors
        sv_idx = lam > self.threshold
        self.n_support_vectors = int(np.sum(sv_idx))
        self.X_sv = X[sv_idx]
        self.lam_sv = lam[sv_idx].reshape(-1, 1)
        self.y_sv = y_[sv_idx].reshape(-1, 1)

        # Bias: mean over support vectors of y_i - sum_j lam_j y_j K(x_i, x_j)
        if self.n_support_vectors > 0:
            gram_sv = gram[np.ix_(sv_idx, sv_idx)]
            coef = (self.lam_sv * self.y_sv).ravel()
            self.b = float(np.mean(self.y_sv.ravel() - np.dot(gram_sv, coef)))
        else:
            # Do not keep a bias left over from an earlier fit.
            self.b = 0.0

        if self.verbose:
            print("fit finished:", self.n_support_vectors,
                  "support vectors, b =", self.b)

    def predict(self, X):
        """Predict class labels using the stored support vectors.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray, shape (n_samples,)
            1 where the decision function is ``>= 0``, otherwise 0.
        """
        X = self._as_2d(X)
        if X.size == 0:
            return np.zeros(0, dtype=int)
        coef = (self.lam_sv * self.y_sv).ravel()
        decisions = np.dot(self._gram_matrix(X, self.X_sv), coef) + self.b
        return np.where(decisions >= 0, 1, 0)

    def visualize_decision_boundary(self, X, y, resolution=0.02):
        """Plot the predicted regions, the samples and the support vectors.

        Parameters
        ----------
        X : ndarray
            Samples to plot; only columns 0 and 1 are used for the axes, and
            the grid passed to ``predict`` has exactly two features.
        y : array-like
            Values used to colour the samples.
        resolution : float
            Step of the grid on which predictions are evaluated.
        """
        # Create grid
        x1_min, x1_max = X[:, 0].min()-1, X[:, 0].max()+1
        x2_min, x2_max = X[:, 1].min()-1, X[:, 1].max()+1
        xx, yy = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                            np.arange(x2_min, x2_max, resolution))

        # Predict and plot
        Z = self.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.3)
        plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.coolwarm, s=20)

        # Plot support vectors; the legend is only drawn when there is a
        # labelled artist to show.
        if self.X_sv is not None and len(self.X_sv) > 0:
            plt.scatter(self.X_sv[:,0], self.X_sv[:,1],
                       facecolors='none', edgecolors='k',
                       s=100, linewidths=1.5, label="Support Vectors")
            plt.legend()
        plt.show()

####
# Synthetic two-feature, two-class dataset shared by both demos below.
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# One classifier per kernel: linear, then polynomial (degree=2).
linear_svm = ScratchSVMClassifier(kernel='linear', verbose=True)
poly_svm = ScratchSVMClassifier(kernel='poly', degree=2, verbose=True)

# Train each model and show its decision boundary, in the order above.
for model in (linear_svm, poly_svm):
    model.fit(X, y)
    model.visualize_decision_boundary(X, y)