In [None]:
"""
Training some sparse kernel machines
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Toy two-class data set; pinning random_state keeps the drawn sample reproducible
X, y = make_moons(n_samples=100, noise=0.4, random_state=10)

# Hyper-parameters shared by the models below: regularisation constant C and
# RBF kernel coefficient gamma (both could be tuned by a cross-validated grid search)
C = 5
gamma = 0.3

# Plotting mesh: covers the data plus a margin of 1 on every side, sampled every h units
h = 0.05
x_min, y_min = X.min(axis=0) - 1
x_max, y_max = X.max(axis=0) + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
def transform_features_linear_kernel(XX, X=X):
    """Evaluate the linear kernel K(x, x') = x^T x' between two sample sets.

    Parameters
    ----------
    XX : array of samples to transform, one sample per row.
    X : array of reference samples, one per row. Defaults to the training
        set as it was bound when this function was defined.

    Returns
    -------
    Array whose entry (i, j) is the inner product of XX[i] and X[j].
    """
    gram = np.dot(XX, X.T)
    return gram

def transform_features_rbf(XX, gamma=gamma, X=X):
    """Evaluate the RBF kernel K(x, x') = exp(-gamma * ||x - x'||^2).

    Parameters
    ----------
    XX : array of samples to transform, one sample per row.
    gamma : kernel coefficient multiplying the squared distance. Defaults to
        the notebook-level ``gamma`` as bound when this function was defined.
    X : array of kernel centres, one per row. Defaults to the training set
        as bound when this function was defined.

    Returns
    -------
    Array whose entry (i, j) is the kernel value between XX[i] and X[j].
    """
    # Broadcasting (n, 1, d) against (m, d) yields every pairwise difference at once
    offsets = XX[:, np.newaxis] - X
    squared_norms = np.square(offsets).sum(axis=-1)
    return np.exp(-squared_norms * gamma)

# Gram matrix of the training set against itself: entry (i, j) is K(x_i, x_j).
# It serves as the design matrix for the kernelised logistic regression.
X_rbf_manual = transform_features_rbf(XX=X)
print(f"RBF design matrix shape: {X_rbf_manual.shape}")

In [None]:
def _support_mask(coef, threshold):
    """Return a boolean mask selecting the samples treated as support vectors.

    A sample is kept when the magnitude of its weight exceeds
    ``min + threshold * (max - min)`` of all weight magnitudes, i.e. ``threshold``
    is a fraction of the observed magnitude range, not an absolute cutoff.

    ``coef`` is the ``coef_`` array of a fitted binary classifier (one weight
    per training sample); it is flattened so the mask is one-dimensional.
    """
    magnitudes = np.abs(np.ravel(coef))
    lowest, highest = magnitudes.min(), magnitudes.max()
    return magnitudes > threshold * (highest - lowest) + lowest


def _draw_probability_map(axis, proba, title):
    """Plot a class-0 probability map on the mesh plus its 0.5 decision contour."""
    axis.pcolor(xx, yy, proba)
    axis.contour(xx, yy, proba, colors="k", levels=[0.5])
    axis.set_title(title)


def _circle_points(axis, points, label):
    """Highlight ``points`` (one 2-D sample per row) with large hollow white circles."""
    axis.scatter(points[:, 0], points[:, 1], s=200, facecolor="none", edgecolor="w", label=label)


# Fixed seed for the stochastic parts (SAGA's data shuffling and the shuffling
# behind SVC's probability estimates) so that reruns produce the same figure.
RANDOM_STATE = 10

# Kernel features of every mesh point w.r.t. all training samples.
# Independent of the threshold, so computed once.
grid_points = np.c_[xx.ravel(), yy.ravel()]
grid_features = transform_features_rbf(XX=grid_points)

# None of the fitted models depends on the threshold, so each is trained once
# instead of once per row. `multi_class="ovr"` is omitted: it is deprecated in
# recent scikit-learn and makes no difference for a binary target.
clf_l1 = LogisticRegression(C=C, penalty="l1", solver="saga", max_iter=10000, random_state=RANDOM_STATE)
clf_l1.fit(X_rbf_manual, y)

clf_l2 = LogisticRegression(C=C, penalty="l2", solver="saga", max_iter=10000, random_state=RANDOM_STATE)
clf_l2.fit(X_rbf_manual, y)
proba_l2 = clf_l2.predict_proba(grid_features)[:, 0].reshape(xx.shape)

# SVM cannot predict probabilities directly, but probability=True uses Platt scaling,
# essentially interpreting the distance to the decision boundary as a probability
clf_svm = SVC(kernel="rbf", gamma=gamma, C=C, probability=True, random_state=RANDOM_STATE)
clf_svm.fit(X, y)
proba_svm = clf_svm.predict_proba(grid_points)[:, 0].reshape(xx.shape)

fig, axes = plt.subplots(3, 3, figsize=(15, 15))

# One row per threshold; columns are L1 logistic regression, L2 logistic regression, SVM
for threshold, ax in zip([0.05, 0.2, 0.5], axes):
    sv_label = f"support vectors ($\\alpha$ > {threshold:.2f})"

    # --- L1-regularised kernel logistic regression, predicting with support vectors only.
    # Multiplying by the mask zeroes the feature columns of discarded samples, so
    # their weights contribute nothing. (Replacing the discarded samples by the
    # zero vector instead would turn them into RBF centres at the origin, each
    # still contributing weight * exp(-gamma * ||x||^2).)
    l1_mask = _support_mask(clf_l1.coef_, threshold)
    proba_l1 = clf_l1.predict_proba(grid_features * l1_mask)[:, 0].reshape(xx.shape)
    _draw_probability_map(ax[0], proba_l1, "RBF kernel logistic regression L1 regularization")
    _circle_points(ax[0], X[l1_mask], sv_label)

    # --- L2-regularised kernel logistic regression.
    # The probability map uses ALL training samples; the circles only show which
    # samples would survive the threshold.
    # NOTE(review): confirm whether this panel should also predict from the pruned model.
    l2_mask = _support_mask(clf_l2.coef_, threshold)
    _draw_probability_map(ax[1], proba_l2, "RBF kernel logistic regression L2 regularization")
    _circle_points(ax[1], X[l2_mask], sv_label)

    # --- SVM reference (independent of the threshold, repeated in every row)
    _draw_probability_map(ax[2], proba_svm, "SVM with RBF kernel")
    _circle_points(ax[2], clf_svm.support_vectors_, "support vectors")

    # plot training samples
    for a in ax:
        a.scatter(X[y == 0, 0], X[y == 0, 1], color="r", marker="^", s=50, facecolors="none", label="class 0")
        a.scatter(X[y == 1, 0], X[y == 1, 1], color="b", marker="o", s=50, facecolors="none", label="class 1")
        a.legend()

plt.show()