# SVM Tutorial

<table class="nt-notebook-buttons" align="center">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/JeremieGince/Learning_SVM/blob/main/notebook.ipynb"><img src="https://github.com/NeuroTorch/NeuroTorch/blob/main/images/colab_logo_32px.png?raw=true" width=32px height=32px  />Run in Google Colab</a>
</td>
  <td>
    <a target="_blank" href="https://github.com/JeremieGince/Learning_SVM/blob/main/notebook.ipynb"><img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" width=32px height=32px />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/JeremieGince/Learning_SVM/blob/main/notebook.ipynb"><img src="https://github.com/NeuroTorch/NeuroTorch/blob/main/images/download_logo_32px.png?raw=true" width=32px height=32px />Download notebook</a>
  </td>
</table>

## Setup

Run the following cell to install the dependencies. When the notebook runs on Google Colab, the cell also clones the repository first.

In [None]:
%%capture
#@title Install dependencies {display-mode: "form"}

RunningInCOLAB = 'google.colab' in str(get_ipython()) if hasattr(__builtins__,'__IPYTHON__') else False

if RunningInCOLAB:
    !git clone https://github.com/JeremieGince/Learning_SVM.git
    %cd Learning_SVM/

!pip install -r requirements.txt

With the dependencies installed, we can import the necessary packages.

In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# On Colab the project modules are imported through the `Learning_SVM` package;
# otherwise they are imported directly as top-level modules.
# Both branches must bind the same names: the from-scratch model is instantiated
# as `SVC` further down, so importing it under another name in one branch leads to
# a NameError in that environment.
# NOTE(review): assumes `scratch` exports `SVC`, as the Colab branch does — confirm.
if RunningInCOLAB:
    from Learning_SVM.kernels import ClassicalKernel, QuantumKernel
    from Learning_SVM.scratch import SVC
    from Learning_SVM.visualization import Visualizer
else:
    from kernels import ClassicalKernel, QuantumKernel
    from scratch import SVC
    from visualization import Visualizer

## Dataset

In the next cell, we will load the dataset. By uncommenting the appropriate line, you can choose between the breast cancer, iris, or synthetic dataset.

In [None]:
# Choose exactly one dataset source. The two scikit-learn loaders are kept as
# commented-out alternatives; the synthetic binary problem below is the default.
# dataset = datasets.load_breast_cancer(as_frame=True)
# dataset = datasets.load_iris(as_frame=True)

# Settings of the synthetic problem: 100 samples, 4 features of which 2 are
# informative and none redundant, 2 classes with a single cluster each.
synthetic_options = dict(
    n_samples=100,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=0,
)
dataset = datasets.make_classification(**synthetic_options)

In [None]:
# Bring whichever dataset container was loaded into a common (X, y) form:
# features in `X`, labels in `y`.
if isinstance(dataset, tuple):
    # A plain pair, already in (features, labels) order.
    X, y = dataset
elif isinstance(dataset, dict):
    # Mapping-style container exposing "data" and "target" entries.
    X, y = dataset["data"], dataset["target"]
elif isinstance(dataset, pd.DataFrame):
    # Frame exposing `data` and `target` through attribute access.
    X, y = dataset.data, dataset.target
else:
    raise ValueError(f"Unknown dataset type: {type(dataset)}")

## Preprocessing

In [None]:
# Optional: map the labels to {-1, 1} (do it before the split so the split labels follow).
# y = MinMaxScaler(feature_range=(-1, 1)).fit_transform(y.reshape(-1, 1)).reshape(-1).astype(int)

# Split BEFORE scaling so that the scaler statistics come from the training samples
# only; fitting the scaler on the full data set leaks information about the test
# samples into the preprocessing step.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# scaler = StandardScaler().fit(x_train)
scaler = MinMaxScaler(feature_range=(0, 1)).fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# `X` keeps being used further down (kernels, plots), so it receives the same
# train-fitted scaling; held-out samples may land slightly outside [0, 1].
X = scaler.transform(X)

print(f"{X.shape = }, {y.shape = }")
print(f"{np.unique(y) = }")

## Model

In [None]:
# Size of the last axis of X, i.e. the number of features per sample.
embedding_size = np.shape(X)[-1]

### Kernel

Here we will use a classical and a quantum kernel to train the SVM models.

In [None]:
# `psutil.cpu_count` returns None when the count cannot be determined, which would
# make the `- 2` below raise a TypeError. Fall back to the logical count, then to 1.
n_cores = psutil.cpu_count(logical=False) or psutil.cpu_count(logical=True) or 1

# Classical kernel using the "rbf" metric.
# NOTE(review): `.fit` is assumed to return the fitted kernel object — confirm.
clas_kernel = ClassicalKernel(
    embedding_dim=embedding_size,
    metric="rbf",
    seed=0
).fit(X, y)

# Quantum kernel evaluated with 128 shots; two cores are left free for the rest of
# the system, and the worker count is clamped at 0 on small machines.
q_kernel = QuantumKernel(
    embedding_dim=embedding_size,
    seed=0,
    # encoder_matrix=rn_embed_matrix,
    shots=128,
    nb_workers=max(0, n_cores - 2),
    interface="auto",
).fit(X, y)

### SVM

We will use the classical and quantum kernels to train the SVM models.

In [None]:
# Registry of the four estimators to compare, keyed by display name: each kernel
# (classical / quantum) is paired with scikit-learn's SVC and with the project's
# own SVC implementation. Insertion order is the plotting order.
models = {
    "classical": svm.SVC(kernel=clas_kernel.kernel, random_state=0),
    "scratch": SVC(kernel=clas_kernel.kernel, max_iter=1_000),
    "qml": svm.SVC(kernel=q_kernel.kernel, random_state=0),
    "q_scratch": SVC(kernel=q_kernel.kernel, max_iter=1_000),
}

# Individual handles to the same estimator objects, in registry order.
clas_model, scratch_model, qml_model, q_scratch_model = models.values()

## Training & Evaluation

We will now train the models and evaluate their performance. We will also visualize the decision boundaries in the reduced space.

In [None]:
# Lay the models out on a near-square grid: one subplot per model.
n_plots = len(models)
n_rows = int(np.ceil(np.sqrt(n_plots)))
n_cols = int(np.ceil(n_plots / n_rows))
fig, axes = plt.subplots(n_rows, n_cols, tight_layout=True, figsize=(14, 10), sharex="all", sharey="all")
# `plt.subplots` returns a bare Axes for a 1x1 grid; wrap and flatten so that
# `axes[i]` works for every grid shape.
axes = np.ravel(np.asarray([axes]))

for i, (m_name, model) in enumerate(models.items()):
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock, which can be adjusted mid-run.
    fit_start_time = time.perf_counter()
    model.fit(x_train, y_train)
    fit_time = time.perf_counter() - fit_start_time
    accuracy = model.score(x_test, y_test)
    print(f"{m_name} test accuracy: {accuracy * 100 :.4f}%, {fit_time = :.5f} [s]")

    fig, ax = Visualizer.plot_2d_decision_boundaries(
        model=model,
        X=X, y=y,
        # reducer=decomposition.PCA(n_components=2, random_state=0),
        # reducer=umap.UMAP(n_components=2, transform_seed=0, n_jobs=max(0, psutil.cpu_count() - 2)),
        check_estimators=False,
        # Models whose name starts with "q" get far fewer points than the others.
        n_pts=(1_000 if m_name.startswith('q') else 100_000),
        title="Decision boundaries in the reduced space.",
        legend_labels=getattr(dataset, "target_names", None),
        # axis_name="RN",
        fig=fig, ax=axes[i],
        interpolation="nearest",
    )
    # Replace the generic title with the model name and its test accuracy.
    ax.set_title(f"{m_name} accuracy: {accuracy * 100:.2f}%")

# The grid can hold more cells than there are models (e.g. 3 models on a 2x2 grid);
# hide the leftover axes instead of showing empty frames.
for unused_ax in axes[n_plots:]:
    unused_ax.set_visible(False)

plt.show()