In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, inspection, metrics, model_selection
from sklearn import pipeline, preprocessing, svm

# Support Vector Machines

[Support vector machines (SVMs)](https://scikit-learn.org/stable/modules/svm.html) are a set of supervised learning methods used for [classification](https://scikit-learn.org/stable/modules/svm.html#svm-classification), [regression](https://scikit-learn.org/stable/modules/svm.html#svm-regression) and [outlier detection](https://scikit-learn.org/stable/modules/svm.html#svm-outlier-detection).

The advantages of support vector machines are:

* Effective in high dimensional spaces.
* Still effective in cases where the number of dimensions is greater than the number of samples.
* Uses a subset of training points in the decision function (called support vectors), so it is also memory efficient.
* Versatile: different [Kernel functions](https://scikit-learn.org/stable/modules/svm.html#svm-kernels) can be specified for the decision function. Common kernels are provided, but it is also possible to specify custom kernels.

The disadvantages of support vector machines include:

* If the number of features is much greater than the number of samples, avoiding over-fitting through the choice of kernel function and regularization term is crucial.
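* SVMs do not directly provide probability estimates; these are calculated using an expensive five-fold cross-validation (see [Scores and probabilities](https://scikit-learn.org/stable/modules/svm.html#scores-and-probabilities) in the scikit-learn user guide, and the *Predicting Class Probabilities* section below).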

## Training and Visualizing a Support Vector Machine

In [None]:
# Load the iris dataset; as_frame=True returns the data as pandas objects.
iris_data = datasets.load_iris(as_frame=True)

In [None]:
# Split the loaded bunch into the feature table and the class labels.
features_df, target = iris_data.data, iris_data.target

In [None]:
# Preview the first rows of the feature table.
features_df.head()

In [None]:
# Display the class labels (the cell's last expression is rendered as output).
target

In [None]:
# IPython help lookup: show the docstring of svm.LinearSVC.
svm.LinearSVC?

In [None]:
# IPython help lookup: show the docstring of svm.SVC.
svm.SVC?

In [None]:
# Two ways of fitting a linear SVM classifier. Both pipelines standardize the
# features first and then hand them to the estimator.

# Variant 1: the dedicated LinearSVC estimator.
linear_svc_classification = pipeline.make_pipeline(
    preprocessing.StandardScaler(), svm.LinearSVC()
)

# Variant 2: the general SVC estimator restricted to a linear kernel.
svc_linear_kernel_classification = pipeline.make_pipeline(
    preprocessing.StandardScaler(), svm.SVC(kernel="linear")
)


In [None]:
# Render the LinearSVC pipeline to inspect its steps.
linear_svc_classification

In [None]:
# Render the SVC (linear kernel) pipeline to inspect its steps.
svc_linear_kernel_classification

In [None]:
%%timeit
# Time fitting the LinearSVC pipeline on the full iris feature table.
_ = linear_svc_classification.fit(features_df, target)

In [None]:
%%timeit
# Time fitting the SVC (linear kernel) pipeline on the same data, for comparison.
_ = svc_linear_kernel_classification.fit(features_df, target)

In [None]:
# Keep only the first two features so the decision regions can be drawn in 2-D.
X = features_df.loc[:, ["sepal length (cm)", "sepal width (cm)"]]
y = target

# Fit both pipelines on the two selected columns. Each pipeline standardizes
# its input internally (StandardScaler step), so the raw columns are passed in
# and the decision regions are drawn over the original measurements.
# NOTE: `fit` mutates and returns the pipeline itself, so after this cell the
# shared pipeline objects are fitted on these two features only.
models = [
    model.fit(X, y)
    for model in (linear_svc_classification, svc_linear_kernel_classification)
]

# Titles for the plots, listed in the SAME order as `models` above so that
# each panel is labelled with the estimator that actually produced it.
titles = (
    "LinearSVC (linear kernel)",
    "SVC with linear kernel",
)

# Set-up 1x2 grid for plotting.
fig, axes = plt.subplots(1, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

for model, title, ax in zip(models, titles, axes.flatten()):
    # The returned display object is deliberately not bound to a name: a
    # notebook-level variable called `display` would shadow IPython's
    # display() helper in every later cell.
    inspection.DecisionBoundaryDisplay.from_estimator(
        model,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=X.columns[0],
        ylabel=X.columns[1],
    )
    # Overlay the training points, coloured by their true class.
    ax.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()

## Making Predictions

In [None]:
# Refit on all iris features (the plotting cell above fitted this pipeline on
# two columns only). Assigning to `_` keeps the fitted-pipeline repr out of
# the cell output.
_ = svc_linear_kernel_classification.fit(features_df, target)

In [None]:
# Support vectors of the fitted SVC step. The SVC sits behind the
# StandardScaler, so these are expressed in standardized feature space.
svc_linear_kernel_classification.named_steps["svc"].support_vectors_

In [None]:
# Predict a class label for every row of the data the pipeline was fitted on.
svc_linear_kernel_classification.predict(features_df)

## Predicting Class Probabilities

In [None]:
# Rebuild the SVC pipeline with probability estimates enabled.
# probability=True makes SVC run an internal, shuffled cross-validation to
# calibrate the probabilities, so a fixed random_state is supplied to make the
# predict_proba output reproducible across Restart & Run All.
svc_linear_kernel_classification = pipeline.make_pipeline(
    preprocessing.StandardScaler(),
    svm.SVC(kernel="linear", probability=True, random_state=42),
)

_ = svc_linear_kernel_classification.fit(features_df, target)

In [None]:
# Per-class probability estimates for every row of the training data.
svc_linear_kernel_classification.predict_proba(features_df)

## Regularization

In [None]:
# IPython help lookup: re-open the svm.SVC docstring to read about its
# regularization parameter (`C`, which the grid searches below tune).
svm.SVC?

In [None]:
# List the scorer names scikit-learn knows about (e.g. the "f1_macro" string
# passed as `scoring=` further down).
metrics.get_scorer_names()

In [None]:
# Fit on the full iris data and score the predictions made on that same data.
# This is a training-set score; compare it with the cross-validated score.
_ = svc_linear_kernel_classification.fit(features_df, target)
_predictions = svc_linear_kernel_classification.predict(features_df)

metrics.f1_score(y_true=target, y_pred=_predictions, average="macro")

In [None]:
# 5-fold cross-validated macro-F1 for the pipeline, using all available cores.
cv_scores = model_selection.cross_val_score(
    estimator=svc_linear_kernel_classification,
    X=features_df,
    y=target,
    scoring="f1_macro",
    cv=5,
    n_jobs=-1,
)

In [None]:
# One macro-F1 score per cross-validation fold.
cv_scores

In [None]:
# Average the per-fold scores into a single validation estimate.
cv_scores.mean()

## Exercise

Fit a support vector machine to the following dataset. Select an appropriate scoring metric and evaluate the performance of your classifier using cross-validation. Is your classifier under-fitting? Over-fitting? Tune the regularization hyperparameters using grid search to improve the performance of your classifier.

In [None]:
# IPython help lookup: show the docstring of datasets.load_breast_cancer.
datasets.load_breast_cancer?

In [None]:
# Load the breast-cancer dataset; as_frame=True returns the data as pandas objects.
breast_cancer_data = datasets.load_breast_cancer(as_frame=True)

### Solution

In [None]:
# From here on `features_df` / `target` refer to the breast-cancer data,
# replacing the iris data bound to the same names earlier in the notebook.
features_df, target = breast_cancer_data.data, breast_cancer_data.target

# Baseline model: standardize the features, then a linear-kernel SVC.
svc_classification_pipeline = pipeline.make_pipeline(
    preprocessing.StandardScaler(), svm.SVC(kernel="linear")
)
_ = svc_classification_pipeline.fit(features_df, target)

# Training performance: macro-F1 of predictions on the data the model was fit on.
_predictions = svc_classification_pipeline.predict(features_df)
training_f1_score = metrics.f1_score(
    y_true=target, y_pred=_predictions, average="macro"
)
print(f"Training f1 score {training_f1_score}")

# Validation performance: mean macro-F1 over 5 cross-validation folds. A large
# gap between the training and validation numbers points to over-fitting.
validation_f1_scores = model_selection.cross_val_score(
    estimator=svc_classification_pipeline,
    X=features_df,
    y=target,
    scoring="f1_macro",
    cv=5,
    n_jobs=-1,
)
print(f"Validation f1 score {validation_f1_scores.mean()}")


In [None]:
# fine-tune regularization hyperparameters
param_grid = [
    {
      "svc__C": np.logspace(-3, 3, 7),
      "svc__kernel": ["linear"]
    },
]

grid_search_cv = model_selection.GridSearchCV(
    svc_classification_pipeline,
    param_grid,
    cv=5,
    n_jobs=-1,
    scoring="f1_macro"
)

In [None]:
# Render the (not yet fitted) grid-search object to inspect its configuration.
grid_search_cv

In [None]:
# Run the grid search on the breast-cancer data; `_` suppresses the repr output.
_ = grid_search_cv.fit(features_df, target)

In [None]:
# Cross-validated macro-F1 of the best parameter combination found.
grid_search_cv.best_score_

In [None]:
# Parameter combination that achieved the best cross-validated score.
grid_search_cv.best_params_

## Exercise

Modify the code from your previous exercise to treat the kernel as a hyperparameter. For each kernel, tune the regularization hyperparameters to find the best model.

### Solution

In [None]:
# fine-tune regularization hyperparameters
param_grid = [
    {
      "svc__C": np.logspace(-3, 3, 7),
      "svc__kernel": ["linear"]
    },
    {
      "svc__C": np.logspace(-3, 3, 7),
      "svc__kernel": ["poly"],
    },
    {
      "svc__C": np.logspace(-3, 3, 7),
      "svc__kernel": ["rbf"],
    },
]

grid_search_cv = model_selection.GridSearchCV(
    svc_classification_pipeline,
    param_grid,
    cv=5,
    n_jobs=-1,
    scoring="f1_macro"
)

In [None]:
# Run the multi-kernel grid search; `_` suppresses the repr output.
_ = grid_search_cv.fit(features_df, target)

In [None]:
# Cross-validated macro-F1 of the best kernel / C combination found.
grid_search_cv.best_score_

In [None]:
# Kernel and C value that achieved the best cross-validated score.
grid_search_cv.best_params_

## Exercise

Train an SVM classifier on the wine dataset. This dataset contains the chemical analyses of 178 wine samples produced by 3 different cultivators: the goal is to train a classification model capable of predicting the cultivator based on the wine’s chemical analysis. What accuracy can you reach?


In [None]:
# IPython help lookup: show the docstring of datasets.load_wine.
datasets.load_wine?