---
title: "SUPPORT VECTOR MACHINES"
author: "Kwabena Asabere"
df-print: kable
code-overflow: wrap
execute:
    echo: true
    warning: false
    message: false
format: html
---

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.preprocessing import StandardScaler,OneHotEncoder,PolynomialFeatures,FunctionTransformer,MinMaxScaler,LabelBinarizer
from sklearn.pipeline import Pipeline,make_pipeline
from sklearn.model_selection import train_test_split,cross_val_score,cross_validate,cross_val_predict,GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,precision_recall_curve,roc_curve,roc_auc_score
ConfusionMatrixDisplay,root_mean_squared_error,r2_score
from sklearn.compose import ColumnTransformer,make_column_transformer
from sklearn.impute import SimpleImputer

NameError: name 'ConfusionMatrixDisplay' is not defined

In [None]:
heart = pd.read_csv(r"C:\Users\KAsab\Desktop\Analysis_Workshop\data\south_africa_heart.csv")

### Linear SVM Classification

In [None]:
iris = pd.read_csv(r"C:\Users\KAsab\Desktop\Analysis_Workshop\data\Iris.csv")
iris = iris.iloc[:100,:]

In [None]:
iris.head()

In [None]:
sns.scatterplot(
    data = iris,
    x = "PetalLengthCm",
    y = "PetalWidthCm",
    hue = "Species"
)
plt.xlabel("Petal Length")
plt.ylabel("Petal Width")
plt.grid()
plt.show()

In [None]:
from sklearn.svm import LinearSVC,SVC,SVR,LinearSVR

In [None]:
iris.info()

In [None]:
X = iris.iloc[:,3:5]
y = iris["Species"] 

In [None]:
X.head()

In [None]:
y

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y , random_state = 42)

In [None]:
svm_clf = make_pipeline(
    StandardScaler(),
    LinearSVC(C = 1, random_state = 42)
)

In [None]:
svm_clf.fit(X_train,y_train)

In [None]:
y_pred = svm_clf.predict(X_train)

In [None]:
accuracy_score(y_train,y_pred)

In [None]:
y_train_pred = cross_val_predict(svm_clf,X_train,y_train,cv = 3)

In [None]:
y_scores = cross_val_predict(svm_clf,X_train,y_train,cv = 3, method  = "decision_function")

In [None]:
y_train_binary = LabelBinarizer().fit_transform(y_train).ravel()

In [None]:
precisions,recalls,thresholds = precision_recall_curve(y_train_binary,y_scores)

In [None]:
fig = plt.figure(figsize = (8,6))
plt.plot(thresholds,precisions[:-1],"b--", label = "Precision", linewidth = 2)
plt.plot(thresholds,recalls[:-1],"g-",label = "Recall",linewidth = 2)
plt.grid()
plt.xlabel("Threshold")
plt.legend()
plt.show()

In [None]:
fpr,tpr,thresholds = roc_curve(y_train_binary,y_scores)

In [None]:
auc = roc_auc_score(y_train_binary,y_scores)

In [None]:
fig = plt.figure(figsize = (8,6))
plt.plot(fpr,tpr,linewidth = 2, label = f"ROC curve (AUC = {auc})")
plt.plot([0,1],[0,1], "k:",label = "Random Classifier's ROC curve")
plt.grid()
plt.legend()
plt.show()

### Nonlinear SVM Classification

In [None]:
from sklearn.datasets import make_moons
from sklearn.preprocessing import PolynomialFeatures

In [None]:
X,y = make_moons(n_samples = 100, noise = 0.15, random_state = 42)

In [None]:
poly_svm_clf = make_pipeline(
    PolynomialFeatures(degree = 3),
    StandardScaler(),
    LinearSVC(C = 10, max_iter = 10_000, random_state = 42)
)

In [None]:
poly_svm_clf.fit(X,y)

In [None]:
poly_kernel_svc = make_pipeline(StandardScaler(),
                               SVC(kernel = "poly",degree = 3, coef0 = 1, C = 5))

In [None]:
poly_kernel_svc.fit(X,y)

The hyperparameter `coef0` controls how much the model is influenced by high-degree terms versus
low-degree terms.

In [None]:
rbf_svm = make_pipeline(StandardScaler(),
                       SVC(kernel = "rbf", gamma = 5, C = 0.001))

In [None]:
rbf_svm.fit(X,y)

Increasing gamma makes the bell-shaped curve narrower (see the
lefthand plots in Figure 5-8). As a result, each instance’s range of influence is smaller: the decision boundary ends
up being more irregular, wiggling around individual instances. Conversely, a small gamma value makes the bellshaped
curve wider: instances have a larger range of influence, and the decision boundary ends up smoot

So `γ` 
3
2
acts like a regularization hyperparameter: if your model is overfitting, you should reduce γ; if it is underfitting, you
should increase γ (similar to the C hyperparameter)her

In [None]:
from sklearn.svm import LinearSVR

In [None]:
diamonds = pd.read_csv(r"C:\Users\KAsab\Desktop\GERON\diamonds.csv")

In [None]:
diamonds.head()

In [None]:
diamonds = diamonds.iloc[:,1:]

In [None]:
X = diamonds.select_dtypes(include = "number")
y = diamonds["price"]

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)

In [None]:
svm_reg = make_pipeline(StandardScaler(),LinearSVR(epsilon = 0.5,random_state = 42))

In [None]:
svm_reg.fit(X_train,y_train)

In [None]:
y_preds = svm_reg.predict(X_train)

In [None]:
rmse = root_mean_squared_error

In [None]:
rmse(y_train,y_preds)

In [None]:
r2_score(y_train,y_preds)

In [None]:
svm_poly_reg = make_pipeline(StandardScaler(),
                            SVR(kernel = "poly",degree = 2, C = 0.01, epsilon = 0.1))

In [None]:
svm_poly_reg.fit(X_train,y_train)

In [None]:
svm_poly_reg.predict(X_test)