# Support Vector Machines

A Support Vector Machine (SVM) is a `supervised machine learning algorithm` that can be used for both classification and regression problems.

However, it is mostly used in classification problems.

In this algorithm, we plot each data item as a point in n-dimensional space (where n is the number of features you have), with the value of each feature being the value of a particular coordinate. Then, we perform classification by finding the hyperplane that best separates the two classes (see the snapshot below).

Types of SVM kernels:

1. Linear Kernel
2. Polynomial Kernel
3. Radial Basis Function Kernel (RBF)

https://scikit-learn.org/stable/modules/svm.html#classification

# SVC

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ml libraries
from sklearn.svm import SVC # support vector classifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [None]:
# Fetch the iris dataset that ships with seaborn and preview its first rows
df = sns.load_dataset("iris")
df.head(n=5)

In [None]:
# Count how many rows fall under each distinct value of the 'species' column
df['species'].value_counts()

In [None]:
# Separate the target column (species) from the feature columns
y = df["species"]
X = df.drop(columns="species")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build a support vector classifier with the radial basis function (RBF) kernel
# and fit it on the training split (fit() returns the fitted estimator itself)
model = SVC(kernel="rbf").fit(X_train, y_train)
# Predict the species for the held-out rows
y_pred = model.predict(X_test)

In [None]:
# Report the confusion matrix followed by per-class precision/recall/F1,
# each on its own line
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

In [None]:
# Draw the confusion matrix as an annotated heatmap.
# The class order is pinned explicitly (sorted union of true and predicted
# labels) so that the tick labels are guaranteed to line up with the
# matrix rows (truth) and columns (prediction).
class_names = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_names)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers, matching the heatmap used in
# the breast-cancer example further down
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_names,
    yticklabels=class_names,
)
# label the plot
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()

----
# SVR
A Support Vector Machine can also be used as a regression method, maintaining all the main features that characterize the algorithm (maximal margin).

In [None]:
# Fetch seaborn's diamonds dataset for the SVR example and preview its first rows
df = sns.load_dataset("diamonds")
df.head(n=5)

In [None]:
# Print a summary of the DataFrame (columns, dtypes, non-null counts)
df.info()

In [None]:
%%time
import numpy as np
import seaborn as sns
from sklearn.svm import SVR # support vector regressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
# End-to-end SVR example: load the diamonds data, encode the categorical
# features, split, scale, fit a support vector regressor on `price`, and
# report the usual regression metrics on the held-out split.

# load the data
df = sns.load_dataset('diamonds')
df = df.sample(frac=0.2, random_state=42) # getting only 20% of data -- for fast results

# Select features and target variable
X = df.drop('price', axis=1)
y = df['price']

# Convert categorical variables to numerical variables.
# The fitted encoders are kept per column so the codes can be mapped back
# to the original labels later.
# NOTE(review): LabelEncoder numbers labels in sorted order and is documented
# for target encoding; consider OrdinalEncoder if category order matters.
label_encoders = {}
for col in X.select_dtypes(include='category').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Split the data into training and testing sets BEFORE scaling, so that no
# statistics from the test rows influence the preprocessing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features: learn mean/std from the training split only,
# then apply that same transformation to the test split (transform, not
# fit_transform, to avoid leaking test-set statistics)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the SVR model
svr_model = SVR()
svr_model.fit(X_train, y_train)

# Predict the prices on the test set
y_pred = svr_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'Mean Absolute Percentage Error: {mape}')
# RMSE
rmse = np.sqrt(mse)
print(f'Root Mean Squared Error: {rmse}')

---

---
# Another example for SVC

In [None]:
# Load the breast cancer dataset bundled with scikit-learn; the cells below
# read its feature_names, target_names, data and target attributes
from sklearn import datasets
cancer = datasets.load_breast_cancer()

In [None]:
# Show the feature names, then the target class names (malignant, benign),
# each on its own line
print(cancer.feature_names, cancer.target_names, sep="\n")

In [None]:
# Print the dimensions of the feature matrix
print(cancer.data.shape) # 569 samples, 30 features

In [None]:
# Print the first 5 rows of the feature matrix to get a feel for the raw values
print(cancer.data[:5])

In [None]:
# Print the labels that belong to the first 5 rows
print(cancer.target[:5]) # 0 for malignant, 1 for benign

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# import svm
from sklearn import svm

# Build a support vector classifier with a linear kernel and fit it on the
# training split (fit() returns the fitted estimator itself)
model = svm.SVC(kernel="linear").fit(X_train, y_train)
# Predict the class of every held-out sample
y_pred = model.predict(X_test)

In [None]:
# import classification metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# Compute the confusion matrix once; it is both printed and plotted below
cm = confusion_matrix(y_test, y_pred)

# Text report: confusion matrix, per-class metrics, overall accuracy
print('Confusion Matrix:---------------------------')
print(cm)

print('Classification Report:---------------------------')
print(classification_report(y_test, y_pred))

print('Accuracy Score:---------------------------')
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

# Heatmap of the same confusion matrix, with integer cell annotations
plt.figure(figsize=(8, 6))
sns.heatmap(cm, cmap='Blues', fmt='d', annot=True)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()