# Introduction to Support Vector Machines (SVM)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

%matplotlib inline

In [None]:
# Synthetic two-class problem with two features; the fixed seed keeps the
# generated points identical on every run.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    n_classes=2,
    random_state=123,
)

In [None]:
# Shapes of the feature matrix and of the label vector.
(X.shape, y.shape)

In [None]:
# Scatter of the two features, coloured by class label.
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, s=100)
plt.xlabel('X values')
plt.ylabel('Y values')
# The trailing semicolon keeps the Text(...) repr out of the cell output.
plt.title('Synthetic two-class dataset');

In [None]:
# Hold out 30% of the samples for evaluation; seeded so the split is repeatable.
(X_train, X_test,
 y_train, y_test) = model_selection.train_test_split(X, y,
                                                     test_size=0.3,
                                                     random_state=123)

In [None]:
# SVC is imported in the notebook's import cell.
# gamma only affects the 'rbf', 'poly' and 'sigmoid' kernels; it is passed here
# so that every kernel in this notebook is constructed the same way.
params = {'kernel': 'linear'}
classifier = SVC(gamma='auto', **params)

In [None]:
# Train on the training split only.
classifier.fit(X=X_train, y=y_train)

In [None]:
# Predicted class for every held-out sample.
predictions = classifier.predict(X=X_test)

In [None]:
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))

In [None]:
# Fraction of held-out samples classified correctly.
metrics.accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
def plot_decision_boundary(classifier, X_test, y_test, h=0.02, ax=None):
    """Shade a classifier's predicted regions and overlay the test points.

    A regular grid covering the first two columns of ``X_test`` (padded by one
    unit on each side) is passed to ``classifier.predict``. The predicted
    labels are drawn as filled contours and the test samples are scattered on
    top, coloured by ``y_test``.

    Parameters
    ----------
    classifier : object
        Anything exposing ``predict(X)`` that accepts an ``(n, 2)`` array and
        returns one label per row.
    X_test : array-like
        Non-empty 2-D array; columns 0 and 1 are used as plot coordinates.
    y_test : array-like
        Values used to colour the scattered test points.
    h : float, default 0.02
        Spacing between neighbouring grid nodes. Smaller values give a
        smoother boundary at the cost of more predictions.
    ax : object, optional
        Drawing target providing ``contourf`` and ``scatter`` (for example a
        Matplotlib ``Axes``). When omitted, drawing goes through ``plt`` onto
        the current figure, exactly as before.

    Raises
    ------
    ValueError
        If ``X_test`` is empty, is not 2-D, has fewer than two columns, or if
        ``h`` is not positive.
    """
    X_test = np.asarray(X_test)
    if X_test.ndim != 2 or X_test.shape[0] == 0 or X_test.shape[1] < 2:
        raise ValueError(
            "X_test must be a non-empty 2-D array with at least two columns, "
            "got shape {}".format(X_test.shape)
        )
    if h <= 0:
        raise ValueError("h must be positive, got {}".format(h))

    # Draw through pyplot unless the caller supplied an explicit target.
    canvas = plt if ax is None else ax

    # Pad the data range by one unit so no sample sits on the plot edge.
    x_min, x_max = X_test[:, 0].min() - 1, X_test[:, 0].max() + 1
    y_min, y_max = X_test[:, 1].min() - 1, X_test[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # One row per grid node. The float32 cast is kept from the original
    # implementation so the predicted regions stay unchanged.
    X_hypo = np.c_[xx.ravel().astype(np.float32),
                   yy.ravel().astype(np.float32)]
    zz = np.asarray(classifier.predict(X_hypo)).reshape(xx.shape)

    # 'coolwarm' is the registered name of the colormap plt.cm.coolwarm.
    canvas.contourf(xx, yy, zz, cmap='coolwarm', alpha=0.9)
    canvas.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap='coolwarm', s=200)

In [None]:
plt.figure(figsize=(10, 6))
plot_decision_boundary(classifier, X_test, y_test)
# Name the kernel in the title so this figure can be told apart from the others.
plt.title('SVC decision regions on the test split ({} kernel)'.format(classifier.kernel));

# Build a non-linear classifier using SVM

In [None]:
# Refit with an RBF kernel and score it on the same held-out split.
params = {'kernel': 'rbf'}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)

With this train/test split, the accuracy rises to 95% when we switch to a non-linear (RBF) kernel!

Since the data has only 2 features, it is easy to plot for visualization.

You can visualize the effect of a non-linear SVM classifier on our test dataset.

In [None]:
plt.figure(figsize=(10, 6))
plot_decision_boundary(classifier, X_test, y_test)
# Name the kernel in the title so this figure can be told apart from the others.
plt.title('SVC decision regions on the test split ({} kernel)'.format(classifier.kernel));

We can also try the other non-linear kernels that `SVC` offers (polynomial and sigmoid) and visualize the effect of each on our test dataset.

In [None]:
# Refit with a degree-3 polynomial kernel and score it on the same held-out split.
params = {'kernel': 'poly', 'degree': 3}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)

In [None]:
plt.figure(figsize=(10, 6))
plot_decision_boundary(classifier, X_test, y_test)
# Name the kernel in the title so this figure can be told apart from the others.
plt.title('SVC decision regions on the test split ({} kernel)'.format(classifier.kernel));

In [None]:
# Refit with a sigmoid kernel and score it on the same held-out split.
params = {'kernel': 'sigmoid'}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)

In [None]:
plt.figure(figsize=(10, 6))
plot_decision_boundary(classifier, X_test, y_test)
# Name the kernel in the title so this figure can be told apart from the others.
plt.title('SVC decision regions on the test split ({} kernel)'.format(classifier.kernel));

# Classifying the Iris dataset using a Support Vector Machine

In [None]:
# Load scikit-learn's bundled Iris dataset.
iris = datasets.load_iris()

In [None]:
# List the attribute names available on the loaded dataset object.
dir(iris)

In [None]:
# Dimensions of the feature array.
iris.data.shape

In [None]:
# Names attached to the target classes.
iris.target_names

In [None]:
# Names attached to the feature columns.
iris.feature_names

In [None]:
# Distinct label values present in the target array.
np.unique(iris.target)

In [None]:
# Show the full label array to see how the classes are ordered.
print(iris.target)

In [None]:
# Features keep the float32 cast used throughout this notebook.
data = iris.data.astype(np.float32)
# Class labels are categorical identifiers, so they stay integer instead of
# being cast to float. The copy keeps `target` independent of `iris.target`,
# as the previous astype() call did.
target = iris.target.copy()

In [None]:
# Sample counts of features and labels, one per line; they should match.
print(len(data), len(target), sep='\n')

In [None]:
# Hold out 30% of the Iris samples for evaluation; seeded so the split is repeatable.
(X_train, X_test,
 y_train, y_test) = model_selection.train_test_split(data, target,
                                                     test_size=0.3,
                                                     random_state=123)

In [None]:
# Shapes of the training features and training labels.
(X_train.shape, y_train.shape)

In [None]:
# Shapes of the held-out features and held-out labels.
(X_test.shape, y_test.shape)

In [None]:
# SVC is imported in the notebook's import cell.
# gamma only affects the 'rbf', 'poly' and 'sigmoid' kernels; it is passed here
# so that every kernel in this notebook is constructed the same way.
params = {'kernel': 'linear'}
classifier = SVC(gamma='auto', **params)

In [None]:
# Train on the Iris training split only.
classifier.fit(X=X_train, y=y_train)

In [None]:
# Predicted class for every held-out Iris sample.
predictions = classifier.predict(X=X_test)

In [None]:
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))

In [None]:
# Fraction of held-out Iris samples classified correctly.
metrics.accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Degree-3 polynomial kernel on the Iris split.
params = {'kernel': 'poly', 'degree': 3}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)

In [None]:
# RBF kernel on the Iris split.
params = {'kernel': 'rbf'}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)

In [None]:
# Sigmoid kernel on the Iris split.
params = {'kernel': 'sigmoid'}
classifier = SVC(gamma='auto', **params)
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print(metrics.confusion_matrix(y_test, predictions))
metrics.accuracy_score(y_test, predictions)