# Kernel SVM

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Read the HR records from disk.
dataset = pd.read_csv('HR_Dataset.csv')
# The trailing three columns are excluded from the features; the first of
# those three (third from the end) is used as the target.
n_trailing = 3
X = dataset.iloc[:, :-n_trailing].values
y = dataset.iloc[:, -n_trailing].values

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split = train_test_split(X, y, test_size=0.1, random_state=0)
X_train, X_test, y_train, y_test = split

In [None]:
# Inspect the training features as split, before any scaling is applied.
print(X_train)

[[ 0.4   0.47  5.   ...  3.    0.    0.  ]
 [ 0.72  0.96  5.   ...  5.    0.    1.  ]
 [ 0.41  0.38  4.   ... 10.    1.    0.  ]
 ...
 [ 0.69  0.94  5.   ...  2.    0.    0.  ]
 [ 0.78  0.59  5.   ...  3.    0.    0.  ]
 [ 0.56  0.68  5.   ...  2.    0.    0.  ]]


In [None]:
# Inspect the training labels.
print(y_train)

[0 0 0 ... 0 0 0]


In [None]:
# Inspect the held-out test features as split, before any scaling is applied.
print(X_test)

[[0.44 0.57 2.   ... 3.   0.   1.  ]
 [0.55 0.96 3.   ... 3.   0.   0.  ]
 [0.72 0.67 5.   ... 2.   0.   0.  ]
 ...
 [0.96 0.53 3.   ... 4.   0.   0.  ]
 [0.59 0.48 3.   ... 3.   0.   0.  ]
 [0.59 0.98 3.   ... 3.   0.   0.  ]]


In [None]:
# Inspect the held-out test labels.
print(y_test)

[0 0 0 ... 0 0 0]


## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows never influence the statistics.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
# Inspect the training features after standardisation.
print(X_train)

[[-0.85879834 -1.44154247  0.96907349 ... -0.34033094 -0.41103163
  -0.55878327]
 [ 0.42720506  1.41946158  0.96907349 ...  1.02757954 -0.41103163
   1.7896026 ]
 [-0.81861074 -1.96703301  0.15761495 ...  4.44735572  2.43290277
  -0.55878327]
 ...
 [ 0.30664224  1.30268591  0.96907349 ... -1.02428617 -0.41103163
  -0.55878327]
 [ 0.6683307  -0.74088842  0.96907349 ... -0.34033094 -0.41103163
  -0.55878327]
 [-0.21579664 -0.21539788  0.96907349 ... -1.02428617 -0.41103163
  -0.55878327]]


In [None]:
# Inspect the test features after applying the scaler fitted on the training set.
print(X_test)

[[-0.69804792 -0.85766409 -1.46530211 ... -0.34033094 -0.41103163
   1.7896026 ]
 [-0.25598425  1.41946158 -0.65384358 ... -0.34033094 -0.41103163
  -0.55878327]
 [ 0.42720506 -0.27378571  0.96907349 ... -1.02428617 -0.41103163
  -0.55878327]
 ...
 [ 1.39170762 -1.09121544 -0.65384358 ...  0.3436243  -0.41103163
  -0.55878327]
 [-0.09523382 -1.38315463 -0.65384358 ... -0.34033094 -0.41103163
  -0.55878327]
 [-0.09523382  1.53623726 -0.65384358 ... -0.34033094 -0.41103163
  -0.55878327]]


## Training the Kernel SVM model on the Training set

In [None]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier; all other hyper-parameters are left
# at the library defaults.
svc_options = {'kernel': 'rbf', 'random_state': 0}
classifier = SVC(**svc_options)
# Keep fit() as the cell's final expression so the notebook echoes the
# fitted estimator.
classifier.fit(X_train, y_train)

SVC(random_state=0)

## Predicting a new result

In [None]:
# The scaler and the classifier were both fitted on every feature column of
# X, so a new observation has to supply the same number of values. A
# two-value row such as [[30, 87000]] does not match that width and is
# rejected by sc.transform with a ValueError.
# Use one raw (unscaled) record from the dataset as the example input; it is
# sliced with [:1] to keep the 2-D shape (1, n_features) that transform expects.
new_observation = X[:1]
print(classifier.predict(sc.transform(new_observation)))

## Predicting the Test set results

In [None]:
y_pred = classifier.predict(X_test)
# Show predictions and ground truth side by side: predicted label in the
# left column, true label in the right column.
comparison = np.column_stack((y_pred, y_test))
print(comparison)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of true labels against predicted labels on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# NOTE(review): in the recorded run every test row is predicted as class 0
# (the second column of the matrix is all zeros), so the accuracy figure only
# reflects how common class 0 is. Read it together with the matrix.
# Left as the cell's final expression so the notebook displays the value.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1475    0]
 [  25    0]]


0.9833333333333333

## Visualising the Training set results

In [None]:
from matplotlib.colors import ListedColormap

# Plot a 2-D slice of the decision surface over the first two feature columns.
# The scaler and classifier were fitted on all feature columns, so a grid
# holding only two columns cannot be passed to them. The remaining columns
# are pinned to their training means (sc.mean_) while the first two vary.
X_set, y_set = sc.inverse_transform(X_train), y_train
cmap = ListedColormap(('red', 'green'))
# X is built from the dataset's leading columns in order, so the first two
# dataset column names describe the two plotted axes.
axis_names = dataset.columns[:2]

# Pad each axis by 5% of its own span and sample a fixed number of points,
# so the grid size does not depend on the units of the feature.
grid_points = 200
grid_axes = []
for col in (0, 1):
    lo, hi = X_set[:, col].min(), X_set[:, col].max()
    pad = 0.05 * (hi - lo) or 1.0  # fall back to a unit pad for a constant column
    grid_axes.append(np.linspace(lo - pad, hi + pad, grid_points))
X1, X2 = np.meshgrid(grid_axes[0], grid_axes[1])

# One grid row per mesh point, in original units, then scaled like the training data.
grid = np.tile(sc.mean_, (X1.size, 1))
grid[:, 0] = X1.ravel()
grid[:, 1] = X2.ravel()
predicted = classifier.predict(sc.transform(grid))

# Map labels to class indices and use explicit band edges so each class keeps
# the same colour even when only one class is predicted across the grid.
classes = classifier.classes_
Z = np.searchsorted(classes, predicted).reshape(X1.shape)
levels = np.arange(len(classes) + 1) - 0.5
plt.contourf(X1, X2, Z, levels = levels, alpha = 0.75, cmap = cmap)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(classes):
    # color= (not c=) because a single RGBA tuple is passed for the whole group.
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], color = cmap(i), label = j)
plt.title('Kernel SVM (Training set)')
plt.xlabel(axis_names[0])
plt.ylabel(axis_names[1])
plt.legend()
plt.show()

## Visualising the Test set results

In [None]:
 from matplotlib.colors import ListedColormap

 # Plot a 2-D slice of the decision surface over the first two feature columns.
 # The scaler and classifier were fitted on all feature columns, so a grid
 # holding only two columns cannot be passed to them. The remaining columns
 # are pinned to their training means (sc.mean_) while the first two vary.
 X_set, y_set = sc.inverse_transform(X_test), y_test
 cmap = ListedColormap(('red', 'green'))
 # X is built from the dataset's leading columns in order, so the first two
 # dataset column names describe the two plotted axes.
 axis_names = dataset.columns[:2]

 # Pad each axis by 5% of its own span and sample a fixed number of points,
 # so the grid size does not depend on the units of the feature.
 grid_points = 200
 grid_axes = []
 for col in (0, 1):
     lo, hi = X_set[:, col].min(), X_set[:, col].max()
     pad = 0.05 * (hi - lo) or 1.0  # fall back to a unit pad for a constant column
     grid_axes.append(np.linspace(lo - pad, hi + pad, grid_points))
 X1, X2 = np.meshgrid(grid_axes[0], grid_axes[1])

 # One grid row per mesh point, in original units, then scaled like the training data.
 grid = np.tile(sc.mean_, (X1.size, 1))
 grid[:, 0] = X1.ravel()
 grid[:, 1] = X2.ravel()
 predicted = classifier.predict(sc.transform(grid))

 # Map labels to class indices and use explicit band edges so each class keeps
 # the same colour even when only one class is predicted across the grid.
 classes = classifier.classes_
 Z = np.searchsorted(classes, predicted).reshape(X1.shape)
 levels = np.arange(len(classes) + 1) - 0.5
 plt.contourf(X1, X2, Z, levels = levels, alpha = 0.75, cmap = cmap)
 plt.xlim(X1.min(), X1.max())
 plt.ylim(X2.min(), X2.max())
 for i, j in enumerate(classes):
     # color= (not c=) because a single RGBA tuple is passed for the whole group.
     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], color = cmap(i), label = j)
 plt.title('Kernel SVM (Test set)')
 plt.xlabel(axis_names[0])
 plt.ylabel(axis_names[1])
 plt.legend()
 plt.show()