# Kernel SVM

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [3]:
# Read the lung-cancer table. The first two columns are skipped, the columns
# from index 2 up to (but excluding) the last become the features, and the
# final column is used as the target.
dataset = pd.read_csv('lung_cancer.csv')
feature_columns = dataset.iloc[:, 2:-1]
label_column = dataset.iloc[:, -1]
X = feature_columns.values
y = label_column.values

In [4]:
# Show the full feature matrix sliced out of the CSV (one row per record).
print(X)

[[35  3  5  4]
 [27 20  2  5]
 [30  0  5  2]
 [28  0  8  1]
 [68  4  5  6]
 [34  0 10  0]
 [58 15 10  0]
 [22 12  5  2]
 [45  2  6  0]
 [52 18  4  5]
 [33  4  8  0]
 [18 10  6  3]
 [25  2  5  1]
 [28 20  2  8]
 [34 25  4  8]
 [39 18  8  1]
 [42 22  3  5]
 [19 12  8  0]
 [62  5  4  3]
 [73 10  7  6]
 [55 15  1  3]
 [33  8  8  1]
 [22 20  6  2]
 [44  5  8  1]
 [77  3  2  6]
 [21 20  5  3]
 [37 15  6  2]
 [34 12  8  0]
 [55 20  1  4]
 [40 20  2  7]
 [36 13  5  2]
 [56 20  3  3]
 [47 15  1  8]
 [62 25  3  4]
 [26 10  7  2]
 [25 20  8  2]
 [59 20  3  4]
 [62 15  5  5]
 [33 25  8  2]
 [37 10  5  3]
 [50 20  2  4]
 [47 12  8  0]
 [69 20  5  4]
 [63 20  4  5]
 [39 15  7  2]
 [21 20  8  3]
 [31 20  9  4]
 [28 10  4  1]
 [53 20  6  3]
 [62 20  5  6]
 [42 12  6  2]
 [44 30  1  6]
 [26 34  1  8]
 [35 20  5  1]
 [26 13  6  1]
 [77 20  5  4]
 [75 15  3  5]
 [43 30  3  8]
 [51 25  9  0]]


In [5]:
# Show the target vector taken from the last CSV column.
print(y)

[1 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0 1 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1
 1 0 0 1 0 1 1 0 0 0 0 1 1 0 1 1 0 0 1 1 1 0]


## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore every downstream number) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [7]:
# Show the training-split features (not yet scaled at this point in the notebook).
print(X_train)

[[69 20  5  4]
 [53 20  6  3]
 [22 12  5  2]
 [34 25  4  8]
 [47 15  1  8]
 [62 20  5  6]
 [40 20  2  7]
 [62 15  5  5]
 [75 15  3  5]
 [62  5  4  3]
 [77 20  5  4]
 [34 12  8  0]
 [39 18  8  1]
 [34  0 10  0]
 [56 20  3  3]
 [42 22  3  5]
 [42 12  6  2]
 [55 15  1  3]
 [44 30  1  6]
 [45  2  6  0]
 [28 20  2  8]
 [21 20  5  3]
 [19 12  8  0]
 [51 25  9  0]
 [43 30  3  8]
 [26 34  1  8]
 [33 25  8  2]
 [27 20  2  5]
 [25  2  5  1]
 [21 20  8  3]
 [77  3  2  6]
 [58 15 10  0]
 [44  5  8  1]
 [59 20  3  4]
 [33  8  8  1]
 [73 10  7  6]
 [52 18  4  5]
 [37 10  5  3]
 [26 13  6  1]
 [28  0  8  1]
 [35  3  5  4]
 [35 20  5  1]
 [28 10  4  1]
 [39 15  7  2]]


In [8]:
# Show the test-split features (not yet scaled at this point in the notebook).
print(X_test)

[[37 15  6  2]
 [25 20  8  2]
 [63 20  4  5]
 [55 20  1  4]
 [18 10  6  3]
 [30  0  5  2]
 [26 10  7  2]
 [31 20  9  4]
 [50 20  2  4]
 [22 20  6  2]
 [68  4  5  6]
 [33  4  8  0]
 [36 13  5  2]
 [47 12  8  0]
 [62 25  3  4]]


In [9]:
# Show the labels that go with the training-split rows.
print(y_train)

[1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 1
 0 0 0 1 0 0 0]


In [10]:
# Show the labels that go with the test-split rows.
print(y_test)

[0 0 1 1 0 0 0 0 1 0 1 0 0 0 1]


## Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows never influence the statistics.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Training the Kernel SVM model on the Training set

In [14]:
from sklearn.svm import SVC

# Linear-kernel SVM with the C / kernel combination that the grid-search cell
# of this notebook reports as best.
# NOTE(review): per the scikit-learn SVC documentation, gamma is the kernel
# coefficient for 'rbf', 'poly' and 'sigmoid' only, so it should have no
# effect with kernel='linear' -- confirm and consider dropping it.
classifier = SVC(
    C=0.25,
    kernel='linear',
    gamma=0.1,
    random_state=0,
)
classifier.fit(X_train, y_train)

SVC(C=0.25, gamma=0.1, kernel='linear', random_state=0)

## Applying Grid Search to find the best model and the best parameters

In [13]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters: a linear kernel over four C values, and an RBF
# kernel over the same C values crossed with nine gamma values.
c_grid = [0.25, 0.5, 0.75, 1]
gamma_grid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
parameters = [
    {'C': c_grid, 'kernel': ['linear']},
    {'C': c_grid, 'kernel': ['rbf'], 'gamma': gamma_grid},
]

# Exhaustive search over the grid, scored by accuracy with 10-fold
# cross-validation on the training split; n_jobs=-1 runs fits in parallel.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the best mean CV score and the parameter set that produced it.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 97.50 %
Best Parameters: {'C': 0.25, 'kernel': 'linear'}


## Predicting the Test set results 

In [15]:
# Predict the held-out rows and print a two-column table:
# left column = predicted label, right column = true label.
y_pred = classifier.predict(X_test)
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]]


## Making the Confusion Matrix

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Recompute the test-set predictions and tabulate them against the true labels.
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Optional overall-accuracy check, currently disabled: accuracy_score(y_test, y_pred)

[[10  0]
 [ 0  5]]


## Applying k-Fold Cross Validation

In [17]:
from sklearn.model_selection import cross_val_score

# Score the classifier with 10-fold cross-validation on the training split,
# then report the mean and the standard deviation of the per-fold scores.
accuracies = cross_val_score(
    estimator=classifier,
    X=X_train,
    y=y_train,
    cv=10,
)
mean_pct = accuracies.mean() * 100
std_pct = accuracies.std() * 100
print("Accuracy: {:.2f} %".format(mean_pct))
print("Standard Deviation: {:.2f} %".format(std_pct))

Accuracy: 97.50 %
Standard Deviation: 7.50 %


## Predicting a new result


In [19]:
# Classify one hand-entered record. It is passed through the same fitted
# scaler as the training data before prediction, since the model was trained
# on scaled features.
new_sample = [[44, 30, 1, 6]]
print(classifier.predict(sc.transform(new_sample)))

[1]
