# SVM

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import pickle
%matplotlib inline

## Importing the dataset

In [2]:
dataset = pd.read_csv('extracted_feature_with_Response.csv')
X = dataset.iloc[:, [1, 258]].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

## Feature Scaling

In [4]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Fitting Kernel SVM to the Training set

### Kernel = RBF & C = 1

In [5]:
from sklearn.svm import SVC
classifier_1 = SVC(C=1, kernel = 'rbf', gamma = 'auto',random_state = 0)
classifier_1.fit(X_train, y_train)

SVC(C=1, gamma='auto', random_state=0)

### Kernel = RBF & C = 10

In [6]:
from sklearn.svm import SVC
classifier_2 = SVC(C=10, kernel = 'rbf', gamma = 'auto',random_state = 0)
classifier_2.fit(X_train, y_train)

SVC(C=10, gamma='auto', random_state=0)

### Kernel = Linear & C = 1

In [7]:
from sklearn.svm import SVC
classifier_3 = SVC(C=1, kernel = 'linear', gamma = 'auto',random_state = 0)
classifier_3.fit(X_train, y_train)

SVC(C=1, gamma='auto', kernel='linear', random_state=0)

### Kernel = Linear & C = 10

In [8]:
from sklearn.svm import SVC
classifier_4 = SVC(C=10,kernel = 'linear', gamma = 'auto',random_state = 0)
classifier_4.fit(X_train, y_train)

SVC(C=10, gamma='auto', kernel='linear', random_state=0)

### Kernel = Polynomial & C = 1

In [9]:
from sklearn.svm import SVC
classifier_5 = SVC(C=1, kernel = 'poly', gamma = 'auto',random_state = 0)
classifier_5.fit(X_train, y_train)

SVC(C=1, gamma='auto', kernel='poly', random_state=0)

### Kernel = Polynomial & C = 10

In [10]:
from sklearn.svm import SVC
classifier_6 = SVC(C=10, kernel = 'poly', gamma = 'auto',random_state = 0)
classifier_6.fit(X_train, y_train)

SVC(C=10, gamma='auto', kernel='poly', random_state=0)

### Kernel = Sigmoid & C = 1

In [11]:
from sklearn.svm import SVC
classifier_7 = SVC(C=1, kernel = 'sigmoid', gamma = 'auto',random_state = 0)
classifier_7.fit(X_train, y_train)

SVC(C=1, gamma='auto', kernel='sigmoid', random_state=0)

### Kernel = Sigmoid & C = 10

In [12]:
from sklearn.svm import SVC
classifier_8 = SVC(C=10, kernel = 'sigmoid', gamma = 'auto',random_state = 0)
classifier_8.fit(X_train, y_train)

SVC(C=10, gamma='auto', kernel='sigmoid', random_state=0)

## Fitting XGBoost to the Training set

In [None]:
from xgboost import XGBClassifier
classifier = XGBClassifier()

for i in range(1,8):
    classifier_$i = classifier_$i.fit(X_train, y_train)

## Predicting the Test set results

In [13]:
y_pred_1 = classifier_1.predict(X_test)
y_pred_2 = classifier_2.predict(X_test)
y_pred_3 = classifier_3.predict(X_test)
y_pred_4 = classifier_4.predict(X_test)
y_pred_5 = classifier_5.predict(X_test)
y_pred_6 = classifier_6.predict(X_test)
y_pred_7 = classifier_7.predict(X_test)
y_pred_8 = classifier_8.predict(X_test)

## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix
cm_1 = confusion_matrix(y_test, y_pred_1)
cm_2 = confusion_matrix(y_test, y_pred_2)
cm_3 = confusion_matrix(y_test, y_pred_3)
cm_4 = confusion_matrix(y_test, y_pred_4)
cm_5 = confusion_matrix(y_test, y_pred_5)
cm_6 = confusion_matrix(y_test, y_pred_6)
cm_7 = confusion_matrix(y_test, y_pred_7)
cm_8 = confusion_matrix(y_test, y_pred_8)

In [15]:
cm_1
cm_2
cm_3
cm_4
cm_5
cm_6
cm_7
cm_8

array([[ 2,  5],
       [ 7, 20]])

## Applying k-Fold Cross Validation

In [16]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier_1, X = X_train, y = y_train, cv = 10)

accuracies.mean()
accuracies.std()

0.02417582417582418

## Applying k-Fold Cross Validation

In [17]:
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
              {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search_1 = GridSearchCV(estimator = classifier_1,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)


grid_search_2 = GridSearchCV(estimator = classifier_2,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search_3 = GridSearchCV(estimator = classifier_3,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search_4 = GridSearchCV(estimator = classifier_4,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search_5 = GridSearchCV(estimator = classifier_5,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search_6 = GridSearchCV(estimator = classifier_6,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search_7 = GridSearchCV(estimator = classifier_7,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)


grid_search_8 = GridSearchCV(estimator = classifier_8,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)


grid_search = grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_

NameError: name 'grid_search' is not defined

## Confusion Matrix and Accuracy Score

In [None]:
# View the accuracy score
print('Best score for training data:', best_accuracy,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',best_parameters.C,"\n") 
print('Best Kernel:',best_parameters.kernel,"\n")
print('Best Gamma:',best_parameters.gamma,"\n")

final_model = best_parameters
Y_pred = final_model.predict(X_test)
Y_pred_label = list(encoder.inverse_transform(Y_pred))

## Check Accuracy

In [None]:
from sklearn.metrics import accuracy_score
score = accuracy_score(y_test, y_pred)
score

## Visualising the Training set results

from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Kernel SVM (Training set)')
plt.xlabel('Scaffolds')
plt.ylabel('Features')
plt.legend()
plt.show()

## Visualising the Test set results

from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Kernel SVM (Test set)')
plt.xlabel('Scaffolds')
plt.ylabel('Features')
plt.legend()
plt.show()

## Create a Pickle file using serialization

In [18]:
# pickle.dump(X_test, open('model_svm.pkl','wb'))

pickle.dump(classifier_1, open('model_svm_1.pkl','wb'))
pickle.dump(classifier_2, open('model_svm_2.pkl','wb'))
pickle.dump(classifier_3, open('model_svm_3.pkl','wb'))
pickle.dump(classifier_4, open('model_svm_4.pkl','wb'))
pickle.dump(classifier_5, open('model_svm_5.pkl','wb'))
pickle.dump(classifier_6, open('model_svm_6.pkl','wb'))
pickle.dump(classifier_7, open('model_svm_7.pkl','wb'))
pickle.dump(classifier_8, open('model_svm_8.pkl','wb'))

## Loading model to compare the results

model = pickle.load(open('model_svm.pkl','rb'))