# Support Vector Classifier
## Hyper-parameter Tunning

In [1]:
import warnings
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
warnings.filterwarnings('ignore')

df_train = pd.read_csv('Final_Train_dataset.csv')
X_train  = df_train.iloc[:,:-1]
y_train  = df_train.VirusDetected

In [8]:
# Specify different values for the tunning process
StratifiedKFold = RepeatedStratifiedKFold(n_splits     = 5, 
                                          n_repeats    = 3, 
                                          random_state = 99)

kernel  = ['rbf', 'poly', 'sigmoid','linear']
degree  = [1, 2, 3, 4]
gamma   = ['scale', 'auto']
C       = [0.01, 0.1, 1, 5]

#Create parameter grid
svc_grid = [{'kernel' : kernel,
             'degree' : degree, 
             'gamma'  : gamma,
             'C'      : C}]

#Create SVR object
svc_model  = SVC()

#Grid Search CV
svc_search   = GridSearchCV(svc_model, 
                           svc_grid, 
                           scoring= 'accuracy', 
                           cv = StratifiedKFold, 
                           verbose= True)

In [3]:
from datetime import datetime

def timer(start_time=None):
    if not start_time:
        start_time = datetime.now()
        return start_time
    elif start_time:
        thour, temp_sec = divmod((datetime.now() - start_time).total_seconds(), 3600)
        tmin, tsec = divmod(temp_sec, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))
        
start_time = timer(None) # timing starts from this point for "start_time" variable
svc_search.fit(X_train,y_train)
timer(start_time) # timing ends here for "start_time" variable

Fitting 15 folds for each of 128 candidates, totalling 1920 fits

 Time taken: 0 hours 0 minutes and 5.49 seconds.


In [4]:
svc_search.best_params_

{'C': 5, 'degree': 1, 'gamma': 'scale', 'kernel': 'linear'}

- Specify the optimal model

In [7]:
optimal_model = SVC(C      = 5,
                    degree = 100,
                    gamma  = 'scale',
                    kernel = 'linear').fit(X_train, y_train)