In [1]:
import joblib
import pandas as pd
import warnings
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Load Data

In [2]:
tr_features = pd.read_csv('./DATASET/train_features.csv')
tr_labels = pd.read_csv('./DATASET/train_labels.csv')

# Find best hyperparameters

In [3]:
def print_results(results):
    print('Best Parameters: {}\n'.format(results.best_params_))
    means = results.cv_results_['mean_test_score']
    stds = results.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, results.cv_results_['params']):
        print('{} (+/-{}) for {}'.format(round(mean, 3), round(std * 2, 3), params))

In [5]:
svc = SVC()

parameters = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto'],
    'class_weight': ['balanced']
}

cv = StratifiedKFold(n_splits=5)
grid_search  = GridSearchCV(svc, parameters, cv=cv, return_train_score=True, n_jobs=1, verbose=10)

grid_search.fit(tr_features, tr_labels.values.ravel())

print_results(grid_search)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5; 1/18] START C=0.1, class_weight=balanced, gamma=scale, kernel=linear...
[CV 1/5; 1/18] END C=0.1, class_weight=balanced, gamma=scale, kernel=linear;, score=(train=0.959, test=0.959) total time=  24.6s
[CV 2/5; 1/18] START C=0.1, class_weight=balanced, gamma=scale, kernel=linear...
[CV 2/5; 1/18] END C=0.1, class_weight=balanced, gamma=scale, kernel=linear;, score=(train=0.961, test=0.961) total time=  26.0s
[CV 3/5; 1/18] START C=0.1, class_weight=balanced, gamma=scale, kernel=linear...
[CV 3/5; 1/18] END C=0.1, class_weight=balanced, gamma=scale, kernel=linear;, score=(train=0.959, test=0.960) total time=  24.8s
[CV 4/5; 1/18] START C=0.1, class_weight=balanced, gamma=scale, kernel=linear...
[CV 4/5; 1/18] END C=0.1, class_weight=balanced, gamma=scale, kernel=linear;, score=(train=0.959, test=0.959) total time=  24.2s
[CV 5/5; 1/18] START C=0.1, class_weight=balanced, gamma=scale, kernel=linear...
[CV 5/5; 1/18] END

In [6]:
grid_search.best_estimator_

# Save the model

In [7]:

joblib.dump(grid_search.best_estimator_, 'SVM_model.pkl')

['SVM_model.pkl']