In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV

from sklearn import metrics 
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

In [3]:
path=r'D:\ML-project\data/monks-2'
monk1_train = pd.read_csv(path+'.train', header=None, delim_whitespace=True, dtype=str)
monk1_test = pd.read_csv(path+'.test', header=None, delim_whitespace=True, dtype=str)

y_train=monk1_train[0]
x_train=monk1_train[monk1_train.columns[1:-1]]

y_test=monk1_test[0]
x_test=monk1_test[monk1_train.columns[1:-1]]

x_train=pd.get_dummies(x_train)
x_test=pd.get_dummies(x_test)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

## SVM polinomial 

In [102]:
estimator_svm = SVC()

C=np.logspace(-4,3,5)

gamma=np.logspace(-4,3,5)

coef=np.linspace(-10,10,5)

deg=np.arange(1,3,1)

parameters_SVM_poly= {
    'C':C,
    'kernel': ['poly'],
    'gamma':gamma,
    'coef0':coef ,
    'degree':deg   
}      


# with GridSearch
grid_search_SVM_poly = GridSearchCV(
    estimator=estimator_svm,
    param_grid=parameters_SVM_poly,
    scoring = 'accuracy',
    cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=0),
    n_jobs=-1,
    return_train_score = True
)
C

array([1.00000000e-04, 5.62341325e-03, 3.16227766e-01, 1.77827941e+01,
       1.00000000e+03])

In [103]:
SVM=grid_search_SVM_poly.fit(x_train, y_train)
print(grid_search_SVM_poly.best_params_) 
print('Best Score - Validation:', grid_search_SVM_poly.best_score_ )

y_pred_SVM =SVM.predict(x_test)

print('Accuracy Score - SVM - Test-error:', metrics.accuracy_score(y_test, y_pred_SVM))
cv_results_df = pd.DataFrame(grid_search_SVM_poly.cv_results_)
cv_results_df[['param_C','param_gamma','mean_test_score' ]]


## SVM RBF

In [70]:
estimator_svm = SVC()
C=np.logspace(-4,3,20)

gamma=(0.001,0.01,0.1,1,5,10)


#rbf kernel parameters
parameters_SVM_rbf= {
    'C':C,
    'kernel': ['rbf'],
    'gamma':gamma
}      

grid_search_SVM_rbf = GridSearchCV(
    estimator=estimator_svm,
    param_grid=parameters_SVM_rbf,
    scoring = 'accuracy',
    cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=0),
    n_jobs=-1,
    return_train_score = True
)


In [71]:
SVM=grid_search_SVM_rbf.fit(x_train, y_train)
print(grid_search_SVM_rbf.best_params_) 
print('Best Score - Validation:', grid_search_SVM_rbf.best_score_ )

y_pred_SVM =SVM.predict(x_test)

print('Accuracy Score - SVM - Test-error:', metrics.accuracy_score(y_test, y_pred_SVM))
cv_results_df = pd.DataFrame(grid_search_SVM_rbf.cv_results_)
cv_results_df[['param_C','param_gamma','mean_test_score' ]]


{'C': 183.29807108324337, 'gamma': 0.1, 'kernel': 'rbf'}
Best Score - Validation: 0.7072727272727274
Accuracy Score - SVM - Test-error: 0.8148148148148148


## SVM sigmoid

In [None]:
estimator_svm = SVC()
C=np.logspace(-4,3,10)
gamma=np.logspace(-4,3,10)
coef=np.linspace(0,10,3)


#sigmoid kernel parameters
parameters_SVM_sigmoid= {
    'C':C,
    'kernel': ['sigmoid'],
    'gamma':gamma,
    'coef0':coef   
}      


grid_search_SVM_sigmoid= GridSearchCV(
    estimator=estimator_svm,
    param_grid=parameters_SVM_sigmoid,
    scoring = 'accuracy',
    cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=0),
    n_jobs=-1,
    return_train_score = True
)

In [None]:
SVM=grid_search_SVM.fit(x_train, y_train)
print(grid_search_SVM.best_params_) 
print('Best Score - Validation:', grid_search_SVM.best_score_ )

y_pred_SVM =SVM.predict(x_test)

print('Accuracy Score - KNN - Test-error:', metrics.accuracy_score(y_test, y_pred_SVM))

In [49]:
cv_results_df = pd.DataFrame(grid_search_SVM.cv_results_)
cv_results_df[[ 'params', 'mean_test_score',  'mean_train_score', 'std_train_score']]

Unnamed: 0,params,mean_test_score,mean_train_score,std_train_score
0,"{'C': 0.1, 'coef0': 0.0, 'degree': 1, 'gamma':...",0.621390,0.621307,0.00183
1,"{'C': 0.1, 'coef0': 0.0, 'degree': 1, 'gamma':...",0.621390,0.621307,0.00183
2,"{'C': 0.1, 'coef0': 0.0, 'degree': 1, 'gamma':...",0.621390,0.621307,0.00183
3,"{'C': 0.1, 'coef0': 0.0, 'degree': 1, 'gamma':...",0.621390,0.621307,0.00183
4,"{'C': 0.1, 'coef0': 0.0, 'degree': 1, 'gamma':...",0.621390,0.621307,0.00183
...,...,...,...,...
319,"{'C': 100.0, 'coef0': 10.0, 'degree': 7, 'gamm...",0.587094,1.000000,0.00000
320,"{'C': 100.0, 'coef0': 10.0, 'degree': 7, 'gamm...",0.621390,0.621307,0.00183
321,"{'C': 100.0, 'coef0': 10.0, 'degree': 7, 'gamm...",0.585918,1.000000,0.00000
322,"{'C': 100.0, 'coef0': 10.0, 'degree': 7, 'gamm...",0.621390,1.000000,0.00000
