In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.model_selection import train_test_split
import os
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [17]:
Kyphosis = pd.read_csv("C:\Aalesh and Mandar\Datasets\Cases\Kyphosis\Kyphosis.csv")

In [18]:
Kyphosis.head()

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [19]:
Kyphosis = pd.get_dummies(Kyphosis, drop_first=True)

In [20]:
Kyphosis.head()

Unnamed: 0,Age,Number,Start,Kyphosis_present
0,71,3,5,0
1,158,3,14,0
2,128,4,5,1
3,2,5,1,0
4,1,4,15,0


In [21]:
x = Kyphosis.drop(["Kyphosis_present"], axis=1)
y=Kyphosis["Kyphosis_present"]

In [22]:
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify = y, random_state = 2022, train_size = 0.7)

In [23]:
svm = SVC(kernel="linear", probability = True,  random_state = 2022)
svm.fit(x_train, y_train)
y_pred = svm.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.76


In [24]:
y_pred_prob = svm.predict_proba(x_test)[:,1]
print(roc_auc_score(y_test, y_pred_prob))

0.64


In [25]:
scaler = StandardScaler()
kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 2022)
params = {"C" : np.linspace(0.001, 10, 20)}
gcv = GridSearchCV(svm, param_grid = params, verbose = 3, scoring = "roc_auc", cv = kfold)
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END ...........................C=0.001;, score=0.778 total time=   0.0s
[CV 2/5] END ...........................C=0.001;, score=0.875 total time=   0.0s
[CV 3/5] END ...........................C=0.001;, score=1.000 total time=   0.0s
[CV 4/5] END ...........................C=0.001;, score=0.944 total time=   0.0s
[CV 5/5] END ...........................C=0.001;, score=0.889 total time=   0.0s
[CV 1/5] END ..............C=0.5272631578947369;, score=0.852 total time=   0.0s
[CV 2/5] END ..............C=0.5272631578947369;, score=0.875 total time=   0.0s
[CV 3/5] END ..............C=0.5272631578947369;, score=1.000 total time=   0.0s
[CV 4/5] END ..............C=0.5272631578947369;, score=0.944 total time=   0.0s
[CV 5/5] END ..............C=0.5272631578947369;, score=0.889 total time=   0.0s
[CV 1/5] END ..............C=1.0535263157894736;, score=0.852 total time=   0.0s
[CV 2/5] END ..............C=1.0535263157894736

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=2022, shuffle=True),
             estimator=SVC(kernel='linear', probability=True,
                           random_state=2022),
             param_grid={'C': array([1.00000000e-03, 5.27263158e-01, 1.05352632e+00, 1.57978947e+00,
       2.10605263e+00, 2.63231579e+00, 3.15857895e+00, 3.68484211e+00,
       4.21110526e+00, 4.73736842e+00, 5.26363158e+00, 5.78989474e+00,
       6.31615789e+00, 6.84242105e+00, 7.36868421e+00, 7.89494737e+00,
       8.42121053e+00, 8.94747368e+00, 9.47373684e+00, 1.00000000e+01])},
             scoring='roc_auc', verbose=3)

In [26]:
print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.5272631578947369}
0.912037037037037


## with the help of scaling

In [32]:
pipe = Pipeline([("STD", scaler),("SVM",svm)])
pipe.get_params()

{'memory': None,
 'steps': [('STD', StandardScaler()),
  ('SVM', SVC(kernel='linear', probability=True, random_state=2022))],
 'verbose': False,
 'STD': StandardScaler(),
 'SVM': SVC(kernel='linear', probability=True, random_state=2022),
 'STD__copy': True,
 'STD__with_mean': True,
 'STD__with_std': True,
 'SVM__C': 1.0,
 'SVM__break_ties': False,
 'SVM__cache_size': 200,
 'SVM__class_weight': None,
 'SVM__coef0': 0.0,
 'SVM__decision_function_shape': 'ovr',
 'SVM__degree': 3,
 'SVM__gamma': 'scale',
 'SVM__kernel': 'linear',
 'SVM__max_iter': -1,
 'SVM__probability': True,
 'SVM__random_state': 2022,
 'SVM__shrinking': True,
 'SVM__tol': 0.001,
 'SVM__verbose': False}

In [33]:
kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 2022)
params = {"SVM__C" : np.linspace(0.001, 10, 20)}
gcv = GridSearchCV(pipe, param_grid = params, verbose = 3, scoring = "roc_auc", cv = kfold)
gcv.fit(x, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END ......................SVM__C=0.001;, score=0.885 total time=   0.0s
[CV 2/5] END ......................SVM__C=0.001;, score=0.846 total time=   0.0s
[CV 3/5] END ......................SVM__C=0.001;, score=0.718 total time=   0.0s
[CV 4/5] END ......................SVM__C=0.001;, score=0.846 total time=   0.0s
[CV 5/5] END ......................SVM__C=0.001;, score=0.917 total time=   0.0s
[CV 1/5] END .........SVM__C=0.5272631578947369;, score=0.885 total time=   0.0s
[CV 2/5] END .........SVM__C=0.5272631578947369;, score=0.846 total time=   0.0s
[CV 3/5] END .........SVM__C=0.5272631578947369;, score=0.744 total time=   0.0s
[CV 4/5] END .........SVM__C=0.5272631578947369;, score=0.897 total time=   0.0s
[CV 5/5] END .........SVM__C=0.5272631578947369;, score=0.917 total time=   0.0s
[CV 1/5] END .........SVM__C=1.0535263157894736;, score=0.885 total time=   0.0s
[CV 2/5] END .........SVM__C=1.0535263157894736

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=2022, shuffle=True),
             estimator=Pipeline(steps=[('STD', StandardScaler()),
                                       ('SVM',
                                        SVC(kernel='linear', probability=True,
                                            random_state=2022))]),
             param_grid={'SVM__C': array([1.00000000e-03, 5.27263158e-01, 1.05352632e+00, 1.57978947e+00,
       2.10605263e+00, 2.63231579e+00, 3.15857895e+00, 3.68484211e+00,
       4.21110526e+00, 4.73736842e+00, 5.26363158e+00, 5.78989474e+00,
       6.31615789e+00, 6.84242105e+00, 7.36868421e+00, 7.89494737e+00,
       8.42121053e+00, 8.94747368e+00, 9.47373684e+00, 1.00000000e+01])},
             scoring='roc_auc', verbose=3)

In [34]:
print(gcv.best_params_)
print(gcv.best_score_)

{'SVM__C': 0.5272631578947369}
0.8576923076923076
