In [11]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures # for polynomial features
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV


In [12]:
# Load the Kyphosis dataset. The relative path uses forward slashes so that it
# resolves on Windows as well as on Linux/macOS (a backslash-separated path is
# only understood on Windows).
kyp = pd.read_csv('../Cases/Kyphosis/Kyphosis.csv')
kyp.head()

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [13]:
from sklearn.preprocessing import LabelEncoder

# Target: the 'Kyphosis' label column encoded as integers.
# Features: every remaining column of the frame.
le = LabelEncoder()
le.fit(kyp['Kyphosis'])
y = le.transform(kyp['Kyphosis'])
X = kyp.drop(columns='Kyphosis')

# Show which original label maps to which integer code (position == code).
print(le.classes_)

['absent' 'present']


In [14]:
# Hold out 30% of the rows as a test set, stratified on the target, with a
# fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

### Linear SVM

In [15]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier with C=0.5.
svc = SVC(kernel='linear', C=0.5)

# Train on the training split, then label the held-out rows.
y_pred = svc.fit(X_train, y_train).predict(X_test)

In [16]:
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

# Only accuracy is reported for this model: ROC AUC and log loss need class
# probabilities (predict_proba), and this SVC was built without
# probability=True, so those two metrics are not computed in this cell.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.76


In [17]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified cross-validation, shuffled with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate values for the SVC penalty parameter C.
params = {'C': [0.1, 0.5, 1, 1.5, 2, 3]}

# Exhaustive search over C on the full dataset, scored with the estimator's
# default scorer.
gcv = GridSearchCV(estimator=svc, param_grid=params, cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.5}
0.8036764705882353


In [18]:
# Same search as before, but over 10 evenly spaced C values in [0.001, 5].
c_candidates = np.linspace(0.001, 5, 10)
params = {'C': c_candidates}

gcv = GridSearchCV(estimator=svc, param_grid=params, cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.5564444444444444}
0.8036764705882353


In [19]:
# Re-run the discrete C grid and keep the per-candidate CV results as a table.
params = {'C': [0.1, 0.5, 1, 1.5, 2, 3]}

gcv = GridSearchCV(estimator=svc, param_grid=params, cv=kfold)
gcv.fit(X, y)

# One row per candidate: timings, per-split scores, mean/std and rank.
pd_cv = pd.DataFrame(gcv.cv_results_)

print(gcv.best_params_)
print(gcv.best_score_)
print(pd_cv)

{'C': 0.5}
0.8036764705882353
   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0       0.009449      0.009965         0.002097        0.001956     0.1   
1       0.010069      0.013795         0.001781        0.003089     0.5   
2       0.015231      0.010925         0.006521        0.007590       1   
3       0.028804      0.017705         0.000597        0.000796     1.5   
4       0.033943      0.026876         0.003935        0.005892       2   
5       0.046341      0.036232         0.003458        0.006140       3   

       params  split0_test_score  split1_test_score  split2_test_score  \
0  {'C': 0.1}           0.705882             0.8125              0.875   
1  {'C': 0.5}           0.705882             0.8125              0.875   
2    {'C': 1}           0.705882             0.8125              0.875   
3  {'C': 1.5}           0.705882             0.8125              0.875   
4    {'C': 2}           0.705882             0.8125              0.875   


### SVC with Probability = True

In [21]:
# Refit the linear SVC with probability=True so that predict_proba is
# available for the probability-based metrics below.
svc = SVC(C=0.5, kernel='linear', probability=True, random_state=24)
svc.fit(X_train, y_train)

# Hard class labels (for accuracy) and class probabilities (for ROC AUC and
# log loss). The labels are predicted once; the original cell repeated the
# identical predict call.
y_pred = svc.predict(X_test)
y_pred_prob = svc.predict_proba(X_test)

print(accuracy_score(y_test, y_pred))
# Column 1 holds the probability of the class encoded as 1 ('present').
print(roc_auc_score(y_test, y_pred_prob[:, 1]))
print(log_loss(y_test, y_pred_prob))

0.76
0.81
0.4124543059870036


In [23]:
# Tune C again, this time selecting the candidate with the best (least
# negative) cross-validated log loss instead of the default score.
params = {'C': [0.1, 0.5, 1, 1.5, 2, 3]}

gcv = GridSearchCV(
    estimator=svc,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
)
gcv.fit(X, y)

# Full per-candidate results are kept for inspection but not printed.
pd_cv = pd.DataFrame(gcv.cv_results_)

print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.1}
-0.4480718873347128


In [41]:
import warnings
# Suppress every warning from here on (process-wide filter).
warnings.filterwarnings('ignore')
from sklearn.model_selection import StratifiedKFold
import numpy as np

# 5-fold stratified cross-validation, shuffled with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate scalers for the 'SCL' pipeline step.
std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()

# Linear SVC with probability estimates, needed for log-loss scoring.
svc = SVC(kernel='linear', probability=True, random_state=24)

# Two-step pipeline: optional scaler (None = no scaling) followed by the SVC.
pipe = Pipeline(steps=[('SCL', None), ('SVC', svc)])
print(pipe.get_params())

# Search jointly over the scaler choice and 30 evenly spaced C values.
# NOTE(review): earlier notes listed two other C grids, [1..10] and
# [0.1, 1, 1.5, 2, 3], annotated with "8" and "0.1" -- presumably the best C
# found for each; unverified.
params = {
    'SVC__C': np.linspace(0.001, 5, 30),
    'SCL': [None, std_scaler, mm_scaler],
}

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='linear', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='linear', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'linear', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': None, 'SVC__C': 0.17337931034482756}
-0.44601893205301735


### Non-Linear SVM (Polynomial)

In [44]:
import warnings
# Suppress every warning from here on (process-wide filter).
warnings.filterwarnings('ignore')
from sklearn.model_selection import StratifiedKFold
import numpy as np

# 5-fold stratified cross-validation, shuffled with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate scalers for the 'SCL' pipeline step.
std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()

# Polynomial-kernel SVC with probability estimates for log-loss scoring.
svc = SVC(kernel='poly', probability=True, random_state=24)

# Two-step pipeline: optional scaler (None = no scaling) followed by the SVC.
pipe = Pipeline(steps=[('SCL', None), ('SVC', svc)])
print(pipe.get_params())

# Grid: 20 C values x 3 scaler options x 2 polynomial degrees x 5 coef0 values.
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__degree': [2, 3],
    'SVC__coef0': np.linspace(0, 3, 5),
}

# verbose=2 logs one line per individual fit.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
    verbose=2,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='poly', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='poly', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'poly', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 600 candidates, totalling 3000 fits
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC

### Non-Linear SVM (Radial Basis Function)

In [46]:
# 5-fold stratified cross-validation, shuffled with a fixed seed. The splitter
# is built once; the original cell constructed an identical one a second time.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate scalers for the 'SCL' pipeline step.
std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()

# RBF-kernel SVC with probability estimates for log-loss scoring.
svc = SVC(kernel='rbf', probability=True, random_state=24)

# Two-step pipeline: optional scaler (None = no scaling) followed by the SVC.
pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())

# Grid: 20 C values x 3 scaler options x 5 gamma values.
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__gamma': np.linspace(0.001, 5, 5),
}

# verbose=2 logs one line per individual fit.
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                   scoring='neg_log_loss', verbose=2)

gcv.fit(X, y)

# Per-candidate results table, kept for later inspection.
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'rbf', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 300 candidates, totalling 1500 fits
[CV] END ...........SCL=None, SVC__C=0.001, SVC__gamma=0.001; total time=   0.0s
[CV] END ...........SCL=None, SVC__C=0.001, SVC__gamma=0.001; total time=   0.0s
[CV] END ...........SCL=None, SVC__C=0.001, SVC__gamma=0.001; total time=   0.0s
[CV] END ...........SCL=None, SVC__C=0.001, SVC__gamma=0.001; total time=   0.0s
[CV] END ...........SCL=None, SVC__C=0.001, SVC__gamma=0.001; total time=   0.0s
[CV] END S

In [47]:
# Non-linear SVM (sigmoid kernel)

# 5-fold stratified cross-validation, shuffled with a fixed seed. The splitter
# is built once; the original cell constructed an identical one a second time.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate scalers for the 'SCL' pipeline step.
std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()

# Sigmoid-kernel SVC with probability estimates for log-loss scoring.
svc = SVC(kernel='sigmoid', probability=True, random_state=24)

# Two-step pipeline: optional scaler (None = no scaling) followed by the SVC.
pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())

# Grid: 20 C values x 3 scaler options x 5 gamma values x 5 coef0 values.
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__gamma': np.linspace(0.001, 5, 5),
    'SVC__coef0': np.linspace(0, 3, 5),
}

# verbose=2 logs one line per individual fit.
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                   scoring='neg_log_loss', verbose=2)

gcv.fit(X, y)

# Per-candidate results table, kept for later inspection.
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='sigmoid', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='sigmoid', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'sigmoid', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 1500 candidates, totalling 7500 fits
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__gamma=0.001; total time=   0.0s
[CV] END SCL=None, SVC__C=0.00

KeyboardInterrupt: 