In [32]:
import pandas as pd
import numpy as np 
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV


In [9]:
# Read the breast-cancer table, using the first CSV column as the row index.
cancer = pd.read_csv(
    'BreastCancer.csv',
    index_col=0,
)
# Preview the first three rows as the cell's output.
cancer.head(3)

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant


In [22]:
# Target: the 'Class' column; features: every remaining column.
y = cancer['Class']
X = cancer.drop('Class', axis=1)

In [23]:
# Hold out 30% of the rows for testing; stratifying on y preserves the class
# proportions in both partitions, and the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

# Linear Kernel

In [40]:
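# Pass random_state whenever probability=True: SVC then derives its probability estimates from an internal, shuffled cross-validation, so results are only reproducible with a fixed seed.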

In [None]:
# Baseline linear-kernel SVM trained on the training split.
# probability=True enables predict_proba; random_state pins that step.
svm = SVC(kernel='linear', probability=True, random_state=24).fit(X_train, y_train)

# Hard class predictions, and the score for the second class in svm.classes_.
y_pred = svm.predict(X_test)
y_pred_proba = svm.predict_proba(X_test)[:, 1]

# Test-set accuracy, a separator line, then test-set ROC AUC.
print(
    accuracy_score(y_test, y_pred),
    "-------------------------------------------------",
    roc_auc_score(y_test, y_pred_proba),
    sep="\n",
)

### Linear Kernel by Grid Search CV

In [43]:
# Five stratified, shuffled folds with a fixed seed for repeatable splits.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

# Ten evenly spaced candidate values of C between 0.001 and 5.
params = dict(C=np.linspace(0.001, 5, num=10))

# NOTE(review): the 'roc_auc' scorer appears to rank with decision_function
# when the estimator provides one, so probability=True may only add fitting
# cost here -- confirm before dropping it.
svm = SVC(kernel='linear', probability=True, random_state=24)

# Exhaustive search over C on the full data set, scored by ROC AUC.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

In [44]:
# Report the winning hyper-parameters, then their mean cross-validated ROC AUC.
print(gcv.best_params_, gcv.best_score_, sep="\n")

{'C': np.float64(0.5564444444444444)}
0.9948742934823235


# Polynomial Kernel

In [50]:
# Baseline polynomial-kernel SVM (degree left at its default) on the training split.
# probability=True enables predict_proba; random_state pins that step.
svm = SVC(kernel='poly', probability=True, random_state=24).fit(X_train, y_train)

# Hard class predictions, and the score for the second class in svm.classes_.
y_pred = svm.predict(X_test)
y_pred_proba = svm.predict_proba(X_test)[:, 1]

# Test-set accuracy, a separator line, then test-set ROC AUC.
print(
    accuracy_score(y_test, y_pred),
    "-------------------------------------------------",
    roc_auc_score(y_test, y_pred_proba),
    sep="\n",
)

0.9714285714285714
-------------------------------------------------
0.9974838969404187


In [45]:
# With kernel='poly', set the polynomial degree through degree= (default 3); the other kernels ignore this parameter.

In [46]:
# Five stratified, shuffled folds with a fixed seed for repeatable splits.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

# Candidate grid: ten evenly spaced C values crossed with three polynomial degrees.
# NOTE(review): the recorded best C (0.001) sits on the lower edge of this
# grid; a log-spaced grid (np.logspace) would probe smaller values.
params = dict(
    C=np.linspace(0.001, 5, num=10),
    degree=[2, 3, 4],
)

# NOTE(review): the 'roc_auc' scorer appears to rank with decision_function
# when the estimator provides one, so probability=True may only add fitting
# cost here -- confirm before dropping it.
svm = SVC(kernel='poly', probability=True, random_state=24)

# Exhaustive search over the grid on the full data set, scored by ROC AUC.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

In [47]:
# Report the winning hyper-parameters, then their mean cross-validated ROC AUC.
print(gcv.best_params_, gcv.best_score_, sep="\n")

{'C': np.float64(0.001), 'degree': 3}
0.994783774713899


# Radial Kernel
- also known as the GAUSSIAN KERNEL (RBF, radial basis function)

In [51]:
# Baseline RBF-kernel SVM trained on the training split.
# probability=True enables predict_proba; random_state pins that step.
svm = SVC(kernel='rbf', probability=True, random_state=24).fit(X_train, y_train)

# Hard class predictions, and the score for the second class in svm.classes_.
y_pred = svm.predict(X_test)
y_pred_proba = svm.predict_proba(X_test)[:, 1]

# Test-set accuracy, a separator line, then test-set ROC AUC.
print(
    accuracy_score(y_test, y_pred),
    "-------------------------------------------------",
    roc_auc_score(y_test, y_pred_proba),
    sep="\n",
)

0.9761904761904762
-------------------------------------------------
0.9911433172302737


In [52]:
# Five stratified, shuffled folds with a fixed seed for repeatable splits.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

# Candidate grid: ten evenly spaced values each for C and gamma (100 combinations).
# NOTE(review): the recorded best gamma (0.001) sits on the lower edge of this
# grid; a log-spaced grid (np.logspace) would probe smaller values.
params = dict(
    C=np.linspace(0.001, 5, num=10),
    gamma=np.linspace(0.001, 5, num=10),
)

# NOTE(review): the 'roc_auc' scorer appears to rank with decision_function
# when the estimator provides one, so probability=True may only add fitting
# cost here -- confirm before dropping it.
svm = SVC(kernel='rbf', probability=True, random_state=24)

# Exhaustive search over the grid on the full data set, scored by ROC AUC.
gcv = GridSearchCV(
    estimator=svm,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

In [53]:
# Report the winning hyper-parameters, then their mean cross-validated ROC AUC.
print(gcv.best_params_, gcv.best_score_, sep="\n")

{'C': np.float64(2.2227777777777775), 'gamma': np.float64(0.001)}
0.9947379871681115
