In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

In [2]:
# Load scikit-learn's bundled breast-cancer dataset and show the names of
# its feature columns (looked up by key on the returned dataset object).
breast_cancer_dict = load_breast_cancer()
breast_cancer_dict["feature_names"]

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [3]:
# Inspect which top-level entries the loaded dataset object exposes.
breast_cancer_dict.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Show the human-readable class names.
# NOTE(review): presumably position i names integer label i (0 -> 'malignant',
# 1 -> 'benign') — confirm against breast_cancer_dict.DESCR.
breast_cancer_dict.target_names

array(['malignant', 'benign'], dtype='<U9')

In [5]:
# Assemble one frame holding every feature column plus a trailing 'target'
# column with the class labels, then preview the first rows.
breast_cancer_dataset = pd.DataFrame(
    data=breast_cancer_dict.data,
    columns=breast_cancer_dict.feature_names,
).assign(target=breast_cancer_dict.target)
breast_cancer_dataset.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [6]:
# Sanity-check the frame size as (rows, columns); the column count covers the
# feature columns plus the added 'target' column.
breast_cancer_dataset.shape

(569, 31)

In [7]:
# Separate the feature matrix from the label vector.
# The label column is removed by name rather than by position, so X stays
# correct even if columns are later appended to or reordered in the frame
# (a positional "everything but the last column" slice would then silently
# leak the label into X or drop a real feature).
X = breast_cancer_dataset.drop(columns='target')
y = breast_cancer_dataset['target']

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as a test split; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=20,
)

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Learn the scaling from the training split only, then apply that same fitted
# scaler to both splits so that no test-set statistics influence the model.
# NOTE(review): the scaler is fitted on the full training split before any
# cross-validation, so CV validation folds are scaled using statistics that
# include them; wrapping scaler + SVC in a Pipeline would avoid that.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [11]:
# Linear-kernel SVM: the search covers only the C parameter.
# No cv / scoring arguments are passed, so GridSearchCV runs with its defaults.
svm_classifier_linear = SVC(kernel='linear')
search_dic_linear = dict(C=[0.01, 0.1, 1, 8, 10, 100, 1000])
grid_search_linear = GridSearchCV(
    estimator=svm_classifier_linear,
    param_grid=search_dic_linear,
)

In [12]:
# Run the grid search for the linear kernel on the scaled training split.
grid_search_linear.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000]})

In [13]:
# Predict labels for the scaled test split with the fitted linear-kernel search.
y_pred_linear = grid_search_linear.predict(X_test_scaled)

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Score the linear-kernel predictions against the held-out test labels.
matrix = confusion_matrix(y_test, y_pred_linear)
report = classification_report(y_test, y_pred_linear)
accuracy = accuracy_score(y_test, y_pred_linear)

# Report: confusion matrix, per-class metrics, overall accuracy (as a
# percentage), and finally the hyper-parameters the grid search selected.
print("Confusion Matrix", matrix, sep="\n")
print("\nClassification Report", report, sep="\n")
print('SVM (kernel=linear) Classification Accuracy of the model: {:.2f}%'.format(100 * accuracy), end="\n\n")
print("Best parameters set found on development set:")
print(grid_search_linear.best_params_)

Confusion Matrix
[[ 63   1]
 [  0 107]]

Classification Report
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        64
           1       0.99      1.00      1.00       107

    accuracy                           0.99       171
   macro avg       1.00      0.99      0.99       171
weighted avg       0.99      0.99      0.99       171

SVM (kernel=linear) Classification Accuracy of the model: 99.42%

Best parameters set found on development set:
{'C': 8}


In [15]:
# Polynomial-kernel SVM: the search covers C, the polynomial degree and gamma.
# No cv / scoring arguments are passed, so GridSearchCV runs with its defaults.
svm_classifier_poly = SVC(kernel='poly')
search_dic_poly = dict(
    C=[0.01, 0.1, 1, 8, 10, 100, 1000],
    degree=[3, 5, 8],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)
grid_search_poly = GridSearchCV(
    estimator=svm_classifier_poly,
    param_grid=search_dic_poly,
)

In [16]:
# Run the grid search for the polynomial kernel on the scaled training split.
grid_search_poly.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVC(kernel='poly'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'degree': [3, 5, 8],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [17]:
# Predict labels for the scaled test split with the fitted polynomial-kernel search.
y_pred_poly = grid_search_poly.predict(X_test_scaled)

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Evaluate the polynomial-kernel predictions against the held-out test labels.

# Confusion matrix
print("Confusion Matrix")
matrix = confusion_matrix(y_test, y_pred_poly)
print(matrix)

# Per-class precision / recall / f1 report
print("\nClassification Report")
report = classification_report(y_test, y_pred_poly)
print(report)

# Overall accuracy, printed as a percentage.
# The label uses the same kernel name that is passed to SVC ('poly').
accuracy = accuracy_score(y_test, y_pred_poly)
print('SVM (kernel=poly) Classification Accuracy of the model: {:.2f}%'.format(accuracy*100))
print()

# Hyper-parameters selected by the grid search.
print("Best parameters set found on development set:")
print(grid_search_poly.best_params_)

Confusion Matrix
[[ 63   1]
 [  0 107]]

Classification Report
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        64
           1       0.99      1.00      1.00       107

    accuracy                           0.99       171
   macro avg       1.00      0.99      0.99       171
weighted avg       0.99      0.99      0.99       171

SVM (kernel=ploy) Classification Accuracy of the model: 99.42%

Best parameters set found on development set:
{'C': 0.1, 'degree': 3, 'gamma': 1}


In [19]:
# RBF-kernel SVM: the search covers C and gamma.
# No cv / scoring arguments are passed, so GridSearchCV runs with its defaults.
svm_classifier_rbf = SVC(kernel='rbf')
search_dic_rbf = dict(
    C=[0.01, 0.1, 1, 8, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)
grid_search_rbf = GridSearchCV(
    estimator=svm_classifier_rbf,
    param_grid=search_dic_rbf,
)

In [20]:
# Run the grid search for the RBF kernel on the scaled training split.
grid_search_rbf.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [21]:
# Predict labels for the scaled test split with the fitted RBF-kernel search.
y_pred_rbf = grid_search_rbf.predict(X_test_scaled)

In [22]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Score the RBF-kernel predictions against the held-out test labels.
matrix = confusion_matrix(y_test, y_pred_rbf)
report = classification_report(y_test, y_pred_rbf)
accuracy = accuracy_score(y_test, y_pred_rbf)

# Report: confusion matrix, per-class metrics, overall accuracy (as a
# percentage), and finally the hyper-parameters the grid search selected.
print("Confusion Matrix", matrix, sep="\n")
print("\nClassification Report", report, sep="\n")
print('SVM (kernel=rbf) Classification Accuracy of the model: {:.2f}%'.format(100 * accuracy), end="\n\n")
print("Best parameters set found on development set:")
print(grid_search_rbf.best_params_)

Confusion Matrix
[[ 63   1]
 [  0 107]]

Classification Report
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        64
           1       0.99      1.00      1.00       107

    accuracy                           0.99       171
   macro avg       1.00      0.99      0.99       171
weighted avg       0.99      0.99      0.99       171

SVM (kernel=rbf) Classification Accuracy of the model: 99.42%

Best parameters set found on development set:
{'C': 100, 'gamma': 0.01}


In [23]:
# Sigmoid-kernel SVM: the search covers C and gamma.
# No cv / scoring arguments are passed, so GridSearchCV runs with its defaults.
svm_classifier_sigmoid = SVC(kernel='sigmoid')
search_dic_sigmoid = dict(
    C=[0.01, 0.1, 1, 8, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)
grid_search_sigmoid = GridSearchCV(
    estimator=svm_classifier_sigmoid,
    param_grid=search_dic_sigmoid,
)

In [24]:
# Run the grid search for the sigmoid kernel on the scaled training split.
grid_search_sigmoid.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVC(kernel='sigmoid'),
             param_grid={'C': [0.01, 0.1, 1, 8, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]})

In [25]:
# Predict labels for the scaled test split with the fitted sigmoid-kernel search.
y_pred_sigmoid = grid_search_sigmoid.predict(X_test_scaled)

In [26]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Score the sigmoid-kernel predictions against the held-out test labels.
matrix = confusion_matrix(y_test, y_pred_sigmoid)
report = classification_report(y_test, y_pred_sigmoid)
accuracy = accuracy_score(y_test, y_pred_sigmoid)

# Report: confusion matrix, per-class metrics, overall accuracy (as a
# percentage), and finally the hyper-parameters the grid search selected.
print("Confusion Matrix", matrix, sep="\n")
print("\nClassification Report", report, sep="\n")
print('SVM (kernel=sigmoid) Classification Accuracy of the model: {:.2f}%'.format(100 * accuracy), end="\n\n")
print("Best parameters set found on development set:")
print(grid_search_sigmoid.best_params_)

Confusion Matrix
[[ 61   3]
 [  0 107]]

Classification Report
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        64
           1       0.97      1.00      0.99       107

    accuracy                           0.98       171
   macro avg       0.99      0.98      0.98       171
weighted avg       0.98      0.98      0.98       171

SVM (kernel=sigmoid) Classification Accuracy of the model: 98.25%

Best parameters set found on development set:
{'C': 8, 'gamma': 0.1}
