In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
%matplotlib inline

In [4]:
from sklearn.datasets import load_breast_cancer

In [5]:
cancer = load_breast_cancer()

In [6]:
cancer.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [7]:
#print(cancer['DESCR'])

In [8]:
df = pd.DataFrame(data=cancer['data'], columns=cancer['feature_names'])

In [9]:
# cancer['target']
cancer['target_names']

array(['malignant', 'benign'], dtype='<U9')

In [10]:
from sklearn.model_selection import train_test_split


In [11]:
X = df
y = cancer['target']
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3)

In [12]:
# Grab support vector classifier

In [13]:
import sklearn
from sklearn.svm import SVC
sklearn.set_config(print_changed_only=False)
model = SVC(verbose=True)

In [14]:
model.fit(X_train, y_train)

[LibSVM]

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=True)

In [15]:
pred = model.predict(X_test)

In [16]:
from sklearn.metrics import classification_report, confusion_matrix

In [17]:
print(classification_report(y_test, pred))
print(confusion_matrix(y_test,pred))

              precision    recall  f1-score   support

           0       1.00      0.72      0.83        81
           1       0.80      1.00      0.89        90

    accuracy                           0.87       171
   macro avg       0.90      0.86      0.86       171
weighted avg       0.89      0.87      0.86       171

[[58 23]
 [ 0 90]]


In [18]:
from sklearn.model_selection import GridSearchCV

In [19]:
# overall generalisation
# C controls the cost of classification, low bias and high variance
# gamma parameter --> radio basis function
# large gamma : high bias, low variance
# read chapter 9

param_grid = {'gamma':[1,0.1,0.01,0.001], 'C':[0.1,1,10,100,1000]}

In [20]:
grid = GridSearchCV(SVC(), param_grid, verbose=3)

In [21]:
grid

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [76]:
grid.best_params_

{'C': 1, 'gamma': 0.001}

In [78]:
predi_grid = grid.predict(X_test)

In [80]:
print(confusion_matrix(y_test, pred_grid))
print(classification_report(y_test, predi_grid))

[[60  3]
 [10 98]]
              precision    recall  f1-score   support

           0       0.86      0.95      0.90        63
           1       0.97      0.91      0.94       108

    accuracy                           0.92       171
   macro avg       0.91      0.93      0.92       171
weighted avg       0.93      0.92      0.92       171

