In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

data = load_breast_cancer()
X = data.data
Y = data.target

# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state=4)
len_x_train = len(X_train)
len_x_test = len(X_test)
len_y_train = len(Y_train)
len_y_test = len(Y_test)

print(f'len_x_train = {len_x_train}')
print(f'len_x_test = {len_x_test}')
print(f'len_y_train = {len_y_train}')
print(f'len_y_trst = {len_y_test}')

scores = ['roc_auc']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()
    
    skf = StratifiedKFold(n_splits=10, random_state=25, shuffle=True)
    clf = GridSearchCV(SVC(), tuned_parameters, cv=skf, scoring = score)
    clf.fit(X_train, Y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (-mean, -std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    Y_true, Y_pred = Y_test, clf.predict(X_test)
    print(classification_report(Y_true, Y_pred))
    print()

len_x_train = 398
len_x_test = 171
len_y_train = 398
len_y_trst = 171
# Tuning hyper-parameters for roc_auc

Best parameters set found on development set:

{'C': 1000, 'kernel': 'linear'}

Grid scores on development set:

-0.968 (+/--0.052) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
-0.977 (+/--0.051) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
-0.954 (+/--0.072) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
-0.968 (+/--0.068) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
-0.954 (+/--0.072) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
-0.958 (+/--0.067) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
-0.954 (+/--0.072) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
-0.947 (+/--0.075) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
-0.989 (+/--0.027) for {'C': 1, 'kernel': 'linear'}
-0.986 (+/--0.038) for {'C': 10, 'kernel': 'linear'}
-0.990 (+/--0.019) for {'C': 100, 'kernel': 'linear'}
-0.991 (+/--0.015) for {'C': 1000, 'kernel': 'linear'}

Detailed classification rep

In [2]:
print(f'{type(clf.best_params_)}')
print(clf.best_params_)

<class 'dict'>
{'C': 1000, 'kernel': 'linear'}


In [3]:
clf.best_estimator_

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [4]:
nested_score = cross_val_score(clf.best_estimator_, X=X_test, y=Y_test, cv=10, scoring = 'accuracy')

In [6]:
nested_score.mean()

0.9354983660130719

In [11]:
clf.best_estimator_.get_params()

{'C': 1000,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'auto',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}