In [1]:
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html
# Dimensionality: 30
# Classes: 2
# 212 samples (M - malignant, breast cancer) -> label 0; 357 samples (B - benign, healthy) -> label 1

from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset; later cells read its `.data` and `.target` attributes.
cancer = load_breast_cancer()

In [2]:
# Feature matrix and class labels taken from the loaded dataset.
X, Y = cancer.data, cancer.target

In [3]:
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html?highlight=train_test_split#sklearn.model_selection.train_test_split

# Split into training and test sets: 20% is held out for testing, the split is
# stratified on the labels, and random_state is fixed so the split is reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=1,
)

In [43]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA


Pipeline(memory=None,
         steps=[('reduce_dim',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('clf',
                 SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='rbf', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [47]:
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
# Module design using scikit-learn's Pipeline feature

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Two-step pipeline: standardise the features, then classify with an SVM.
# The step names ('normalization', 'clf') are the prefixes used by grid-search
# keys such as 'clf__C'.
estimators = [
    ('normalization', StandardScaler()),
    ('clf', SVC()),
]
pipe = Pipeline(steps=estimators)



In [56]:
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html#sklearn.model_selection.ParameterGrid

from sklearn.model_selection import ParameterGrid

# Candidate magnitudes tried for both C and gamma.
magnitudes = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

# Every candidate value is wrapped in its own one-element list on purpose:
# ParameterGrid then yields dicts whose values are one-element lists, i.e. each
# expanded combination is itself a single-point parameter grid (this is the form
# handed to GridSearchCV further down).
grid = [
    {
        'clf__kernel': [['linear']],
        'clf__C': [[c] for c in magnitudes],
    },
    {
        'clf__kernel': [['rbf']],
        'clf__gamma': [[g] for g in magnitudes],
        'clf__C': [[c] for c in magnitudes],
    },
]

grid_param = ParameterGrid(grid)
# Show the expanded combinations (7 linear + 49 rbf).
list(grid_param)

[{'clf__C': [0.001], 'clf__kernel': ['linear']},
 {'clf__C': [0.01], 'clf__kernel': ['linear']},
 {'clf__C': [0.1], 'clf__kernel': ['linear']},
 {'clf__C': [1], 'clf__kernel': ['linear']},
 {'clf__C': [10], 'clf__kernel': ['linear']},
 {'clf__C': [100], 'clf__kernel': ['linear']},
 {'clf__C': [1000], 'clf__kernel': ['linear']},
 {'clf__C': [0.001], 'clf__gamma': [0.001], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [0.01], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [0.1], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [1], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [10], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [100], 'clf__kernel': ['rbf']},
 {'clf__C': [0.001], 'clf__gamma': [1000], 'clf__kernel': ['rbf']},
 {'clf__C': [0.01], 'clf__gamma': [0.001], 'clf__kernel': ['rbf']},
 {'clf__C': [0.01], 'clf__gamma': [0.01], 'clf__kernel': ['rbf']},
 {'clf__C': [0.01], 'clf__gamma': [0.1], 'clf__kernel': ['rbf']},

In [61]:
# Exhaustive search over every expanded combination, scored by accuracy with
# 10-fold cross-validation on a single worker.
# `grid_param` is a ParameterGrid object, but GridSearchCV documents `param_grid`
# as "dict or list of dictionaries" and recent scikit-learn releases reject any
# other type during parameter validation. Materialising it with list() yields a
# plain list of single-point grids (each value is a one-element list), which is
# accepted by old and new versions alike and searches the same candidates.
gs = GridSearchCV(pipe, list(grid_param), scoring='accuracy', cv=10, n_jobs=1)

In [62]:
# Run the cross-validated search on the training split only; the test split is
# not used here.
gs.fit(X_train, y=Y_train)

GridSearchCV(cv=10, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('normalization',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('clf',
                                        SVC(C=1.0, break_ties=False,
                                            cache_size=200, class_weight=None,
                                            coef0=0.0,
                                            decision_function_shape='ovr',
                                            degree=3, gamma='scale',
                                            kernel='rbf', max_iter=-1,
                                            probability=False,
                                            random_state=None, shrinking=True,
                                            t

In [63]:
# Best score found by the grid search (the search was configured with
# scoring='accuracy' and cv=10).
print(gs.best_score_)

0.9758454106280192


In [64]:
# Hyper-parameter combination that produced the best score in the search.
print(gs.best_params_)

{'clf__C': 10, 'clf__gamma': 0.001, 'clf__kernel': 'rbf'}


In [65]:
# Take the estimator selected by the grid search and predict labels for the
# held-out test features.
best_model = gs.best_estimator_
Y_test_pred = best_model.predict(X_test)

In [66]:
from sklearn.metrics import accuracy_score

# Accuracy of the selected model on the held-out test split.
# NOTE(review): this repeats the prediction already stored in `Y_test_pred` by the
# previous cell, under a name that differs only in capitalisation - consider
# keeping a single variable.
Y_test_Pred = best_model.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=Y_test_Pred)

0.9824561403508771