In [1]:
# 앙상블 학습

In [2]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
cancer_scaled = scaler.fit_transform(cancer.data)

In [20]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(
    cancer_scaled, cancer.target, stratify=cancer.target, test_size=0.2, random_state=2011 
)

### 앙상블 학습을 위한 분류기

- 로지스틱 회귀
- 서포트 백터 머신
- k 최근접 아웃

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [22]:
lr = LogisticRegression()
svc = SVC()
knn = KNeighborsClassifier()

In [23]:
from sklearn.ensemble import VotingClassifier

vo_clf = VotingClassifier(
    estimators=[('LR', lr),('SVC', svc),('KNN', knn)],
    voting='hard'
)

In [24]:
vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)

In [25]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred)

0.956140350877193

### 개별 모델의 학습/예측/평가

In [26]:
for classifier in [lr, svc, knn]:
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)
    acc = accuracy_score(y_test, pred)
    classifier_name = classifier.__class__.__name__
    print(f'{classifier_name} 정확도: {acc: 4f}')

LogisticRegression 정확도:  0.938596
SVC 정확도:  0.964912
KNeighborsClassifier 정확도:  0.956140


### 소프트 보팅
- 로지스틱 회귀
- k 최근접 아웃 

In [27]:
vo_clf = VotingClassifier(
    estimators=[('LR', lr),('KNN', knn)],
    voting='soft'
)

vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)
accuracy_score(y_test, pred)

0.956140350877193

In [28]:
vo_clf.predict_proba(X_test)

array([[9.93299003e-01, 6.70099726e-03],
       [9.97860114e-01, 2.13988594e-03],
       [1.55035222e-02, 9.84496478e-01],
       [2.11048802e-02, 9.78895120e-01],
       [9.74826742e-01, 2.51732577e-02],
       [1.18101174e-01, 8.81898826e-01],
       [1.83346646e-02, 9.81665335e-01],
       [1.72260326e-03, 9.98277397e-01],
       [4.17790994e-02, 9.58220901e-01],
       [4.57872746e-02, 9.54212725e-01],
       [2.50799952e-02, 9.74920005e-01],
       [2.98787609e-02, 9.70121239e-01],
       [2.25234183e-03, 9.97747658e-01],
       [7.50971304e-01, 2.49028696e-01],
       [9.21979440e-03, 9.90780206e-01],
       [9.83839355e-01, 1.61606449e-02],
       [2.83304790e-03, 9.97166952e-01],
       [1.06895121e-01, 8.93104879e-01],
       [6.41086893e-01, 3.58913107e-01],
       [1.63905669e-02, 9.83609433e-01],
       [9.52456926e-01, 4.75430742e-02],
       [6.88063781e-01, 3.11936219e-01],
       [9.98353441e-01, 1.64655883e-03],
       [2.82023149e-01, 7.17976851e-01],
       [1.402176