# 앙상블 학습

In [1]:
from sklearn.datasets import load_breast_cancer
# Load scikit-learn's bundled breast-cancer dataset; later cells read
# `cancer.data` (features) and `cancer.target` (labels).
cancer = load_breast_cancer()

In [2]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the feature matrix (default target range is [0, 1]).
# NOTE(review): the scaler is fit on the full dataset here, before the
# train/test split performed later, so held-out rows contribute to the
# learned per-feature min/max.
scaler = MinMaxScaler()
cancer_scaled = scaler.fit(cancer.data).transform(cancer.data)

In [3]:
# Split the data first, then scale.
# The scaler must learn its per-feature min/max from the training rows only.
# `cancer_scaled` was produced by fitting on the full dataset, so splitting it
# would leak test-set statistics into preprocessing and make the test score
# optimistic. The same `test_size` and `random_state` are kept, so the
# train/test row partition is unchanged; only the scaling differs.
# NOTE: accuracy figures recorded further down were produced with the earlier
# full-dataset scaling and may change slightly when the notebook is re-run.
from sklearn.model_selection import train_test_split
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.2, random_state=156
)
# Re-fit the existing scaler on the training split and reuse its parameters
# for the test split (test values may fall slightly outside [0, 1]).
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

### 앙상블 학습을 위한 분류기
- 로지스틱회귀
- 서포트벡터머신
- K최근접이웃

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Base learners for the ensemble, each created with library-default
# hyperparameters: logistic regression, support vector machine, k-NN.
lr, svc, knn = LogisticRegression(), SVC(), KNeighborsClassifier()

In [8]:
# Voting classifier for ensemble learning.
# An ensemble needs one object that combines the individual models, so the
# three base learners are wrapped in a VotingClassifier.
from sklearn.ensemble import VotingClassifier

# Unlike a plain estimator, it is built from a list of (name, estimator) pairs.
# Hard voting combines the predicted 0/1 class labels; it is used here because
# the SVC created above does not enable probability estimates.
vo_clf = VotingClassifier(
    estimators=[
        ('LR', lr),
        ('SVC', svc),
        ('KNN', knn),
    ],
    voting='hard',
)

In [9]:
# Train the voting ensemble on the training split, then predict labels for
# the held-out split (fit() returns the estimator itself, so the calls chain).
pred = vo_clf.fit(X_train, y_train).predict(X_test)

In [10]:
from sklearn.metrics import accuracy_score
# Hard voting across the three models; the recorded run gave 0.9649.
# Kept as the final expression so the notebook displays the value.
accuracy_score(y_true=y_test, y_pred=pred)

0.9649122807017544

### 개별 모델의 학습/예측/평가

In [11]:
# Fit each base model on its own and report its test accuracy, for comparison
# with the ensemble. In the recorded run SVC scored highest (0.9649).
classifiers = [lr, svc, knn]
for classifier in classifiers:
    pred = classifier.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, pred)
    # The estimator's class name serves as its display label.
    class_name = type(classifier).__name__
    print(f'{class_name} 정확도: {acc:.4f}')

LogisticRegression 정확도: 0.9561
SVC 정확도: 0.9649
KNeighborsClassifier 정확도: 0.9561


### 로지스틱회귀, K-NN을 이용한 소프트 보팅
위와 다르게 SVC를 빼고 `voting='soft'`로 해 보자. 소프트 보팅은 각 분류기의 예측 확률(`predict_proba`)을 합산해 결정하는데, 기본 설정의 `SVC()`는 확률 추정을 제공하지 않으므로 제외한다.

In [13]:
# Soft-voting ensemble of logistic regression and k-NN only; soft voting
# combines the models' predicted class probabilities instead of their labels.
vo_clf = VotingClassifier(
    estimators=[
        ('LR', lr),
        ('KNN', knn),
    ],
    voting='soft',
)
pred = vo_clf.fit(X_train, y_train).predict(X_test)
# Final expression so the notebook displays the ensemble's test accuracy.
accuracy_score(y_true=y_test, y_pred=pred)

0.9649122807017544

In [14]:
# Fit the two soft-voting members individually and print each one's test
# accuracy, for comparison with the soft-voting ensemble.
classifiers = [lr, knn]
for classifier in classifiers:
    pred = classifier.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, pred)
    # The estimator's class name serves as its display label.
    class_name = type(classifier).__name__
    print(f'{class_name} 정확도: {acc:.4f}')

LogisticRegression 정확도: 0.9561
KNeighborsClassifier 정확도: 0.9561
