In [1]:
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [6]:
cancer_data = load_breast_cancer()
X_data = cancer_data.data
y_label = cancer_data.target

X_train, X_test, y_train, y_test = train_test_split(X_data, y_label, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(455, 30) (114, 30)
(455,) (114,)


In [9]:
# 개별 모델 생성
knn_clf = KNeighborsClassifier(n_neighbors=4)
dt_clf = DecisionTreeClassifier()
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
ada_clf = AdaBoostClassifier(n_estimators=100)

# 최종 Stacking 모델 생성
lr_final_clf = LogisticRegression(C=10)

In [11]:
knn_clf.fit(X_train, y_train)
dt_clf.fit(X_train, y_train)
rf_clf.fit(X_train, y_train)
ada_clf.fit(X_train, y_train)

knn_pred = knn_clf.predict(X_test)
dt_pred = dt_clf.predict(X_test)
rf_pred = rf_clf.predict(X_test)
ada_pred = ada_clf.predict(X_test)

print('KNN의 정확도 ', accuracy_score(y_test, knn_pred))
print('DecisionTree의 정확도 ', accuracy_score(y_test, dt_pred))
print('RandomForest의 정확도 ', accuracy_score(y_test, rf_pred))
print('AdaBoost의 정확도 ', accuracy_score(y_test, ada_pred))


KNN의 정확도  0.9385964912280702
DecisionTree의 정확도  0.9385964912280702
RandomForest의 정확도  0.9649122807017544
AdaBoost의 정확도  0.9736842105263158


In [13]:
pred = np.array([knn_pred, dt_pred, rf_pred, ada_pred])
print(pred.shape)

pred = np.transpose(pred)
print(pred.shape)

(4, 114)
(114, 4)


In [14]:
lr_final_clf.fit(pred, y_test)
final_pred = lr_final_clf.predict(pred)

print('최종 모델의 정확도:', accuracy_score(y_test, final_pred))

최종 모델의 정확도: 0.9736842105263158
