In [None]:
# Positional split of the dataset: the first 50,000 samples are used for
# training, the next 10,000 for validation, and everything from index
# 60,000 onward for the final test evaluation.
X_train = X_mnist[:50_000]
y_train = y_mnist[:50_000]
X_valid = X_mnist[50_000:60_000]
y_valid = y_mnist[50_000:60_000]
X_test = X_mnist[60_000:]
y_test = y_mnist[60_000:]

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [None]:
# Instantiate the four base classifiers. Each one gets a fixed random_state
# so repeated runs of this cell use the same seeds.
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
# A kernel SVM is slow on a large dataset, so LinearSVC is used instead.
svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

# Keep the models in a fixed order; later cells iterate over this list.
estimators = [
    random_forest_clf,
    extra_trees_clf,
    svm_clf,
    mlp_clf,
]

# Fit every base model on the training split.
for estimator in estimators:
    estimator.fit(X_train, y_train)

# Validation-set score of each model, in the same order as `estimators`
# (displayed as the cell's output).
[clf.score(X_valid, y_valid) for clf in estimators]

In [None]:
from sklearn.ensemble import VotingClassifier

# VotingClassifier takes (name, estimator) pairs rather than bare estimators.
named_estimators = [
    ("random_forest_clf", random_forest_clf),
    ("extra_trees_clf", extra_trees_clf),
    ("svm_clf", svm_clf),
    ("mlp_clf", mlp_clf),
]

# No `voting` argument is passed, so the library's default voting strategy is used.
voting_clf = VotingClassifier(named_estimators)
voting_clf.fit(X_train, y_train)

# Validation-set score of the ensemble. It is printed explicitly: a bare
# expression in the middle of a cell is evaluated but never displayed.
print(f"Voting Classifier Validation Score: {voting_clf.score(X_valid, y_valid)}\n")

# Test-set scores: each individual model first, then the voting ensemble.
print("Individual Scores:")
for estimator in estimators:
    print(estimator.score(X_test, y_test))

print("\nVoting Classifier Score:")
print(voting_clf.score(X_test, y_test))

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score


def predictions_as_features(fitted_estimators, X):
    """Stack per-estimator predictions for ``X`` into a feature matrix.

    Parameters
    ----------
    fitted_estimators : sequence
        Already-fitted objects exposing ``predict(X)``.
    X : array-like
        Samples to predict on; ``len(X)`` gives the number of rows.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(len(X), len(fitted_estimators))`` where
        column ``j`` holds the predictions of ``fitted_estimators[j]``.
    """
    features = np.empty((len(X), len(fitted_estimators)), dtype=np.float32)
    for column, fitted in enumerate(fitted_estimators):
        features[:, column] = fitted.predict(X)
    return features


# The base models' predictions on the validation set become the blender's
# training features (one column per base model).
X_val_predictions = predictions_as_features(estimators, X_valid)

# Quick look at the generated data: shape plus the first few rows only.
print(X_val_predictions.shape)
print(X_val_predictions[:5])

# Train a random-forest blender on those predictions. oob_score=True makes the
# forest compute an out-of-bag accuracy estimate, which is reported below.
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_val_predictions, y_valid)
print("Blender OOB score:", rnd_forest_blender.oob_score_)

# Build the same kind of feature matrix for the test set ...
X_test_predictions = predictions_as_features(estimators, X_test)

# ... and let the blender produce the final predictions.
y_pred = rnd_forest_blender.predict(X_test_predictions)

# Test accuracy of the blended ensemble. Printed explicitly because more
# statements follow in this cell, so a bare expression would not be displayed.
print("Blender test accuracy:", accuracy_score(y_test, y_pred))
from sklearn.ensemble import StackingClassifier

# Use the first 60,000 samples (the training and validation splits together)
# as the training data for the stacking ensemble.
X_train_full = X_mnist[:60_000]
y_train_full = y_mnist[:60_000]

# Stacking ensemble over the same named base models, with a random forest as
# the final estimator and 5-fold cross-validation (cv=5).
stacking_clf = StackingClassifier(
    named_estimators,
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5,
)

# fit() returns the fitted estimator, so the test-set score can be chained.
print(stacking_clf.fit(X_train_full, y_train_full).score(X_test, y_test))