In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Fetch MNIST from OpenML as plain NumPy arrays (as_frame=False) and cast the
# targets to uint8 so they can be used as integer class labels.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(np.uint8)

# Carve out the validation and test sets that every later cell relies on.
# Without this split X_train / X_val / X_test (and their labels) are undefined
# on a fresh kernel and the notebook fails with NameError.
# NOTE(review): the original split cell is not present in the notebook. A
# 10,000-sample validation set is consistent with the recorded blender OOB
# score (0.9738); the 10,000-sample test set and random_state=42 are
# assumptions -- confirm against the original experiment.
VAL_SIZE = 10_000
TEST_SIZE = 10_000
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=VAL_SIZE, random_state=42
)

In [None]:
# Base learners shared by the voting, blending and stacking experiments.
# They are defined here so the notebook runs from a fresh kernel; the
# hyper-parameters mirror the estimator reprs recorded in this cell's output.
random_forest_clf = RandomForestClassifier(n_jobs=-1, random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_jobs=-1, random_state=42)
# The SVC sits behind a StandardScaler; probability=True enables
# predict_proba, which the soft-voting ensemble needs.
svm_clf = make_pipeline(
    StandardScaler(),
    SVC(gamma='auto', probability=True, random_state=42),
)

estimators = [random_forest_clf, extra_trees_clf, svm_clf]

# Fit each base learner on the training split, logging progress per model.
print("개별 모델 훈련 시작...")
for estimator in estimators:
    print(f"{estimator} 훈련 중...")
    estimator.fit(X_train, y_train)
    print("완료.")

개별 모델 훈련 시작...
RandomForestClassifier(n_jobs=-1, random_state=42) 훈련 중...
완료.
ExtraTreesClassifier(n_jobs=-1, random_state=42) 훈련 중...
완료.
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto', probability=True, random_state=42))]) 훈련 중...
완료.


In [None]:
# Combine the three base learners into a single voting ensemble and fit it on
# the training split.
voting_clf = VotingClassifier(
    estimators=[
        ('rf', random_forest_clf),
        ('et', extra_trees_clf),
        ('svc', svm_clf)
    ],
    voting='soft'  # soft voting: combine the estimators' predicted class probabilities
)

print("앙상블 모델(Voting Classifier) 훈련 중...")
voting_clf.fit(X_train, y_train)

앙상블 모델(Voting Classifier) 훈련 중...


In [None]:
# Compare all fitted models on the validation set, then report test-set
# accuracy for the voting ensemble and, for reference, each base learner.
model_names = ["Random Forest", "Extra Trees", "SVM", "Ensemble (Voting)"]
all_models = [random_forest_clf, extra_trees_clf, svm_clf, voting_clf]
divider = "-" * 40

print("\n[검증 세트(Validation Set) 성능 비교]")
print(divider)
for name, model in zip(model_names, all_models):
    accuracy = accuracy_score(y_val, model.predict(X_val))
    print(f"{name}: {accuracy:.4f}")

print("\n[테스트 세트(Test Set) 성능 비교]")
print(divider)
voting_score = voting_clf.score(X_test, y_test)
print(f"앙상블 모델 테스트 세트 정확도: {voting_score:.4f}")

# Only the first three entries are base learners; the last is the ensemble.
print("\n(참고: 개별 모델의 테스트 세트 정확도)")
for name, model in zip(model_names[:3], all_models[:3]):
    score = model.score(X_test, y_test)
    print(f"{name}: {score:.4f}")


[검증 세트(Validation Set) 성능 비교]
----------------------------------------
Random Forest: 0.9736
Extra Trees: 0.9743
SVM: 0.9686
Ensemble (Voting): 0.9789

[테스트 세트(Test Set) 성능 비교]
----------------------------------------
앙상블 모델 테스트 세트 정확도: 0.9729

(참고: 개별 모델의 테스트 세트 정확도)
Random Forest: 0.9680
Extra Trees: 0.9703
SVM: 0.9639


In [None]:
# Build the blender's training features: one float32 column per base
# estimator, holding that estimator's predicted label for each validation sample.
X_val_predictions = np.column_stack(
    [estimator.predict(X_val) for estimator in estimators]
).astype(np.float32)

In [None]:
# Train the blender (a random forest) on the base estimators' validation-set
# predictions; oob_score=True makes the fitted forest expose oob_score_.
rnd_forest_blender = RandomForestClassifier(
    n_estimators=200,
    oob_score=True,
    random_state=42,
    n_jobs=-1,
).fit(X_val_predictions, y_val)
print("블렌더 훈련 완료. OOB Score:", rnd_forest_blender.oob_score_)

블렌더 훈련 완료. OOB Score: 0.9738


In [None]:
# Stack the base estimators' test-set predictions (one float32 column each),
# let the blender produce the final labels, and score them against y_test.
X_test_predictions = np.column_stack(
    [estimator.predict(X_test) for estimator in estimators]
).astype(np.float32)

y_pred_blender = rnd_forest_blender.predict(X_test_predictions)
blender_score = accuracy_score(y_test, y_pred_blender)
print(f"수동 블렌더 테스트 세트 정확도: {blender_score:.4f}")

수동 블렌더 테스트 세트 정확도: 0.9688


In [None]:
print("\n[StackingClassifier 작업 시작]")
# Merge the training and validation splits into one training set for the
# stacking model, which is configured with its own 3-fold cross-validation (cv=3).
X_train_full = np.concatenate((X_train, X_val), axis=0)
y_train_full = np.concatenate((y_train, y_val), axis=0)

# Same three base learners as before, with a 200-tree random forest as the
# final estimator.
stack_clf = StackingClassifier(
    estimators=[
        ('rf', random_forest_clf),
        ('et', extra_trees_clf),
        ('svc', svm_clf)
    ],
    final_estimator=RandomForestClassifier(n_estimators=200, random_state=42),
    cv=3,
    n_jobs=-1
)

print("StackingClassifier 훈련 중... (SVM 때문에 시간이 오래 걸릴 수 있습니다)")
stack_clf.fit(X_train_full, y_train_full)

# Final accuracy on the held-out test split.
stacking_score = stack_clf.score(X_test, y_test)
print(f"StackingClassifier 테스트 세트 정확도: {stacking_score:.4f}")


[StackingClassifier 작업 시작]
StackingClassifier 훈련 중... (SVM 때문에 시간이 오래 걸릴 수 있습니다)
StackingClassifier 테스트 세트 정확도: 0.9772


In [None]:
# Final side-by-side summary of the three ensembling strategies' test accuracy.
rule = "=" * 40
report_lines = [
    f"1. Voting Classifier (Soft): {voting_score:.4f}",
    f"2. 수동 블렌더 (Blender)   : {blender_score:.4f}",
    f"3. Stacking Classifier     : {stacking_score:.4f}",
]
print("\n" + rule)
print("\n".join(report_lines))
print(rule)


1. Voting Classifier (Soft): 0.9729
2. 수동 블렌더 (Blender)   : 0.9688
3. Stacking Classifier     : 0.9772
