In [5]:
# Import the required libraries
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import mode

# 1. Prepare the data: generate a noisy two-moons dataset, then hold out 20%
#    of it as a test set. Both steps use a fixed seed.
X, y = make_moons(random_state=42, noise=0.4, n_samples=10000)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# 2. Tune the decision tree's hyperparameters with a 5-fold cross-validated
#    grid search scored on accuracy.
param_grid = dict(
    max_depth=[3, 6, 9, 12],
    max_leaf_nodes=[10, 15, 20, 25],
    min_samples_split=[2, 5, 10],
)
tree_clf = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=tree_clf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)

# Evaluate the best estimator found by the search on the held-out test set.
best_tree = grid_search.best_estimator_
y_pred_tree = best_tree.predict(X_test)
print("Single Tree Accuracy:", accuracy_score(y_test, y_pred_tree))

# 3. Random-forest-style ensemble (bagging): fit n_trees decision trees, each
#    on its own bootstrap resample of the training set, all using the
#    hyperparameters selected by the grid search above.
n_trees = 100
n_samples = len(X_train)
subset_accuracies = []

# Use a dedicated, seeded generator for the bootstrap draws. The previous
# unseeded global np.random call made the accuracies printed below change on
# every run, unlike every other step of this script (all pinned to seed 42).
bootstrap_rng = np.random.default_rng(42)

trees = []
for _ in range(n_trees):
    # Bootstrap sample: n_samples row indices drawn with replacement.
    indices = bootstrap_rng.choice(n_samples, size=n_samples, replace=True)
    X_subset, y_subset = X_train[indices], y_train[indices]
    # Every tree shares the same hyperparameters and random_state, so any
    # difference between trees comes from the differing bootstrap samples.
    tree = DecisionTreeClassifier(**grid_search.best_params_, random_state=42)
    tree.fit(X_subset, y_subset)
    trees.append(tree)
    # Record this individual tree's accuracy on the held-out test set.
    y_pred = tree.predict(X_test)
    subset_accuracies.append(accuracy_score(y_test, y_pred))

print("Average Single Tree Accuracy:", np.mean(subset_accuracies))

# 4. Majority-vote ensemble: collect every tree's test-set predictions (one
#    row per tree) and keep, for each test instance, the most frequent label.
per_tree_votes = [member.predict(X_test) for member in trees]
all_predictions = np.array(per_tree_votes)

# Take the mode down the tree axis, then flatten the result to 1-D.
vote = mode(all_predictions, axis=0)
final_predictions = np.ravel(vote.mode)

ensemble_accuracy = accuracy_score(y_test, final_predictions)
print("Ensemble Accuracy:", ensemble_accuracy)



Best Parameters: {'max_depth': 9, 'max_leaf_nodes': 25, 'min_samples_split': 2}
Single Tree Accuracy: 0.872
Average Single Tree Accuracy: 0.862255
Ensemble Accuracy: 0.8675
