In [13]:
# Install the third-party packages for this notebook via the %pip magic.
# NOTE(review): seaborn is installed here but is not imported in the cell shown
# below, and no versions are pinned — confirm both are intentional.
%pip install seaborn scikit-learn pandas koreanize-matplotlib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [26]:
# === 랜덤포레스트(아이리스) 트리 시각화: 완전 실행 스크립트 ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import koreanize_matplotlib
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree

# 1) Data preparation: load iris and wrap it in a DataFrame whose feature
#    columns carry Korean headers, plus one column holding the integer target.
iris_data = load_iris()
feature_names = [
    "꽃받침 길이(cm)",
    "꽃받침 너비(cm)",
    "꽃잎 길이(cm)",
    "꽃잎 너비(cm)",
]
iris_df = pd.DataFrame(data=iris_data.data, columns=feature_names).assign(
    **{"품종번호": iris_data.target}
)

# Korean display labels for the classes (used in reports / plots),
# listed in the same order as iris_data.target_names.
eng_to_kor = dict(
    setosa="부채붓꽃",
    versicolor="버시컬러",
    virginica="버지니카",
)
class_names_kor = [eng_to_kor[species] for species in iris_data.target_names]

# Feature matrix and target vector consumed by the train/test split.
X, y = iris_df[feature_names], iris_df["품종번호"]

# 2) Split and train: hold out 20% of the rows as a test set (stratified on
#    the target), then fit a 100-tree random forest on the remaining rows.
#    Both the split and the forest are seeded with 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)
# fit() returns the estimator itself, so construction and training can be chained.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# 3) Visualize one representative tree: the first estimator of the forest.
#    The figure is written to a PNG file and then closed.
tree_index = 0  # 0-based: index 0 is the first fitted tree (the code previously used 1)
fig, ax = plt.subplots(figsize=(14, 10))
plot_tree(
    rf.estimators_[tree_index],
    feature_names=feature_names,
    class_names=class_names_kor,
    rounded=False,
    impurity=False,     # do not print the impurity value inside each node
    proportion=False,   # show raw sample counts rather than proportions
    filled=True,        # colour nodes by their majority class
    ax=ax,
)
ax.set_title("꽃 분류 그래프")
fig.tight_layout()
fig.savefig("꽃 분류.png", dpi=200)
plt.close(fig)  # close this specific figure once it has been saved

# 4) Console summary: accuracy of the fitted forest on the test split.
#    accuracy_score is imported in the import block at the top of the cell.
y_pred = rf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("=== 학습 요약 ===")
print(f"테스트 정확도: {acc:.3f}")



=== 학습 요약 ===
테스트 정확도: 0.900
