In [None]:
## 必要なライブラリのインポート
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier as SkDT

from decision_tree import MyDecisionTreeClassifier

# Seed NumPy's legacy global RNG so that anything drawing from np.random
# produces the same values on every Restart & Run All.
np.random.seed(42)

In [None]:
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the label vector.
cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target

# Hold out 20% of the samples for testing; stratify on the labels so both
# splits keep the same class proportions, and fix the seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"train: {X_train.shape}, test: {X_test.shape}")

: 

In [None]:
## Train the hand-written decision tree and evaluate it on the test split
my_tree = MyDecisionTreeClassifier(max_depth=3, min_samples_split=2)
my_tree.fit(X_train, y_train)
y_pred = my_tree.predict(X_test)

# Compute both metrics up front, then report them.
my_accuracy = accuracy_score(y_test, y_pred)
my_confusion = confusion_matrix(y_test, y_pred)

print(f"My Decision Tree accuracy: {my_accuracy:.4f}")
print("Confusion Matrix:\n", my_confusion)

In [None]:
## Visualise the decision regions on a 2-D PCA projection
# The tree was fit on the raw features (my_tree.fit(X_train, y_train)), so the
# standardisation below exists only to give PCA comparable feature scales;
# mesh points are mapped back to raw feature space before predicting.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

pca = PCA(n_components=2)
X2d = pca.fit_transform(X_train_scaled)

# Build a regular mesh covering the projected training points, padded by one
# unit on every side.
MESH_RESOLUTION = 200  # grid points per axis
x_min, x_max = X2d[:, 0].min() - 1, X2d[:, 0].max() + 1
y_min, y_max = X2d[:, 1].min() - 1, X2d[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, MESH_RESOLUTION),
    np.linspace(y_min, y_max, MESH_RESOLUTION),
)

# Lift every mesh point back to the original feature space and classify it.
# NOTE: inverse_transform can only reconstruct points that lie on the 2-D PCA
# plane, so the shaded regions show the tree's predictions on that plane. A
# training point's own prediction may differ from the colour drawn behind it.
mesh_scaled = pca.inverse_transform(np.c_[xx.ravel(), yy.ravel()])
mesh_original = scaler.inverse_transform(mesh_scaled)
# np.asarray guards against predict() returning a plain list, which has no
# .reshape method.
Z = np.asarray(my_tree.predict(mesh_original)).reshape(xx.shape)

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)
ax.scatter(
    X2d[:, 0],
    X2d[:, 1],
    c=y_train,
    edgecolor='k',
    s=50,
    cmap=plt.cm.coolwarm,
)
ax.set_title("Decision Boundary of My Decision Tree")
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
plt.show()

In [None]:
# Baseline: scikit-learn's decision tree with the same hyperparameters
sk_tree = SkDT(max_depth=3, min_samples_split=2, random_state=42)
sk_tree.fit(X_train, y_train)
sk_pred = sk_tree.predict(X_test)

# Compute both metrics up front, then report them.
sk_accuracy = accuracy_score(y_test, sk_pred)
sk_confusion = confusion_matrix(y_test, sk_pred)

print(f"Scikit-learn Decision Tree accuracy: {sk_accuracy:.4f}")
print("Confusion Matrix:\n", sk_confusion)

In [None]:
# Hyperparameter experiment: train/test accuracy as max_depth varies

# Single source of truth for the depths swept; the loop, both curves and the
# x-axis ticks all read from it so they cannot drift apart.
depths = range(1, 8)  # max_depth = 1..7
train_scores, test_scores = [], []

for depth in depths:
    # Fit a fresh tree at this depth and score it on both splits.
    model = MyDecisionTreeClassifier(max_depth=depth, min_samples_split=2)
    model.fit(X_train, y_train)
    train_scores.append(accuracy_score(y_train, model.predict(X_train)))
    test_scores.append(accuracy_score(y_test, model.predict(X_test)))

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(depths, train_scores, label='Train Accuracy', marker='o')
ax.plot(depths, test_scores, label='Test Accuracy', marker='s')
ax.set_title("Decision Tree Accuracy vs Max Depth")
ax.set_xlabel("Max Depth")
ax.set_ylabel("Accuracy")
ax.set_xticks(depths)
ax.legend()
plt.show()