<a href="https://colab.research.google.com/github/NobuAk/colab_AI_master/blob/main/241010_%E6%B1%BA%E5%AE%9A%E6%9C%A8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Compute the Gini impurity
def gini_impurity(y):
    """Return the Gini impurity of the label collection *y*.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    samples carrying the k-th distinct label.

    Args:
        y: Sequence or array of class labels.

    Returns:
        The impurity as a float; ``0.0`` for a pure or empty collection.
    """
    n_samples = len(y)
    # An empty set has no class mix; without this guard the formula below
    # would evaluate to 1 - 0 = 1.0, which is outside the valid range.
    if n_samples == 0:
        return 0.0

    _, counts = np.unique(y, return_counts=True)
    prob = counts / n_samples
    return 1 - np.sum(prob ** 2)

# Compute the gain of a split (measured as Gini impurity decrease)
def information_gain(y, y_left, y_right):
    """Return how much a split of *y* into two children lowers impurity.

    The result is the parent's Gini impurity minus the size-weighted
    average of the children's Gini impurities.

    Args:
        y: Labels at the parent node.
        y_left: Labels sent to the left child.
        y_right: Labels sent to the right child.

    Returns:
        The impurity decrease as a float.
    """
    left_weight = len(y_left) / len(y)
    right_weight = 1 - left_weight
    children_impurity = (
        left_weight * gini_impurity(y_left) + right_weight * gini_impurity(y_right)
    )
    return gini_impurity(y) - children_impurity

# Split the data on one feature
def split_data(X, y, feature, threshold):
    """Partition samples by comparing one feature column to a threshold.

    Args:
        X: 2-D array indexed as ``X[:, feature]``.
        y: Array of labels aligned with the rows of ``X``.
        feature: Column index to compare.
        threshold: Rows with a value ``<= threshold`` go left, rows with a
            value ``> threshold`` go right.

    Returns:
        Tuple ``(X_left, X_right, y_left, y_right)``.
    """
    column = X[:, feature]
    # Both masks are computed explicitly (rather than negating one) so a
    # value that satisfies neither comparison, e.g. NaN, lands in no side.
    goes_left = column <= threshold
    goes_right = column > threshold

    X_left, y_left = X[goes_left], y[goes_left]
    X_right, y_right = X[goes_right], y[goes_right]
    return X_left, X_right, y_left, y_right

# Find the best split
def best_split(X, y):
    """Search every feature/threshold pair for the highest-gain split.

    Each distinct value of each feature column is tried as a threshold.
    Splits that leave one side empty are ignored. On equal gain the first
    candidate encountered is kept.

    Args:
        X: 2-D feature array.
        y: Array of labels aligned with the rows of ``X``.

    Returns:
        Tuple ``(feature, threshold)`` of the best split, or
        ``(None, None)`` when no split puts samples on both sides.
    """
    # Starting below zero lets a zero-gain split still be selected.
    top_gain, top_feature, top_threshold = -1, None, None

    n_features = X.shape[1]
    for feature in range(n_features):
        for threshold in np.unique(X[:, feature]):
            _, _, y_left, y_right = split_data(X, y, feature, threshold)
            if len(y_left) > 0 and len(y_right) > 0:
                gain = information_gain(y, y_left, y_right)
                if gain > top_gain:
                    top_gain, top_feature, top_threshold = gain, feature, threshold

    return top_feature, top_threshold

# Node of the decision tree
class DecisionTreeNode:
    """One node of a binary decision tree.

    Attributes are plain storage for the constructor arguments:
    ``feature`` and ``threshold`` describe the split, ``left`` and
    ``right`` are the child nodes, and ``value`` is the predicted class
    when the node is a leaf.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split definition: feature used for the split and its threshold.
        self.feature, self.threshold = feature, threshold
        # Child nodes on the left and right branches.
        self.left, self.right = left, right
        # Predicted class (for a leaf node).
        self.value = value

# Decision tree classifier
class DecisionTreeClassifier:
    """Binary decision tree classifier grown by greedy impurity reduction.

    Attributes:
        max_depth: Depth at which growth stops and a leaf is created.
        root: Root node of the fitted tree, or ``None`` before fitting.
    """

    def __init__(self, max_depth=3):
        self.max_depth = max_depth
        self.root = None

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in *y* (smallest label on ties).

        Counting via ``np.unique`` instead of ``np.bincount`` allows
        negative, floating-point and string labels as well.

        Raises:
            ValueError: If *y* is empty.
        """
        labels, counts = np.unique(y, return_counts=True)
        if labels.size == 0:
            raise ValueError("cannot determine a majority label from empty y")
        return labels[np.argmax(counts)]

    def _grow(self, X, y, depth):
        """Recursively build and return the subtree for the given samples."""
        # Make a leaf when all labels agree or the maximum depth is reached.
        if len(np.unique(y)) == 1 or depth == self.max_depth:
            return DecisionTreeNode(value=self._majority_label(y))

        # Find the best split; None means no split separates the samples.
        feature, threshold = best_split(X, y)
        if feature is None:
            return DecisionTreeNode(value=self._majority_label(y))

        # Split the data and build both children recursively.
        X_left, X_right, y_left, y_right = split_data(X, y, feature, threshold)
        return DecisionTreeNode(
            feature=feature,
            threshold=threshold,
            left=self._grow(X_left, y_left, depth + 1),
            right=self._grow(X_right, y_right, depth + 1),
        )

    def fit(self, X, y, depth=0):
        """Build a tree from the training data and return its root node.

        When called with the default ``depth=0`` the built tree is also
        stored on ``self.root`` so that ``predict`` works right away.
        Assigning the return value to ``root`` manually remains valid.

        Args:
            X: 2-D feature array.
            y: Array of labels aligned with the rows of ``X``.
            depth: Depth assigned to the node being built.

        Returns:
            The node at the top of the built (sub)tree.

        Raises:
            ValueError: If a leaf has to be created from empty labels.
        """
        node = self._grow(X, y, depth)
        if depth == 0:
            self.root = node
        return node

    def predict_sample(self, node, x):
        """Return the prediction for one sample *x*, starting at *node*."""
        # A leaf node carries the prediction.
        if node.value is not None:
            return node.value

        # Otherwise follow the branch selected by the split feature.
        if x[node.feature] <= node.threshold:
            return self.predict_sample(node.left, x)
        return self.predict_sample(node.right, x)

    def predict(self, X):
        """Return an array with the predicted label of every row of *X*."""
        return np.array([self.predict_sample(self.root, x) for x in X])

# Build the toy dataset: two numeric features and a binary label
df = pd.DataFrame(
    {
        'Feature1': [2.77, 1.72, 3.67, 3.96, 2.23, 1.25, 3.27, 2.77, 3.47, 2.67],
        'Feature2': [1.78, 1.16, 2.81, 2.61, 1.67, 1.21, 2.59, 2.39, 2.41, 1.79],
        'Label': [0, 0, 1, 1, 0, 0, 1, 0, 1, 0],
    }
)

# Separate the feature matrix from the label vector
X = df[['Feature1', 'Feature2']].to_numpy()
y = df['Label'].to_numpy()

# Train the decision tree; the returned root is attached to the model
tree = DecisionTreeClassifier(max_depth=3)
tree.root = tree.fit(X, y)

# Predict on the training samples and show the result
predictions = tree.predict(X)
print("予測結果:", predictions)

予測結果: [0 0 1 1 0 0 1 0 1 0]
