In [None]:
import numpy as np
class Node:
    """One node of a binary decision tree.

    An internal node carries a split (``feature``, ``threshold``) and its two
    children (``left``, ``right``). A leaf carries only ``value``; every
    attribute that is not supplied defaults to ``None``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule stored on internal nodes.
        self.feature, self.threshold = feature, threshold
        # Child nodes on either side of the split.
        self.left, self.right = left, right
        # Only leaf nodes have a value; it stays None on internal nodes.
        self.value = value
class DecisionTree():
    """Binary decision-tree classifier grown greedily by information gain.

    Every internal node tests ``sample[feature] <= threshold``: samples that
    pass go to the left child, the rest go to the right child. Leaves store
    the majority label of the training samples that reached them.
    """

    def __init__(self):
        # Root Node of the fitted tree; None until fit() has been called.
        self.root = None

    def _entropy(self, y):
        """Return the Shannon entropy, in bits, of the labels in ``y``."""
        _, counts = np.unique(y, return_counts=True)
        prob = counts / len(y)
        # np.unique only reports labels that actually occur, so every
        # probability is strictly positive and log2 needs no epsilon. Adding
        # one would push the entropy of a pure node slightly below zero.
        return -np.sum(prob * np.log2(prob))

    def _majority_label(self, y):
        """Return the most frequent label in ``y``.

        Ties are broken deterministically in favour of the smallest label,
        because np.unique returns labels sorted and np.argmax returns the
        first maximum.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _split(self, X, y, feature_index, threshold):
        """Partition ``X`` and ``y`` on ``X[:, feature_index] <= threshold``.

        Returns:
            ``(X_left, y_left, X_right, y_right)`` where the left part holds
            the rows satisfying the test and the right part holds the rest.
        """
        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask
        return X[left_mask], y[left_mask], X[right_mask], y[right_mask]

    def best_split(self, X, y):
        """Search every feature and every observed value for the best split.

        Returns:
            ``(feature_index, threshold)`` of the split with the highest
            information gain, or ``(None, None)`` when no candidate separates
            the samples into two non-empty groups. On equal gain the first
            candidate found (lowest feature index, then lowest threshold)
            wins. A split with zero gain is still accepted.
        """
        best_gain = -1
        best_feature = None
        best_threshold = None
        parent_entropy = self._entropy(y)
        n_samples = len(y)
        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]
            for threshold in np.unique(column):
                # Only the label halves are needed to score a candidate, so
                # avoid copying the rows of X for every threshold.
                left_mask = column <= threshold
                y_left = y[left_mask]
                y_right = y[~left_mask]
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                weighted_entropy = (
                    (len(y_left) / n_samples) * self._entropy(y_left)
                    + (len(y_right) / n_samples) * self._entropy(y_right)
                )
                info_gain = parent_entropy - weighted_entropy
                if info_gain > best_gain:
                    best_gain = info_gain
                    best_feature = feature_index
                    best_threshold = threshold
        return best_feature, best_threshold

    def _build_tree(self, X, y, depth=0, max_depth=3):
        """Recursively grow the subtree for the samples ``X`` / labels ``y``.

        Returns a leaf Node holding the majority label when the labels are
        pure, when ``depth`` has reached ``max_depth``, or when no valid split
        exists; otherwise returns an internal Node with two grown children.
        """
        # If all labels are the same or max depth is reached, return a leaf node.
        if np.unique(y).size == 1 or depth >= max_depth:
            return Node(value=self._majority_label(y))
        feature, threshold = self.best_split(X, y)
        # No feature has two distinct groups to separate, so stop splitting here.
        if feature is None:
            return Node(value=self._majority_label(y))
        X_left, y_left, X_right, y_right = self._split(X, y, feature, threshold)
        left_child = self._build_tree(X_left, y_left, depth + 1, max_depth)
        right_child = self._build_tree(X_right, y_right, depth + 1, max_depth)

        return Node(feature, threshold, left_child, right_child)

    def predict_sample(self, node, sample):
        """Return the leaf value reached by ``sample`` starting from ``node``."""
        # Walk down until a node carrying a value (a leaf) is reached.
        while node.value is None:
            if sample[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X):
        """Return a list with the predicted label for every sample in ``X``."""
        return [self.predict_sample(self.root, sample) for sample in X]

    def fit(self, X, y, max_depth=3):
        """Grow the tree from samples ``X`` and labels ``y``.

        Args:
            X: 2-D array-like of samples (rows) by features (columns).
            y: 1-D array-like of labels, one per row of ``X``.
            max_depth: maximum number of splits on any root-to-leaf path.

        Raises:
            ValueError: if ``y`` is empty.
        """
        # Accept plain lists as well as arrays; the splitting code relies on
        # NumPy column slicing and boolean-mask indexing.
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        self.root = self._build_tree(X, y, 0, max_depth)

In [21]:
# Toy training set: six two-feature samples, the first three labelled 0 and
# the last three labelled 1.
X = np.array(
    [
        [1, 1],
        [2, 1],
        [3, 2],
        [6, 5],
        [7, 8],
        [8, 9],
    ]
)
y = np.array([0] * 3 + [1] * 3)

In [22]:
# Train a tree limited to depth 3 on the toy data, then predict on the same
# samples it was trained on.
tree = DecisionTree()
tree.fit(X, y, max_depth=3)

# predict() returns a plain list of leaf values, so NumPy scalars are printed
# with their repr.
print("Predictions:", tree.predict(X))

Predictions: [np.int64(0), np.int64(0), np.int64(0), np.int64(1), np.int64(1), np.int64(1)]


In [23]:
# Each row is one sample with the features [Age, Salary].
X = np.array([
    [22, 25], [25, 30],
    [47, 60], [52, 80],
    [46, 50], [56, 90],
    [28, 40], [30, 60],
])

# Target label for each row above: 0 = No, 1 = Yes.
y = np.array([0, 0, 1, 1, 1, 1, 0, 0])

In [24]:
# Fit a fresh tree (rebinding the notebook-level name `tree`) on the
# age/salary data, limited to depth 3.
tree = DecisionTree()
tree.fit(X, y, max_depth=3)

# Predict on the training samples and print them next to the true labels.
predictions = tree.predict(X)
print("Predictions:", predictions)
print("Actual:     ", y.tolist())

Predictions: [np.int64(0), np.int64(0), np.int64(1), np.int64(1), np.int64(1), np.int64(1), np.int64(0), np.int64(0)]
Actual:      [0, 0, 1, 1, 1, 1, 0, 0]


In classic decision tree algorithms (like ID3, C4.5, or CART):

- Each split is made independently.

- At every node, we search all features and all thresholds again.

- If the same feature is still the best choice, we use it again.

If you want to prevent the same feature from being used again (e.g., for educational purposes or because of an external constraint), you would need to:

- Track used features, and

- Remove them from the candidate list in best_split().


