In [67]:
class Node:
    """A single node of a binary decision tree.

    An internal node carries a split (``feature`` index and ``threshold``)
    together with its two children; a leaf carries only ``value``.
    """

    def __init__(
        self,
        feature=None,
        threshold=None,
        left_child=None,
        right_child=None,
        *,
        value=None,
    ):
        """Store the split description, the children and the leaf value.

        ``value`` is keyword-only; every argument defaults to ``None``.
        """
        # Split description (used by internal nodes).
        self.feature, self.threshold = feature, threshold
        # Sub-trees (used by internal nodes).
        self.left_child, self.right_child = left_child, right_child
        # Stored value; any non-None value marks the node as a leaf.
        self.value = value

    def is_leaf_node(self):
        """Return True when the node holds a value (i.e. ``value`` is not None)."""
        return not (self.value is None)

In [68]:
import numpy as np
from collections import Counter
class DecisionTreeClassifier:
    """Binary decision tree classifier that splits on entropy information gain.

    Each internal node tests ``x[feature] <= threshold``; samples satisfying
    the test go to the left child, the others to the right child. Leaves
    store the most common label of the training rows that reached them.

    Labels must be non-negative integers because entropy is computed with
    ``np.bincount``.
    """

    def __init__(self, min_rows_split, max_depth, number_of_features=None):
        """Configure the tree.

        Args:
            min_rows_split: a node holding fewer rows than this becomes a leaf.
            max_depth: nodes at this depth (root is depth 0) become leaves.
            number_of_features: how many randomly chosen features are
                examined at each split; ``None`` means all features.
        """
        self.min_rows_split = min_rows_split
        self.max_depth = max_depth
        self.number_of_features = number_of_features
        self.root = None

    def _create_tree(self, X, y, depth=0):
        """Recursively build the subtree for rows ``X`` with labels ``y``.

        Returns a ``Node``: a leaf when a stop criterion is met or when no
        split separates the rows, otherwise an internal node.
        """
        rows, features = X.shape
        labels = len(np.unique(y))
        # Stop criteria: depth limit, pure node, or too few rows to split.
        if depth >= self.max_depth or labels == 1 or rows < self.min_rows_split:
            return Node(value=self._most_common_label(y))

        # ``None`` means "consider every feature"; passing None straight to
        # np.random.choice would return a single scalar instead of an array.
        n_pick = features if self.number_of_features is None else self.number_of_features
        feature_indexes = np.random.choice(features, n_pick, replace=False)

        best_feature, best_threshold = self._best_criteria(X, y, feature_indexes)
        if best_feature is None:
            # No candidate feature was available to split on.
            return Node(value=self._most_common_label(y))

        left_indexes, right_indexes = self._split(X[:, best_feature], best_threshold)
        if len(left_indexes) == 0 or len(right_indexes) == 0:
            # Degenerate split (e.g. all candidate features are constant):
            # recursing would hand an empty partition to a child.
            return Node(value=self._most_common_label(y))

        left_child = self._create_tree(X[left_indexes, :], y[left_indexes], depth + 1)
        right_child = self._create_tree(X[right_indexes, :], y[right_indexes], depth + 1)
        return Node(best_feature, best_threshold, left_child, right_child)

    def _best_criteria(self, X, y, feature_indexes):
        """Return ``(feature_index, threshold)`` with the highest information gain.

        Every unique value of every candidate column is tried as a threshold.
        Returns ``(None, None)`` when ``feature_indexes`` is empty.
        """
        best_gain = -1
        split_index, split_threshold = None, None
        for feature_index in feature_indexes:
            X_column = X[:, feature_index]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                gain = self._count_information_gain(y, X_column, threshold)
                if gain > best_gain:
                    best_gain = gain
                    split_index = feature_index
                    split_threshold = threshold
        return split_index, split_threshold

    def _count_information_gain(self, y, X_column, threshold):
        """Return the entropy reduction from splitting ``y`` at ``threshold``.

        Returns 0 when the split leaves one side empty.
        """
        parent_entropy = self._count_entropy(y)
        left_indexes, right_indexes = self._split(X_column, threshold)
        if len(left_indexes) == 0 or len(right_indexes) == 0:
            return 0

        # Weighted average of the children's entropy.
        n = len(y)
        n_left, n_right = len(left_indexes), len(right_indexes)
        entropy_left = self._count_entropy(y[left_indexes])
        entropy_right = self._count_entropy(y[right_indexes])
        child_entropy = (n_left / n) * entropy_left + (n_right / n) * entropy_right

        # Information gain is the entropy before the split minus after it.
        return parent_entropy - child_entropy

    def _split(self, X_column, threshold):
        """Return row indexes with value ``<= threshold`` and ``> threshold``."""
        left_indexes = np.argwhere(X_column <= threshold).flatten()
        right_indexes = np.argwhere(X_column > threshold).flatten()
        return left_indexes, right_indexes

    def _traverse_tree(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        if node.is_leaf_node():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left_child)
        return self._traverse_tree(x, node.right_child)

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (``y`` must be non-empty)."""
        counter = Counter(y)
        return counter.most_common(1)[0][0]

    def _count_entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels in ``y``.

        ``y`` must contain non-negative integers (requirement of np.bincount).
        """
        hist = np.bincount(y)
        ps = hist / len(y)
        # Zero-probability classes contribute nothing and would hit log2(0).
        return -np.sum([p * np.log2(p) for p in ps if p > 0])

    def trainDecisionTree(self, X, y):
        """Fit the tree on feature matrix ``X`` (rows x features) and labels ``y``.

        Raises:
            ValueError: if the training data is empty, or if
                ``number_of_features`` exceeds the number of columns of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] == 0:
            raise ValueError('Cannot train on empty data')
        # Compare against the number of columns (features), not rows.
        if self.number_of_features is not None and self.number_of_features > X.shape[1]:
            raise ValueError('Too much features')
        self.root = self._create_tree(X, y)

    def predictDecisionTree(self, X):
        """Return an array with the predicted label for every row of ``X``."""
        return np.array([self._traverse_tree(x, self.root) for x in np.asarray(X)])