<p style="float:right;"><i>Created By Maroyi Bisoka on 01/02/2025</i></p>

In [2209]:
import numpy as np
from copy import deepcopy
from queue import Queue

# Cat classification example

| Ear Shape(x0) | Face Shape(x1) | Whiskers(x2) |   Cat(y)  |
|:---------:|:-----------:|:---------:|:------:|
|   Pointy   |   Round     |  Present  |    1   |
|   Floppy   |  Not Round  |  Present  |    1   |
|   Floppy   |  Round      |  Absent   |    0   |
|   Pointy   |  Not Round  |  Present  |    0   |
|   Pointy   |   Round     |  Present  |    1   |
|   Pointy   |   Round     |  Absent   |    1   |
|   Floppy   |  Not Round  |  Absent   |    0   |
|   Pointy   |  Round      |  Absent   |    1   |
|    Floppy  |   Round     |  Absent   |    0   |
|   Floppy   |  Round      |  Absent   |    0   |

Each categorical feature takes only two values, so we encode it as a single binary (0/1) column — the two-category special case of **one-hot encoding**. The encoding is as follows:

- Ear Shape: Pointy = 1, Floppy = 0
- Face Shape: Round = 1, Not Round = 0
- Whiskers: Present = 1, Absent = 0

#### In this material, we are going to build a decision tree for the cat classification example from scratch

In [2211]:
# Binary-encoded training features: one row per animal of the table above.
# Columns: [Ear Shape (Pointy = 1), Face Shape (Round = 1), Whiskers (Present = 1)]
X_train = np.array([
    [1, 1, 1],
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 0]
])

# Target labels, aligned row-by-row with X_train: 1 = cat, 0 = not a cat
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [2212]:
# Entropy as a measure of impurity
def compute_entropy(p1):
    """Return the binary entropy (in bits) of a node.

    Args:
        p1: fraction of the node's examples that are labelled 1.

    Returns:
        0 for a pure node (p1 equal to 0 or 1), otherwise
        -p1*log2(p1) - (1-p1)*log2(1-p1).
    """
    # log2(0) is undefined, so pure nodes are answered directly
    is_pure = (p1 == 0) or (p1 == 1)
    if is_pure:
        return 0

    p0 = 1 - p1
    return -p1 * np.log2(p1) - p0 * np.log2(p0)

In [2213]:
# p1 is the fraction of a node's examples that are classified as 1.
# Illustrative value: a node in which 4 out of 10 examples are labelled 1.
p1 = 4/10
compute_entropy(p1)

0.9709505944546686

In [2214]:
# Splits the row indices on a binary feature: rows where x[feature] == 1 go to the left side,
# all other rows go to the right side.
# Example: splitting the cat classification data on Ear shape
# (binary encoding: pointy = 1 and floppy = 0)
#     for each row index i of the dataset:
#         if row[Ear_shape] == 1 (means it is pointy):
#             add index i to the left side
#         else:
#             add index i to the right side
def split_left_and_right(X, index_feature):
    """Partition the row indices of X on one binary feature.

    Args:
        X: indexable collection of rows; each row is indexable by feature.
        index_feature: column to test.

    Returns:
        (left_side, right_side): two lists of row indices. Rows whose value
        in column index_feature equals 1 are listed on the left, every other
        row on the right.
    """
    ones_rows, other_rows = [], []
    for row_idx in range(len(X)):
        bucket = ones_rows if X[row_idx][index_feature] == 1 else other_rows
        bucket.append(row_idx)
    return ones_rows, other_rows

In [2215]:
def compute_weighted_entropy(X, y, left_side, right_side, entropy_function):
    """Entropy of a split: the size-weighted sum of both branches' entropies.

    Args:
        X: examples of the node being split (only its length is used).
        y: labels of the node, indexable by a list of row indices.
        left_side: row indices sent to the left branch.
        right_side: row indices sent to the right branch.
        entropy_function: callable mapping a fraction p1 to an entropy.

    Returns:
        w_left * H(p1_left) + w_right * H(p1_right).
    """
    n_left = len(left_side)
    # An empty branch has no labels to average; use p1 = 0 to avoid dividing by zero
    if n_left == 0:
        p1_left = 0
    else:
        p1_left = sum(y[left_side]) / n_left
    w_left = n_left / len(X)

    n_right = len(right_side)
    if n_right == 0:
        p1_right = 0
    else:
        p1_right = sum(y[right_side]) / n_right
    w_right = n_right / len(X)

    return w_left*entropy_function(p1_left) + w_right*entropy_function(p1_right)

In [2216]:
def compute_information_gain(X, y, left_side, right_side, entropy_function, weighted_entropy_function):
    """Information gain of a split: parent entropy minus weighted child entropy.

    Args:
        X, y: examples and labels of the node being split.
        left_side, right_side: row indices of the two branches.
        entropy_function: callable mapping a fraction p1 to an entropy.
        weighted_entropy_function: callable
            (X, y, left_side, right_side, entropy_function) -> split entropy.

    Returns:
        The reduction in entropy obtained by the split.
    """
    parent_entropy = entropy_function(sum(y)/len(y))
    children_entropy = weighted_entropy_function(X, y, left_side, right_side, entropy_function)
    return parent_entropy - children_entropy

#### Suppose we are still trying to choose the first feature to split on (the root node)
#### So we have to compute the information gain of each of the 3 features and choose the best one

In [2218]:
# Information gain of Ear shape (index_feature = 0)
# Partition the row indices on column 0, then measure how much that partition
# lowers the label entropy compared with the unsplit dataset.
left_side, right_side = split_left_and_right(X_train, 0)
print('Feature : Ear shape')
print(f'Left side (Pointy) :  {left_side}')
print(f'Right side (Floppy) : {right_side}')
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Ear shape {ig:.2f}')

Feature : Ear shape
Left side (Pointy) :  [0, 3, 4, 5, 7]
Right side (Floppy) : [1, 2, 6, 8, 9]
Information Gain of Ear shape 0.28


In [2219]:
# Information gain of Face shape (index_feature = 1)
# Same procedure as for Ear shape, applied to column 1.
left_side, right_side = split_left_and_right(X_train, 1)
print('Feature : Face shape')
print(f'Left side (Round) :  {left_side}')
print(f'Right side (Not Round) : {right_side}')
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Face shape {ig:.2f}')

Feature : Face shape
Left side (Round) :  [0, 2, 4, 5, 7, 8, 9]
Right side (Not Round) : [1, 3, 6]
Information Gain of Face shape 0.03


In [2220]:
# Information gain of Whiskers (index_feature = 2)
# Same procedure as for Ear shape, applied to column 2.
left_side, right_side = split_left_and_right(X_train, 2)
print('Feature : Whiskers')
print(f'Left side (Present) :  {left_side}')
print(f'Right side (Absent) : {right_side}')
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Whiskers {ig:.2f}')

Feature : Whiskers
Left side (Present) :  [0, 1, 3, 4]
Right side (Absent) : [2, 5, 6, 7, 8, 9]
Information Gain of Whiskers 0.12


In [2221]:
# The three cells above can be replaced by a single loop that evaluates every
# feature and remembers the one with the highest information gain.
m,n = X_train.shape  # m = number of examples, n = number of features
features = ['Ear Shape', 'Face Shape', 'Whiskers']  # display names, indexed by feature column
best_feat_idx = -1  # placeholder until the first feature has been evaluated
best_IG = None
for i in range (n):
    left_side, right_side = split_left_and_right(X_train, i)
    ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
    print(f'Feature : {features[i]}')
    print(f'Left side  :  {left_side}')
    print(f'Right side : {right_side}')
    print(f'Information Gain : {ig:.2f}')
    print('---------------------------------------')
    # Strict '>' keeps the earliest feature when two gains are equal
    if best_IG is None or ig > best_IG:
        best_IG, best_feat_idx = ig, i

print(f'\nBest Feature is {features[best_feat_idx]} with IG {best_IG:.2f}')

Feature : Ear Shape
Left side  :  [0, 3, 4, 5, 7]
Right side : [1, 2, 6, 8, 9]
Information Gain : 0.28
---------------------------------------
Feature : Face Shape
Left side  :  [0, 2, 4, 5, 7, 8, 9]
Right side : [1, 3, 6]
Information Gain : 0.03
---------------------------------------
Feature : Whiskers
Left side  :  [0, 1, 3, 4]
Right side : [2, 5, 6, 7, 8, 9]
Information Gain : 0.12
---------------------------------------

Best Feature is Ear Shape with IG 0.28


<hr />

Among all the information gains:
- Ear shape: **0.28**
- Face shape: **0.03**
- Whiskers: **0.12**
  
We choose **Ear Shape** since it has the highest IG. We split on that feature and then recursively repeat the same selection on each resulting branch.

### Building our Binary Tree with a recursive function

In [2224]:
class Node:
    """A node of the binary decision tree (decision node or leaf).

    Every attribute starts out empty; the tree-building functions fill in
    the ones that apply to the node they create.
    """

    def __init__(self):
        # Children of a decision node; both stay None on a leaf
        self.left = None
        self.right = None

        # True once the node has been marked as a leaf
        self.end_node = False

        # Split description. For a leaf, feature_name holds the predicted class (0 or 1)
        self.feature_index = None
        self.feature_name = None
        # Threshold of a continuous split; left as None for a binary feature
        self.split_thres_cont = None
        self.IG = None

        # Data reaching the node and its fraction of examples labelled 1
        self.X = []
        self.p1 = None

In [2225]:
def build_tree_helper(X, y,  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                       compute_information_gain_function, features_names, max_depth, curr_depth, threshold):
    """Recursively grow a binary decision tree over binary features.

    Args:
        X: 2-D NumPy array with the examples reaching this node.
        y: 1-D NumPy array with the matching 0/1 labels.
        compute_entropy_function: callable p1 -> entropy.
        split_left_and_right_function: callable (X, feature_index) ->
            (left_indices, right_indices).
        compute_weighted_entropy_function: callable forwarded to the
            information-gain function.
        compute_information_gain_function: callable
            (X, y, left, right, entropy_fn, weighted_entropy_fn) -> gain.
        features_names: display name of each feature column.
        max_depth: depth at which a node is forced to become a leaf.
        curr_depth: depth of the node being built (0 for the root).
        threshold: a non-pure leaf predicts 1 when its p1 >= threshold.

    Returns:
        The Node at the root of the (sub)tree built from X and y.
    """
    # Fraction of the examples reaching this node that are labelled 1
    p1 = sum(y) / len(X)

    def _make_leaf(label):
        leaf = Node()
        leaf.end_node = True
        leaf.p1 = p1
        leaf.feature_name = label
        return leaf

    # Purity reached --> leaf predicting the only class present
    if p1 == 1 or p1 == 0:
        return _make_leaf(int(p1))

    # Max depth reached --> leaf predicting 1 if p1 is greater or equal than threshold
    if curr_depth == max_depth:
        return _make_leaf(1 if p1 >= threshold else 0)

    n = len(X[0])

    best_feat_idx = -1
    best_left_side, best_right_side = [], []
    best_IG = None

    for i in range(n):
        left_side, right_side = split_left_and_right_function(X, i)
        ig = compute_information_gain_function(X, y, left_side, right_side, compute_entropy_function, compute_weighted_entropy_function)
        # Strict '>' keeps the earliest feature when gains are equal
        if best_IG is None or ig > best_IG:
            best_IG, best_feat_idx = ig, i
            best_left_side, best_right_side = left_side, right_side

    # If even the best split sends every example to the same side (identical
    # rows with conflicting labels, or no feature at all), nothing can be
    # separated. Recursing would reach an empty branch and divide by zero in
    # the p1 computation, so stop here with a threshold-voted leaf.
    if len(best_left_side) == 0 or len(best_right_side) == 0:
        return _make_leaf(1 if p1 >= threshold else 0)

    node = Node()
    node.feature_name = features_names[best_feat_idx]
    node.feature_index = best_feat_idx
    node.IG = best_IG
    node.X = deepcopy(X)

    # Build left side
    node.left = build_tree_helper(X[best_left_side], y[best_left_side],  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                       compute_information_gain_function, features_names, max_depth, curr_depth+1, threshold)
    # Build right side
    node.right = build_tree_helper(X[best_right_side], y[best_right_side],  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                       compute_information_gain_function, features_names, max_depth, curr_depth+1, threshold)

    return node

In [2226]:
# For this build_tree function --> the stopping criteria is the depth of the tree
def build_tree(X, y, compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
               compute_information_gain_function, features_names, max_depth, threshold):
    """Build a decision tree and return its root node.

    Thin entry point: forwards every argument to build_tree_helper and
    starts the recursion at depth 0.
    """
    return build_tree_helper(
        X, y,
        compute_entropy_function,
        split_left_and_right_function,
        compute_weighted_entropy_function,
        compute_information_gain_function,
        features_names,
        max_depth,
        0,  # depth of the root
        threshold,
    )

In [2227]:
max_depth = 2  # depth at which build_tree_helper forces a node to become a leaf
threshold = 0.5  # a leaf created at max depth predicts 1 when its p1 >= threshold
features_names = ['Ear Shape', 'Face Shape', 'Whiskers']  # display names, indexed by feature column
# Build the tree on the categorical training data and keep its root for display and prediction
root = build_tree(X_train, y_train, compute_entropy, split_left_and_right, compute_weighted_entropy, compute_information_gain, 
                  features_names, max_depth, threshold)

In [2228]:
# Display tree 
def level_order(root):
    """Print the tree breadth-first, one section per decision node.

    For every decision node the printout shows its feature name, then what
    happens when the feature is 1 (left child) and otherwise (right child):
    either the class of the leaf reached or the feature of the next node.
    Leaves, recognised by a feature_name of 0 or 1, print no section of
    their own.
    """
    separator = '--------------------------------------------'
    pending = Queue()
    print(separator)
    pending.put(root)  # enqueue
    while not pending.empty():
        current = pending.get()  # dequeue
        is_decision_node = not (current.feature_name == 1 or current.feature_name == 0)
        if is_decision_node:
            print(f'Node : *** {current.feature_name} ***')

        for is_left, child in ((True, current.left), (False, current.right)):
            if not child:
                continue
            if is_left:
                print(f'if feature {current.feature_name} is 1:')
            else:
                print(f'else:')
            if child.end_node is True:
                ans = '1 (Cat)' if child.feature_name == 1 else '0 (Not Cat)'
                print(f'\t Reached leaf node : {ans}')
            elif is_left:
                print(f'\t Go to left: {child.feature_name}')
            else:
                print(f'\t Got to right: {child.feature_name}')
            pending.put(child)

        if is_decision_node:
            print(separator)

In [2229]:
# Display the decision tree built above.
# Reminder of the binary encoding used for the categorical features:
#     Ear Shape:  Pointy = 1, Floppy = 0
#     Face Shape: Round = 1, Not Round = 0
#     Whiskers:   Present = 1, Absent = 0
# In the printout, "is 1" describes the left child and "else" the right child.
level_order(root)

--------------------------------------------
Node : *** Ear Shape ***
if feature Ear Shape is 1:
	 Go to left: Face Shape
else:
	 Got to right: Whiskers
--------------------------------------------
Node : *** Face Shape ***
if feature Face Shape is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------
Node : *** Whiskers ***
if feature Whiskers is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------


In [2230]:
def compute_prediction_helper(root, x_test):
    """Walk the tree from root down to a leaf and return the leaf's class.

    At each decision node the example goes to the left child when its value
    for the node's feature equals 1, and to the right child otherwise.

    Args:
        root: node where the walk starts.
        x_test: one example, indexable by feature index.

    Returns:
        The feature_name stored on the leaf reached (the predicted class).
    """
    node = root
    while not node.end_node:
        goes_left = x_test[node.feature_index] == 1
        node = node.left if goes_left else node.right
    return node.feature_name

In [2231]:
# Compute single prediction
def compute_prediction(root, test_example):
    """Return the class predicted by the tree rooted at root for one example."""
    return compute_prediction_helper(root, test_example)

In [2232]:
# Testing our Decision Tree on our X_train (training dataset)
# if pred is 1 --> Cat 
# if pred is 0 --> Not Cat
# Each line shows the example, the predicted class and the true label;
# a mismatch between the two is flagged at the end of the line.
for i, test_example in enumerate(X_train):
    pred = compute_prediction(root, test_example)
    string_note = ' (Wrong prediction)' if pred != y_train[i] else ''
    print(f'Example : {test_example}\t& prediction : {pred} actual : {y_train[i]}{string_note}')

Example : [1 1 1]	& prediction : 1 actual : 1
Example : [0 0 1]	& prediction : 1 actual : 1
Example : [0 1 0]	& prediction : 0 actual : 0
Example : [1 0 1]	& prediction : 0 actual : 0
Example : [1 1 1]	& prediction : 1 actual : 1
Example : [1 1 0]	& prediction : 1 actual : 1
Example : [0 0 0]	& prediction : 0 actual : 0
Example : [1 1 0]	& prediction : 1 actual : 1
Example : [0 1 0]	& prediction : 0 actual : 0
Example : [0 1 0]	& prediction : 0 actual : 0


# Continuous valued features

| Ear Shape(x0) | Face Shape(x1) | Whiskers(x2) | Weight(x3)|Cat(y)  |
|:---------:|:-----------:|:---------:|:------:| :---------:|
|   Pointy   |   Round     |  Present  |    7.2    |    1   |
|   Floppy   |  Not Round  |  Present  |    8.8    |    1   |
|   Floppy   |  Round      |  Absent   |    15     |    0   |
|   Pointy   |  Not Round  |  Present  |    9.2    |    0   |
|   Pointy   |   Round     |  Present  |    8.4    |    1   |
|   Pointy   |   Round     |  Absent   |    7.6    |    1   |
|   Floppy   |  Not Round  |  Absent   |    11     |    0   |
|   Pointy   |  Round      |  Absent   |    10.2   |    1   |
|    Floppy  |   Round     |  Absent   |    18     |    0   |
|   Floppy   |  Round      |  Absent   |    20     |    0   |

In [2235]:
# Training features extended with a continuous Weight column (see the table above).
# Columns: [Ear Shape, Face Shape, Whiskers, Weight]
# NOTE: this rebinds X_train and y_train from the categorical-only example above.
X_train = np.array([
    [1, 1, 1, 7.2],
    [0, 0, 1, 8.8],
    [0, 1, 0,  15],
    [1, 0, 1, 9.2],
    [1, 1, 1, 8.4],
    [1, 1, 0, 7.6],
    [0, 0, 0,  11],
    [1, 1, 0,10.2],
    [0, 1, 0,  18],
    [0, 1, 0,  20]
])

# Target labels, aligned row-by-row with X_train: 1 = cat, 0 = not a cat
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [2236]:
def split_continious_feature_helper(X, val, index_feature):
    """Partition the row indices of X around a threshold on one feature.

    Args:
        X: indexable collection of rows; each row is indexable by feature.
        val: threshold to compare against.
        index_feature: column to test.

    Returns:
        (left_side, right_side): lists of row indices. Rows whose value is
        <= val are listed on the left, every other row on the right.
    """
    at_or_below, above = [], []
    for row_idx, row in enumerate(X):
        target = at_or_below if row[index_feature] <= val else above
        target.append(row_idx)
    return at_or_below, above

In [2237]:

def split_continious_feature(X, y, index_feature, split_continious_feature_helper_function, compute_information_gain_function,
                             compute_entropy_function, compute_weighted_entropy_function):
    """Find the threshold on a continuous feature with the highest information gain.

    The candidate thresholds are the midpoints between neighbouring values
    of the sorted feature column. When several candidates reach the same
    gain, the smallest threshold is kept.

    Args:
        X: 2-D NumPy array of examples.
        y: labels of the examples.
        index_feature: column holding the continuous feature.
        split_continious_feature_helper_function: callable
            (X, threshold, index_feature) -> (left_indices, right_indices).
        compute_information_gain_function: callable
            (X, y, left, right, entropy_fn, weighted_entropy_fn) -> gain.
        compute_entropy_function, compute_weighted_entropy_function:
            forwarded to the information-gain function.

    Returns:
        (best_left, best_right, best_IG, best_split_val); when there is no
        candidate threshold this is ([], [], None, None).
    """
    n_rows, _ = X.shape
    ordered = np.sort(X[:, index_feature])
    # Midpoint of every pair of neighbouring sorted values
    candidates = (ordered[:n_rows - 1] + ordered[1:]) / 2

    winner = ([], [], None, None)
    for val in candidates:
        left_side, right_side = split_continious_feature_helper_function(X, val, index_feature)
        ig = compute_information_gain_function(X, y, left_side, right_side, compute_entropy_function, compute_weighted_entropy_function)
        top_ig = winner[2]
        if top_ig is None or ig > top_ig:
            winner = (left_side, right_side, ig, val)

    best_left, best_right, best_IG, best_split_val = winner
    return best_left, best_right, best_IG, best_split_val

In [2238]:
# Search the Weight column (index 3) for the threshold with the highest information gain
left, right, ig, thres = split_continious_feature(X_train, y_train, 3, split_continious_feature_helper, compute_information_gain,
                        compute_entropy, compute_weighted_entropy)

In [2239]:
# Show the result: (left row indices, right row indices, information gain, threshold)
left, right, ig, thres

([0, 1, 4, 5], [2, 3, 6, 7, 8, 9], 0.6099865470109875, 9.0)

<h4>
    <i>Improve the build_tree_helper function to handle continuous-valued features</i>
</h4> 

In [2241]:
def build_tree_helper(X, y,  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                      compute_information_gain_function, split_continious_feature_function, split_continious_feature_helper_function,
                      features_names, max_depth, curr_depth, threshold, continious_feat_col):
    """Recursively grow a binary decision tree over binary and continuous features.

    Args:
        X: 2-D NumPy array with the examples reaching this node.
        y: 1-D NumPy array with the matching 0/1 labels.
        compute_entropy_function: callable p1 -> entropy.
        split_left_and_right_function: callable (X, feature_index) ->
            (left_indices, right_indices), used for binary features.
        compute_weighted_entropy_function: callable forwarded to the
            information-gain function.
        compute_information_gain_function: callable
            (X, y, left, right, entropy_fn, weighted_entropy_fn) -> gain.
        split_continious_feature_function: callable
            (X, y, feature_index, helper_fn, gain_fn, entropy_fn,
            weighted_entropy_fn) -> (left, right, gain, threshold), used for
            continuous features.
        split_continious_feature_helper_function: callable forwarded to
            split_continious_feature_function.
        features_names: display name of each feature column.
        max_depth: depth at which a node is forced to become a leaf.
        curr_depth: depth of the node being built (0 for the root).
        threshold: a non-pure leaf predicts 1 when its p1 >= threshold.
        continious_feat_col: container with the indices of the continuous
            feature columns.

    Returns:
        The Node at the root of the (sub)tree built from X and y. Nodes that
        split on a continuous feature carry the threshold in split_thres_cont.
    """
    # Fraction of the examples reaching this node that are labelled 1
    p1 = sum(y) / len(X)

    def _make_leaf(label):
        leaf = Node()
        leaf.end_node = True
        leaf.p1 = p1
        leaf.feature_name = label
        return leaf

    # Purity reached --> leaf predicting the only class present
    if p1 == 1 or p1 == 0:
        return _make_leaf(int(p1))

    # Max depth reached --> leaf predicting 1 if p1 is greater or equal than threshold
    if curr_depth == max_depth:
        return _make_leaf(1 if p1 >= threshold else 0)

    n = len(X[0])

    best_feat_idx = -1
    best_left_side, best_right_side = [], []
    best_IG = best_split_thres = None

    for i in range(n):
        if i in continious_feat_col:  # Feature is continuous
            left_side, right_side, ig, split_thres = split_continious_feature_function(X, y, i, split_continious_feature_helper_function, compute_information_gain_function,
                                                                                      compute_entropy_function, compute_weighted_entropy_function)
        else:  # Feature is categorical
            left_side, right_side = split_left_and_right_function(X, i)
            ig = compute_information_gain_function(X, y, left_side, right_side, compute_entropy_function, compute_weighted_entropy_function)
            split_thres = None

        # A continuous feature without any candidate threshold reports no gain
        if ig is None:
            continue

        if best_IG is None or ig > best_IG:
            best_IG, best_feat_idx = ig, i
            best_left_side, best_right_side = left_side, right_side
            # Record the threshold together with the winning split, so that a
            # continuous feature evaluated later cannot overwrite it
            best_split_thres = split_thres

    # If even the best split sends every example to the same side (identical
    # rows with conflicting labels, or no feature at all), nothing can be
    # separated. Recursing would reach an empty branch and divide by zero in
    # the p1 computation, so stop here with a threshold-voted leaf.
    if len(best_left_side) == 0 or len(best_right_side) == 0:
        return _make_leaf(1 if p1 >= threshold else 0)

    node = Node()
    node.IG = best_IG
    node.X = deepcopy(X)
    node.feature_index = best_feat_idx
    node.feature_name = features_names[best_feat_idx]
    # Threshold of the chosen split; None when the chosen feature is categorical
    node.split_thres_cont = best_split_thres

    # Build left side
    node.left = build_tree_helper(X[best_left_side], y[best_left_side],  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                                  compute_information_gain_function, split_continious_feature_function, split_continious_feature_helper_function, 
                                  features_names, max_depth, curr_depth+1, threshold, continious_feat_col)
    # Build right side
    node.right = build_tree_helper(X[best_right_side], y[best_right_side],  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                                   compute_information_gain_function, split_continious_feature_function, split_continious_feature_helper_function,
                                   features_names, max_depth, curr_depth+1, threshold, continious_feat_col)

    return node

In [2242]:
# For this build_tree function --> the stopping criteria is the depth of the tree
def build_tree(X, y, compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
               compute_information_gain_function, split_continious_feature_function, split_continious_feature_helper_function,
               features_names, max_depth, threshold, continious_feat_col=None):
    """Build a decision tree (binary and continuous features) and return its root.

    Thin entry point: forwards every argument to build_tree_helper and
    starts the recursion at depth 0.

    Args:
        continious_feat_col: indices of the continuous feature columns.
            Defaults to None, meaning that no column is continuous.
        (all other arguments are passed through unchanged to
        build_tree_helper)

    Returns:
        Whatever build_tree_helper returns for the root of the tree.
    """
    # A None default replaces the former mutable `[]` default, which would be
    # one list object shared by every call
    if continious_feat_col is None:
        continious_feat_col = []

    start_depth = 0
    root = build_tree_helper(X, y,  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function, 
                             compute_information_gain_function, split_continious_feature_function, split_continious_feature_helper_function,
                             features_names, max_depth, start_depth, threshold, continious_feat_col)
    return root

In [2243]:
# Display tree 
def level_order(root):
    """Print the tree breadth-first, one section per decision node.

    For every decision node the printout shows its feature name, then the
    test applied to the feature ("is 1" when the node has no continuous
    threshold, "<= threshold" otherwise) with the outcome for the left child,
    followed by the outcome for the right child. An outcome is either the
    class of the leaf reached or the feature of the next node. Leaves,
    recognised by a feature_name of 0 or 1, print no section of their own.
    """
    separator = '--------------------------------------------'
    pending = Queue()
    print(separator)
    pending.put(root)  # enqueue
    while not pending.empty():
        current = pending.get()  # dequeue
        is_decision_node = not (current.feature_name == 1 or current.feature_name == 0)
        if is_decision_node:
            print(f'Node : *** {current.feature_name} ***')

        for is_left, child in ((True, current.left), (False, current.right)):
            if not child:
                continue
            if is_left:
                if current.split_thres_cont is None:
                    condition = ' is 1:'
                else:
                    condition = f' <= {current.split_thres_cont}:'
                print(f'if feature {current.feature_name}' + condition)
            else:
                print(f'else:')
            if child.end_node is True:
                ans = '1 (Cat)' if child.feature_name == 1 else '0 (Not Cat)'
                print(f'\t Reached leaf node : {ans}')
            elif is_left:
                print(f'\t Go to left: {child.feature_name}')
            else:
                print(f'\t Got to right: {child.feature_name}')
            pending.put(child)

        if is_decision_node:
            print(separator)

In [2244]:
max_depth = 2  # depth at which build_tree_helper forces a node to become a leaf
threshold = 0.5  # a leaf created at max depth predicts 1 when its p1 >= threshold
features_names = ['Ear Shape', 'Face Shape', 'Whiskers', 'Weight']  # display names, indexed by feature column
continious_feat_col = [3]  # indices of the continuous feature columns (here: Weight)
# Build the tree on the training data that includes the Weight column
root = build_tree(X_train, y_train, compute_entropy, split_left_and_right, compute_weighted_entropy, compute_information_gain, 
                  split_continious_feature, split_continious_feature_helper, features_names, max_depth, threshold, continious_feat_col)

In [2245]:
# Display the tree built with max_depth = 2
level_order(root)

--------------------------------------------
Node : *** Weight ***
if feature Weight <= 9.0:
	 Reached leaf node : 1 (Cat)
else:
	 Got to right: Ear Shape
--------------------------------------------
Node : *** Ear Shape ***
if feature Ear Shape is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------


In [2246]:
def compute_prediction_helper(root, x_test):
    """Walk the tree from root down to a leaf and return the leaf's class.

    At a node without a continuous threshold the example goes left when its
    feature value equals 1; at a node with a threshold it goes left when the
    value is <= the threshold. In both cases it goes right otherwise.

    Args:
        root: node where the walk starts.
        x_test: one example, indexable by feature index.

    Returns:
        The feature_name stored on the leaf reached (the predicted class).
    """
    node = root
    while not node.end_node:
        if node.split_thres_cont is None:  # Categorical feature
            goes_left = x_test[node.feature_index] == 1
        else:  # Continuous feature
            goes_left = x_test[node.feature_index] <= node.split_thres_cont
        node = node.left if goes_left else node.right
    return node.feature_name

In [2247]:
# Compute single prediction
def compute_prediction(root, test_example):
    """Return the class predicted by the tree rooted at root for one example."""
    return compute_prediction_helper(root, test_example)

In [2248]:
# Prediction for tree with max_depth 2 on X_train
# Each line shows the example, the predicted class and the true label;
# a mismatch between the two is flagged at the end of the line.
for i, test_example in enumerate(X_train):
    pred = compute_prediction(root, test_example)
    string_note = ' (Wrong prediction)' if pred != y_train[i] else ''
    print(f'Example : {test_example}\t& prediction : {pred} actual : {y_train[i]}{string_note}')

Example : [1.  1.  1.  7.2]	& prediction : 1 actual : 1
Example : [0.  0.  1.  8.8]	& prediction : 1 actual : 1
Example : [ 0.  1.  0. 15.]	& prediction : 0 actual : 0
Example : [1.  0.  1.  9.2]	& prediction : 1 actual : 0 (Wrong prediction)
Example : [1.  1.  1.  8.4]	& prediction : 1 actual : 1
Example : [1.  1.  0.  7.6]	& prediction : 1 actual : 1
Example : [ 0.  0.  0. 11.]	& prediction : 0 actual : 0
Example : [ 1.   1.   0.  10.2]	& prediction : 1 actual : 1
Example : [ 0.  1.  0. 18.]	& prediction : 0 actual : 0
Example : [ 0.  1.  0. 20.]	& prediction : 0 actual : 0


In [2249]:
# Rebuild the tree with max_depth raised to 5; threshold, features_names and
# continious_feat_col keep the values assigned in the earlier configuration cell.
max_depth = 5
root = build_tree(X_train, y_train, compute_entropy, split_left_and_right, compute_weighted_entropy, compute_information_gain, 
                  split_continious_feature, split_continious_feature_helper, features_names, max_depth, threshold, continious_feat_col)

In [2250]:
# Display the tree rebuilt with max_depth = 5
level_order(root)

--------------------------------------------
Node : *** Weight ***
if feature Weight <= 9.0:
	 Reached leaf node : 1 (Cat)
else:
	 Got to right: Ear Shape
--------------------------------------------
Node : *** Ear Shape ***
if feature Ear Shape is 1:
	 Go to left: Face Shape
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------
Node : *** Face Shape ***
if feature Face Shape is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------


In [2251]:
# Prediction for tree with max_depth 5 on X_train
# Each line shows the example, the predicted class and the true label;
# a mismatch between the two is flagged at the end of the line.
for i, test_example in enumerate(X_train):
    pred = compute_prediction(root, test_example)
    string_note = ' (wrong prediction)' if pred != y_train[i] else ''
    print(f'Example : {test_example}\t& prediction : {pred} actual : {y_train[i]}{string_note}')

Example : [1.  1.  1.  7.2]	& prediction : 1 actual : 1
Example : [0.  0.  1.  8.8]	& prediction : 1 actual : 1
Example : [ 0.  1.  0. 15.]	& prediction : 0 actual : 0
Example : [1.  0.  1.  9.2]	& prediction : 0 actual : 0
Example : [1.  1.  1.  8.4]	& prediction : 1 actual : 1
Example : [1.  1.  0.  7.6]	& prediction : 1 actual : 1
Example : [ 0.  0.  0. 11.]	& prediction : 0 actual : 0
Example : [ 1.   1.   0.  10.2]	& prediction : 1 actual : 1
Example : [ 0.  1.  0. 18.]	& prediction : 0 actual : 0
Example : [ 0.  1.  0. 20.]	& prediction : 0 actual : 0
