<p style="float:right;"><i>Created By Maroyi Bisoka on 01/02/2025</i></p>

In [1365]:
import numpy as np
from copy import deepcopy
from queue import Queue

# Cat classification example

| Ear Shape(x0) | Face Shape(x1) | Whiskers(x2) |   Cat(y)  |
|:---------:|:-----------:|:---------:|:------:|
|   Pointy   |   Round     |  Present  |    1   |
|   Floppy   |  Not Round  |  Present  |    1   |
|   Floppy   |  Round      |  Absent   |    0   |
|   Pointy   |  Not Round  |  Present  |    0   |
|   Pointy   |   Round     |  Present  |    1   |
|   Pointy   |   Round     |  Absent   |    1   |
|   Floppy   |  Not Round  |  Absent   |    0   |
|   Pointy   |  Round      |  Absent   |    1   |
|    Floppy  |   Round     |  Absent   |    0   |
|   Floppy   |  Round      |  Absent   |    0   |

Every categorical feature here has only two possible values, so its **one-hot encoding** reduces to a single 0/1 column. The features are encoded as follows:

- Ear Shape: Pointy = 1, Floppy = 0
- Face Shape: Round = 1, Not Round = 0
- Whiskers: Present = 1, Absent = 0

#### In this notebook we build a decision tree from scratch for the cat classification example

In [1367]:
# Training features: one row per animal, in the same order as the table above.
# Columns: [Ear Shape (Pointy=1, Floppy=0), Face Shape (Round=1, Not Round=0), Whiskers (Present=1, Absent=0)]
X_train = np.array([
    [1, 1, 1],
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 0]
])

# Target labels, aligned row-by-row with X_train: 1 = cat, 0 = not a cat.
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [1368]:
def compute_entropy(p1):
    """Return the binary entropy, in bits, used here as the impurity measure.

    Args:
        p1: Fraction of examples labelled 1.

    Returns:
        0 when ``p1`` is exactly 0 or 1, otherwise
        ``-p1*log2(p1) - (1-p1)*log2(1-p1)``.
    """
    # Guard clause: a pure node has no impurity, and this also keeps log2(0) out of the formula.
    if p1 == 0 or p1 == 1:
        return 0

    p0 = 1 - p1
    return -p1 * np.log2(p1) - p0 * np.log2(p0)

In [1369]:
# p1 is the fraction of examples that are labelled 1.
# Worked example: 4 examples out of 10 are labelled 1.
p1 = 4/10
# The impurity function defined in this notebook is named compute_entropy;
# no function called `entropy` exists, so calling that name raises NameError.
compute_entropy(p1)

0.9709505944546686

In [1370]:
def split_left_and_right(X, index_feature):
    """Partition the row indices of ``X`` according to one feature.

    Rows whose value at ``index_feature`` equals 1 go to the left side and
    every other row goes to the right side. For example, when splitting the
    cat data on Ear Shape (Pointy = 1, Floppy = 0), the pointy-eared rows end
    up on the left and the floppy-eared rows on the right.

    Args:
        X: Sequence of feature rows.
        index_feature: Column position of the feature to split on.

    Returns:
        ``(left_side, right_side)``: two lists of row indices (positions in
        ``X``), each in ascending order. The rows themselves are not copied.
    """
    goes_left = [row[index_feature] == 1 for row in X]
    left_side = [position for position, flag in enumerate(goes_left) if flag]
    right_side = [position for position, flag in enumerate(goes_left) if not flag]
    return left_side, right_side

In [1371]:
def compute_weighted_entropy(X, y, left_side, right_side, entropy_function):
    """Return the size-weighted average entropy of the two branches of a split.

    Args:
        X: Rows of the node being split; only its length is used.
        y: Labels of the node; indexed with the index lists below.
        left_side: Row indices sent to the left branch.
        right_side: Row indices sent to the right branch.
        entropy_function: Callable mapping a fraction of 1-labels to an entropy.

    Returns:
        ``w_left * H(p1_left) + w_right * H(p1_right)``, where each weight is
        the share of the node's rows that landed in that branch.
    """
    node_size = len(X)

    def branch_term(indices):
        # An empty branch (e.g. when the node is split again on a feature its
        # parent already split on) gets p1 = 0 so we never divide by zero.
        p1 = sum(y[indices]) / len(indices) if len(indices) != 0 else 0
        weight = len(indices) / node_size
        return weight * entropy_function(p1)

    return branch_term(left_side) + branch_term(right_side)

In [1372]:
def compute_information_gain(X, y, left_side, right_side, entropy_function, weighted_entropy_function):
    """Return how much a candidate split reduces entropy.

    Args:
        X: Rows of the node being split.
        y: Labels of the node.
        left_side: Row indices sent to the left branch.
        right_side: Row indices sent to the right branch.
        entropy_function: Callable mapping a fraction of 1-labels to an entropy.
        weighted_entropy_function: Callable with the signature
            ``(X, y, left_side, right_side, entropy_function)`` returning the
            weighted entropy of the two branches.

    Returns:
        Entropy of the node before the split minus the weighted entropy after it.
    """
    entropy_before_split = entropy_function(sum(y) / len(y))
    entropy_after_split = weighted_entropy_function(X, y, left_side, right_side, entropy_function)
    return entropy_before_split - entropy_after_split

#### Suppose we are choosing the feature for the first split (the root node)
#### We compute the information gain of each of the 3 features and pick the one with the highest value

In [1374]:
# Information gain of splitting the whole training set on Ear shape (index_feature = 0).
# Rows whose Ear shape value is 1 (Pointy) go to the left side; the rest (Floppy) go right.
left_side, right_side = split_left_and_right(X_train, 0)
print('Feature : Ear shape')
print(f'Left side (Pointy) :  {left_side}')
print(f'Right side (Floppy) : {right_side}')
# Gain = entropy of all labels minus the weighted entropy of the two sides.
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Ear shape {ig:.2f}')

Feature : Ear shape
Left side (Pointy) :  [0, 3, 4, 5, 7]
Right side (Floppy) : [1, 2, 6, 8, 9]
Information Gain of Ear shape 0.28


In [1375]:
# Information gain of splitting the whole training set on Face shape (index_feature = 1).
# Rows whose Face shape value is 1 (Round) go to the left side; the rest (Not Round) go right.
left_side, right_side = split_left_and_right(X_train, 1)
print('Feature : Face shape')
print(f'Left side (Round) :  {left_side}')
print(f'Right side (Not Round) : {right_side}')
# Gain = entropy of all labels minus the weighted entropy of the two sides.
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Face shape {ig:.2f}')

Feature : Face shape
Left side (Round) :  [0, 2, 4, 5, 7, 8, 9]
Right side (Not Round) : [1, 3, 6]
Information Gain of Face shape 0.03


In [1376]:
# Information gain of splitting the whole training set on Whiskers (index_feature = 2).
# Rows whose Whiskers value is 1 (Present) go to the left side; the rest (Absent) go right.
left_side, right_side = split_left_and_right(X_train, 2)
print('Feature : Whiskers')
print(f'Left side (Present) :  {left_side}')
print(f'Right side (Absent) : {right_side}')
# Gain = entropy of all labels minus the weighted entropy of the two sides.
ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
print(f'Information Gain of Whiskers {ig:.2f}')

Feature : Whiskers
Left side (Present) :  [0, 1, 3, 4]
Right side (Absent) : [2, 5, 6, 7, 8, 9]
Information Gain of Whiskers 0.12


<hr />

Among all the information gains:
- Ear shape: **0.28**
- Face shape: **0.03**
- Whiskers: **0.12**
  
We choose **Ear Shape** since it has the highest IG. We split on that feature first and then recursively repeat the same procedure on each side of the split.

In [1378]:
# The three per-feature computations above can be done in a single loop
# that keeps track of the feature with the highest information gain.
m,n = X_train.shape  # m = number of examples (rows), n = number of features (columns)
features = ['Ear Shape', 'Face Shape', 'Whiskers']  # display names, in column order
best_feat_idx = -1
best_IG = None  # None means "no feature evaluated yet"
for i in range (n):
    left_side, right_side = split_left_and_right(X_train, i)
    ig = compute_information_gain(X_train, y_train, left_side, right_side, compute_entropy, compute_weighted_entropy)
    # Strict '>' keeps the earliest feature when two features tie.
    if best_IG is None or ig > best_IG:
        best_IG, best_feat_idx = ig, i

print(f'Best Feature is {features[best_feat_idx]} with IG {best_IG:.2f}')

Best Feature is Ear Shape with IG 0.28


### Building our Binary Tree with a recursive function

In [1380]:
class Node:
    """One node of the binary decision tree.

    A freshly created node is an empty placeholder; the tree-building code
    fills in whichever attributes apply to it.
    """

    def __init__(self):
        # --- split description ---
        self.feature_index = None  # column index of the feature this node splits on
        # Display name of the split feature. For leaves, the tree builder
        # stores the predicted class (0 or 1) here instead.
        self.feature_name = None
        self.IG = None             # information gain of the chosen split

        # --- children: left is followed when the feature value is 1, right otherwise ---
        self.left = self.right = None

        # --- leaf information ---
        self.end_node = False      # True once the node is a leaf
        self.p1 = -1               # fraction of 1-labels at the leaf; -1 means "not set"

        # --- data ---
        self.X = []                # rows that reached this node

In [1381]:
def build_tree_helper(X, y,  compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function,
                       compute_information_gain_function, max_depth, curr_depth, feature_names=None):
    """Recursively grow a binary decision tree and return the node for this subset.

    A node becomes a leaf when any of these holds:
      * ``curr_depth`` has reached ``max_depth``;
      * every label in ``y`` is the same (or there are no rows at all);
      * no feature separates the rows into two non-empty groups.

    Each leaf labels itself with the majority class of its own rows. The label
    is stored in ``feature_name`` (1 or 0), which is where the display and
    prediction code of this notebook read it from. A 50/50 tie is labelled 0.

    Args:
        X: Feature rows for this node. Must support ``len``, ``X[0]`` and
            indexing with a list of row indices (e.g. a 2-D NumPy array).
        y: Labels (0/1) aligned with ``X``; must support list-of-index
            indexing as well.
        compute_entropy_function: Maps a fraction of 1-labels to an entropy.
        split_left_and_right_function: ``(X, feature_index) -> (left, right)``
            lists of row indices.
        compute_weighted_entropy_function: Weighted entropy of a split.
        compute_information_gain_function: Information gain of a split.
        max_depth: Depth at which growth stops.
        curr_depth: Depth of the node being built (0 for the root).
        feature_names: Optional display names, one per column. Defaults to the
            three cat-example names; a column without a name is shown as
            ``'Feature <index>'``.

    Returns:
        Node: a leaf, or an internal node whose ``left`` child receives the
        rows where the chosen feature equals 1 and whose ``right`` child
        receives the remaining rows.
    """
    if feature_names is None:
        feature_names = ['Ear Shape', 'Face Shape', 'Whiskers']

    n_samples = len(X)
    n_positive = sum(y) if n_samples else 0

    def make_leaf():
        leaf = Node()
        leaf.end_node = True
        # An empty subset has no positives; this avoids dividing by zero.
        leaf.p1 = n_positive / n_samples if n_samples else 0.0
        # Majority vote over this leaf's own rows (not a comparison with its sibling).
        leaf.feature_name = 1 if leaf.p1 > 0.5 else 0
        return leaf

    # Stopping criteria: depth limit reached, or nothing left to separate.
    is_pure = n_positive == 0 or n_positive == n_samples
    if curr_depth >= max_depth or is_pure:
        return make_leaf()

    n_features = len(X[0])
    best_feat_idx = -1
    best_left_side, best_right_side = [], []
    best_IG = None

    for i in range(n_features):
        left_side, right_side = split_left_and_right_function(X, i)
        # A split that sends every row to one side separates nothing and
        # would hand an empty subset to the recursion below, so skip it.
        if len(left_side) == 0 or len(right_side) == 0:
            continue
        ig = compute_information_gain_function(X, y, left_side, right_side, compute_entropy_function, compute_weighted_entropy_function)
        # Strict '>' keeps the earliest feature when gains tie.
        if best_IG is None or ig > best_IG:
            best_IG, best_feat_idx = ig, i
            best_left_side, best_right_side = left_side, right_side

    # All rows are identical on every feature: the node cannot be split further.
    if best_IG is None:
        return make_leaf()

    node = Node()
    if best_feat_idx < len(feature_names):
        node.feature_name = feature_names[best_feat_idx]
    else:
        node.feature_name = f'Feature {best_feat_idx}'
    node.feature_index = best_feat_idx
    node.IG = best_IG
    node.X = deepcopy(X)

    # Build left side (rows where the chosen feature equals 1)
    node.left = build_tree_helper(X[best_left_side], y[best_left_side], compute_entropy_function, split_left_and_right_function,
                                  compute_weighted_entropy_function, compute_information_gain_function,
                                  max_depth, curr_depth + 1, feature_names)
    # Build right side (all remaining rows)
    node.right = build_tree_helper(X[best_right_side], y[best_right_side], compute_entropy_function, split_left_and_right_function,
                                   compute_weighted_entropy_function, compute_information_gain_function,
                                   max_depth, curr_depth + 1, feature_names)

    return node

In [1382]:
def build_tree(X, y, compute_entropy_function,  split_left_and_right_function, compute_weighted_entropy_function,
               compute_information_gain_function, max_depth):
    """Build a decision tree for ``(X, y)`` and return its root node.

    Public entry point: it forwards every argument unchanged to
    ``build_tree_helper`` and starts the recursion at depth 0.

    Args:
        X: Training feature rows.
        y: Training labels aligned with ``X``.
        compute_entropy_function: Impurity function forwarded to the helper.
        split_left_and_right_function: Splitting function forwarded to the helper.
        compute_weighted_entropy_function: Weighted-entropy function forwarded to the helper.
        compute_information_gain_function: Information-gain function forwarded to the helper.
        max_depth: Depth at which the helper stops splitting.

    Returns:
        The root node produced by ``build_tree_helper``.
    """
    root_depth = 0  # the recursion counts depth starting from the root
    return build_tree_helper(
        X,
        y,
        compute_entropy_function,
        split_left_and_right_function,
        compute_weighted_entropy_function,
        compute_information_gain_function,
        max_depth,
        root_depth,
    )

In [1383]:
# Grow the tree on the training set; the last argument (2) is max_depth.
root = build_tree(X_train, y_train, compute_entropy, split_left_and_right, compute_weighted_entropy, compute_information_gain, 2)

In [1384]:
def level_order(root, max_depth):
    """Print the tree breadth-first, one text block per decision node.

    For every decision node the output names the node, then says where a
    feature value of 1 leads (left child) and where any other value leads
    (right child). A child whose ``feature_name`` is 1 or 0 is reported as a
    leaf with that class.

    Args:
        root: Root node of the tree to display.
        max_depth: Accepted for interface compatibility; not used by this function.

    Returns:
        None. The description is written to standard output.
    """
    separator = '--------------------------------------------'

    def holds_class_label(value):
        # A feature_name of 1 or 0 is treated as a predicted class, not a feature.
        return value == 1 or value == 0

    def report_child(child, onward_message):
        if holds_class_label(child.feature_name):
            ans = '1 (Cat)' if child.feature_name == 1 else '0 (Not Cat)'
            print(f'\t Reached leaf node : {ans}')
        else:
            print(onward_message)

    pending = Queue()
    print(separator)
    pending.put(root)  # enqueue
    while not pending.empty():
        node = pending.get()  # dequeue
        is_decision_node = not holds_class_label(node.feature_name)

        if is_decision_node:
            print(f'Node : *** {node.feature_name} ***')

        if node.left:
            print(f'if feature {node.feature_name} is 1:')
            report_child(node.left, f'\t Go to left: {node.left.feature_name}')
            pending.put(node.left)

        if node.right:
            print('else:')
            report_child(node.right, f'\t Got to right: {node.right.feature_name}')
            pending.put(node.right)

        if is_decision_node:
            print(separator)

In [1385]:
# Display our decision tree.
# Reminder of the feature encoding used when reading the output
# ("is 1" follows the left branch, "else" follows the right branch):
#   Ear Shape:  Pointy = 1,  Floppy = 0
#   Face Shape: Round = 1,   Not Round = 0
#   Whiskers:   Present = 1, Absent = 0
# The second argument is not used by level_order.
level_order(root, 2)

--------------------------------------------
Node : *** Ear Shape ***
if feature Ear Shape is 1:
	 Go to left: Face Shape
else:
	 Got to right: Whiskers
--------------------------------------------
Node : *** Face Shape ***
if feature Face Shape is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------
Node : *** Whiskers ***
if feature Whiskers is 1:
	 Reached leaf node : 1 (Cat)
else:
	 Reached leaf node : 0 (Not Cat)
--------------------------------------------


In [1386]:
def compute_prediction_helper(root, x_test):
    """Walk the tree from ``root`` down to a leaf and return that leaf's label.

    At every decision node the example's value for the node's feature picks
    the branch: a value of 1 follows the left child, anything else follows
    the right child.

    Args:
        root: Node at which to start the walk.
        x_test: One example, indexable by feature index.

    Returns:
        The ``feature_name`` stored on the leaf that is reached, which is
        where the tree keeps the predicted class.
    """
    node = root
    while not node.end_node:
        took_left = x_test[node.feature_index] == 1
        node = node.left if took_left else node.right
    return node.feature_name

In [1387]:
def compute_prediction(root, test_example):
    """Predict the class of a single example.

    Args:
        root: Root node of the decision tree.
        test_example: One example, indexable by feature index.

    Returns:
        Whatever ``compute_prediction_helper`` returns for this example.
    """
    return compute_prediction_helper(root, test_example)

In [1388]:
# Test our decision tree on X_train (the training dataset) and print each
# prediction next to the actual label from y_train.
#   pred == 1 --> Cat
#   pred == 0 --> Not Cat
for i, test_example in enumerate(X_train):
    pred = compute_prediction(root, test_example)
    print(f'Example {test_example} & prediction : {pred} actual : {y_train[i]}')

Example [1 1 1] & prediction : 1 actual : 1
Example [0 0 1] & prediction : 1 actual : 1
Example [0 1 0] & prediction : 0 actual : 0
Example [1 0 1] & prediction : 0 actual : 0
Example [1 1 1] & prediction : 1 actual : 1
Example [1 1 0] & prediction : 1 actual : 1
Example [0 0 0] & prediction : 0 actual : 0
Example [1 1 0] & prediction : 1 actual : 1
Example [0 1 0] & prediction : 0 actual : 0
Example [0 1 0] & prediction : 0 actual : 0
