In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# One row per training example. The three binary columns are, in order, the
# features referred to later in this notebook as 'Ear Shape', 'Face Shape'
# and 'Whiskers'.
X_train = np.array([
    [1, 1, 1],
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# Binary label for each row of X_train (1 = positive class, i.e. cat).
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [4]:
# Inspect the first training example: one row holding the three binary feature values.
X_train[0]

array([1, 1, 1])

The root node contains every animal in our dataset. $p_1^{node}$ is the proportion of the positive class (cats) in the root node. Since 5 of the 10 labels in `y_train` are 1, we have

$$p_1^{node} = \frac{5}{10} = 0.5$$

In [12]:
# Function to compute the binary entropy H(p) of a node, given the fraction p of positive examples in it.

In [23]:
def entropy(p):
    """Binary entropy, in bits, of a node whose positive-class fraction is ``p``.

    Parameters
    ----------
    p : float
        Proportion of positive examples in the node.

    Returns
    -------
    float or int
        ``-p*log2(p) - (1-p)*log2(1-p)``; the integer ``0`` when ``p`` is
        exactly 0 or 1.
    """
    # A pure node carries no uncertainty; returning early also avoids
    # evaluating log2(0).
    if p in (1, 0):
        return 0

    q = 1 - p
    return -p * np.log2(p) - q * np.log2(q)

In [24]:
# Sanity check: an evenly split node (p = 0.5) should have an entropy of 1 bit.
print(entropy(0.5))

1.0


In [25]:
def split_indices(X, index_feature):
    """Partition the row positions of ``X`` by the value of one feature.

    Parameters
    ----------
    X : sequence of rows
        Each row must be indexable by ``index_feature``.
    index_feature : int
        Position of the feature to split on.

    Returns
    -------
    tuple of (list, list)
        ``(left, right)``: ``left`` holds the positions of the rows whose
        feature value equals 1, ``right`` holds every other position.
        Both lists are in ascending order.
    """
    left, right = [], []
    for position, row in enumerate(X):
        # Rows with the feature set to 1 go left; anything else goes right.
        target = left if row[index_feature] == 1 else right
        target.append(position)
    return left, right

So, if we split on Ear Shape (feature index 0), the left node must contain the examples whose Ear Shape value is 1, i.e. the rows with indices

$$0 \quad 3 \quad 4 \quad 5 \quad 7$$

and the right node must contain the remaining indices, $1, 2, 6, 8, 9$.

In [26]:
# Split the full training set on feature 0 and show the (left, right) index lists.
split_indices(X_train, 0)

([0, 3, 4, 5, 7], [1, 2, 6, 8, 9])

In [27]:
# Function to compute the weighted entropy of the two child nodes produced by a split.

In [28]:
def weighted_entropy(X, y, left_indices, right_indices):
    """Weighted average entropy of the two child nodes produced by a split.

    Each child's entropy is weighted by the fraction of the node's samples
    that fall into it.

    Parameters
    ----------
    X : sequence
        Samples in the node being split; only ``len(X)`` is used.
    y : numpy.ndarray
        Binary labels (0/1) for the samples in ``X``; indexed with the
        index lists below.
    left_indices, right_indices : list of int
        Positions (into ``y``) of the samples sent to each child.

    Returns
    -------
    float
        ``w_left * H(p_left) + w_right * H(p_right)``.

    Raises
    ------
    ZeroDivisionError
        If ``X`` is empty.

    Notes
    -----
    A child may be empty when every sample has the same value for the
    feature. An empty child has weight 0 and is treated as contributing
    zero entropy, instead of failing on a division by zero while computing
    its class proportion.
    """
    def branch_entropy(indices):
        # The class proportion of an empty branch is undefined (0/0); its
        # weight is 0 anyway, so it contributes nothing to the average.
        if len(indices) == 0:
            return 0.0
        return entropy(sum(y[indices]) / len(indices))

    n_samples = len(X)
    w_left = len(left_indices) / n_samples
    w_right = len(right_indices) / n_samples

    return w_left * branch_entropy(left_indices) + w_right * branch_entropy(right_indices)

In [29]:
# Split the full training set on feature 0, then compute the weighted entropy
# of the two resulting child nodes. `left_indices` / `right_indices` stay
# defined at notebook level after this cell runs.
left_indices, right_indices = split_indices(X_train, 0)
weighted_entropy(X_train, y_train, left_indices, right_indices)

np.float64(0.7219280948873623)

To compute the **Information Gain**, we subtract this weighted entropy of the child nodes from the entropy of the node we chose to split (in this case, the root node).

In [31]:
def information_gain(X, y, left_indices, right_indices):
    """Reduction in entropy obtained by splitting a node into two children.

    Parameters
    ----------
    X : sequence
        Samples in the node being split (passed through to
        ``weighted_entropy``).
    y : sequence of int
        Binary labels (0/1) of the samples in the node.
    left_indices, right_indices : list of int
        Positions of the samples sent to the left and right child.

    Returns
    -------
    float
        Entropy of the node minus the weighted entropy of its children.
    """
    # Entropy before the split, from the fraction of positive labels in the node.
    parent_entropy = entropy(sum(y) / len(y))
    # Entropy after the split, averaged over the two children.
    children_entropy = weighted_entropy(X, y, left_indices, right_indices)
    return parent_entropy - children_entropy

In [32]:
# Information gain of the split currently held in `left_indices` / `right_indices`.
information_gain(X_train, y_train, left_indices, right_indices)

np.float64(0.2780719051126377)

In [35]:
# Compute the information gain obtained by splitting the root node on each feature in turn:

In [37]:
# Try each feature in turn as the split for the full training set and report
# the resulting information gain. The position of a name in the list is used
# as the column index into X_train.
# Note: this overwrites the notebook-level `left_indices` / `right_indices`.
for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    left_indices, right_indices = split_indices(X_train, i)
    i_gain = information_gain(X_train, y_train, left_indices, right_indices)
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")

Feature: Ear Shape, information gain if we split the root node using this feature: 0.28
Feature: Face Shape, information gain if we split the root node using this feature: 0.03
Feature: Whiskers, information gain if we split the root node using this feature: 0.12


In [38]:
# Select the feature whose split of the full training set gives the largest
# information gain.
# `best_feature` ends up holding the column index of that feature (not its name).
best_feature = None
# Starting value for the running maximum; presumably chosen to be lower than
# any gain the loop can produce, so the first feature always replaces it.
best_gain = -1

for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    left_indices, right_indices = split_indices(X_train, i)
    i_gain = information_gain(X_train, y_train, left_indices, right_indices)

    # Strict comparison: on a tie, the earlier feature is kept.
    if i_gain > best_gain:
        best_gain = i_gain
        best_feature = i

print(f"Best feature to split root: {best_feature}, gain = {best_gain:.2f}")


Best feature to split root: 0, gain = 0.28
