In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

- `X_train`: for each example, contains 3 binary features:
    - Ear Shape (1 if pointy, 0 otherwise)
    - Face Shape (1 if round, 0 otherwise)
    - Whiskers (1 if present, 0 otherwise)
            
- `y_train`: the label for each example, i.e. whether the animal is a cat:
    - 1 if the animal is a cat
    - 0 otherwise

In [2]:
# One row per animal; columns are ear shape (1 = pointy), face shape (1 = round),
# and whiskers (1 = present).
X_train = np.array([
    [1, 1, 1],
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# One label per row of X_train: 1 = cat, 0 = not a cat.
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [3]:
# Inspect the first training example (one row holding the three feature values).
X_train[0]

array([1, 1, 1])

In [5]:
# Binary entropy of a node
def entropy(p):
    """Return the binary entropy, in bits, for a positive-class fraction p.

    A pure node (p equal to 0 or 1) yields 0; handling it up front also avoids
    evaluating log2(0) in the general formula.
    """
    is_pure = p == 0 or p == 1
    if is_pure:
        return 0
    q = 1 - p
    return -p * np.log2(p) - q * np.log2(q)
print(entropy(0.5))

1.0


In [9]:
def split_indices(X, index_feature):
    """Partition the row positions of X according to one binary feature.

    Rows whose value at index_feature equals 1 go to the left node; every other
    row goes to the right node. Feature indices used in this notebook:
        0 => ear shape
        1 => face shape
        2 => whiskers

    Returns a (left_indices, right_indices) tuple of lists of row positions.
    """
    has_feature = [row[index_feature] == 1 for row in X]
    left_indices = [pos for pos, flag in enumerate(has_feature) if flag]
    right_indices = [pos for pos, flag in enumerate(has_feature) if not flag]
    return left_indices, right_indices

In [10]:
# Split the training set on feature 0 (ear shape); the result is (left_indices, right_indices).
split_indices(X_train , 0)

([0, 3, 4, 5, 7], [1, 2, 6, 8, 9])

In [11]:
def weighted_entropy(X, y, left_indices, right_indices):
    """
    Return the weighted average entropy of the two child nodes produced by a split.

    Each child's binary entropy is weighted by the fraction of the len(X) examples
    that fall into that child.

    Args:
        X: dataset of the node being split; only len(X) is used.
        y: array of 0/1 labels that can be indexed with a list of integer positions
           (a NumPy array in this notebook).
        left_indices: positions of the examples sent to the left child.
        right_indices: positions of the examples sent to the right child.

    Returns:
        w_left * entropy(p_left) + w_right * entropy(p_right), where an empty child
        contributes 0 to the sum.
    """
    n_total = len(X)
    total = 0.0
    for child_indices in (left_indices, right_indices):
        n_child = len(child_indices)
        if n_child == 0:
            # A feature that is constant over the node sends every example to one
            # side. The empty child has zero weight, so skip it rather than divide
            # by zero when computing its positive-class fraction.
            continue
        p_child = sum(y[child_indices]) / n_child
        total += (n_child / n_total) * entropy(p_child)
    return total

In [12]:
# Split the root node on feature 0 (ear shape), then evaluate the weighted entropy
# of the two resulting children.
left_indices, right_indices = split_indices(X_train, 0)
weighted_entropy(X_train, y_train, left_indices, right_indices)

np.float64(0.7219280948873623)

In [13]:
def information_gain(X, y, left_indices, right_indices):
    """Return the information gain of splitting a node into the given children.

    The gain is the entropy of the node itself (computed from the fraction of
    positive labels in y) minus the weighted entropy of the left and right children.
    """
    parent_entropy = entropy(sum(y) / len(y))
    children_entropy = weighted_entropy(X, y, left_indices, right_indices)
    return parent_entropy - children_entropy

In [14]:
# Information gain at the root, using the left_indices/right_indices produced by the
# earlier split_indices(X_train, 0) call (the ear-shape split).
information_gain(X_train, y_train, left_indices, right_indices)

np.float64(0.2780719051126377)

In [15]:
# Compare the three candidate features for the root split by their information gain.
# Note: this loop rebinds the notebook-level left_indices/right_indices, so after it
# finishes they hold the split for the last feature (Whiskers), not the ear-shape split.
for i , feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    left_indices, right_indices = split_indices(X_train, i)
    i_gain = information_gain(X_train, y_train, left_indices, right_indices)
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")

Feature: Ear Shape, information gain if we split the root node using this feature: 0.28
Feature: Face Shape, information gain if we split the root node using this feature: 0.03
Feature: Whiskers, information gain if we split the root node using this feature: 0.12
