In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import *

In [26]:
# Toy training set: one row per example, one column per binary (0/1) feature.
# The information-gain loop later in this notebook labels the columns, in
# order, 'Ear Shape', 'Face Shape', 'Whiskers'.
# NOTE(review): x_train has 8 rows but y_train below has 10 labels. The splits
# are built by enumerating x_train, so y[indices] only ever reaches labels
# 0-7, while sum(y)/len(y) in information_gain uses all 10. Confirm whether
# two feature rows are missing or two labels are extra.
x_train = np.array([[1, 1, 1],
[0, 0, 1],
[0, 1, 0],
[1, 0, 1],
[1, 1, 1],
[1, 1, 0],
[0, 1, 0],
[0, 1, 0]])

# 0/1 class label for each example (intended to align row-for-row with x_train).
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [27]:
# Show the first training example: a row of three 0/1 feature values.
x_train[0]

array([1, 1, 1])

### Calculate Entropy

In [28]:
def entropy(p):
    """Return the binary entropy, in bits, of a two-class distribution.

    Args:
        p: Scalar fraction of examples belonging to one class.

    Returns:
        0 when ``p`` is exactly 0 or 1, otherwise
        ``-p*log2(p) - (1-p)*log2(1-p)``.
    """
    # Guard clause: at p == 0 or p == 1 the formula would evaluate log2(0),
    # so those two cases are answered directly.
    if p in (0, 1):
        return 0

    q = 1 - p  # fraction belonging to the other class
    return -p * np.log2(p) - q * np.log2(q)


print(entropy(0.5))

1.0


In [29]:
def split_indices(X, index_feature):
    """Partition the row positions of ``X`` by the value of one feature.

    Args:
        X: Iterable of rows; each row is indexed with ``index_feature``.
        index_feature: Position of the feature to test within each row.

    Returns:
        A ``(left, right)`` tuple of lists of row positions. ``left`` holds
        the rows whose feature value equals 1; ``right`` holds all others.
    """
    left, right = [], []
    for position, sample in enumerate(X):
        # Pick the destination list first, then record the row position.
        bucket = left if sample[index_feature] == 1 else right
        bucket.append(position)
    return left, right

In [30]:
# Split every training row on feature 0: rows with value 1 go left, the rest right.
split_indices(x_train, 0)

([0, 3, 4, 5], [1, 2, 6, 7])

In [31]:
def weighted_entropy(x, y, left_indices, right_indices):
    """Return the size-weighted entropy of the two branches of a split.

    Each branch contributes ``(branch size / len(x)) * entropy(p_branch)``,
    where ``p_branch`` is the mean of the labels selected by that branch's
    indices (the fraction of 1s when labels are 0/1).

    Args:
        x: Examples at the node being split; only ``len(x)`` is used.
        y: Labels, indexable by a list of positions (e.g. a NumPy array).
        left_indices: Positions of the examples sent to the left branch.
        right_indices: Positions of the examples sent to the right branch.

    Returns:
        The weighted entropy as a float. An empty branch contributes 0, so a
        split that sends every example to one side no longer raises
        ``ZeroDivisionError``.
    """
    n_node = len(x)

    def branch_term(indices):
        # An empty branch has zero weight; skip it so we never compute 0 / 0
        # for its label fraction.
        if len(indices) == 0:
            return 0.0
        weight = len(indices) / n_node
        p_branch = np.sum(y[indices]) / len(indices)
        return weight * entropy(p_branch)

    return branch_term(left_indices) + branch_term(right_indices)

In [32]:
# Split on feature 0 and keep the index lists at notebook level; the
# information_gain call in a later cell reuses them.
left_indices, right_indices = split_indices(x_train, 0)
# Weighted entropy of the two branches produced by that split.
weighted_entropy(x_train, y_train, left_indices, right_indices)

0.9056390622295665

The weighted entropy of the two child nodes is **0.90564**. To compute the **information gain**, we subtract this value from the entropy of the node being split.

In [33]:
def information_gain(x, y, left_indices, right_indices):
    """Return the reduction in entropy achieved by a candidate split.

    The gain is the entropy of the node before splitting minus the weighted
    entropy of the two branches after splitting.

    Args:
        x: Examples at the node; forwarded to ``weighted_entropy``.
        y: Labels at the node. The pre-split entropy uses the mean of *all*
            of ``y``, so ``y`` should hold exactly the labels of the examples
            in ``x``.
        left_indices: Positions of the examples sent to the left branch.
        right_indices: Positions of the examples sent to the right branch.

    Returns:
        ``entropy(mean(y)) - weighted_entropy(x, y, left_indices, right_indices)``.
    """
    # Left operand: entropy of the unsplit node; right operand: entropy
    # remaining after the split.
    return entropy(sum(y) / len(y)) - weighted_entropy(
        x, y, left_indices, right_indices
    )

information_gain(x_train, y_train, left_indices, right_indices)

0.09436093777043353

In [34]:
# Compare candidate root splits: for each feature column, partition the
# examples with split_indices and report the resulting information gain.
# Note: this rebinds the notebook-level left_indices / right_indices, so after
# the loop they hold the split for the last feature ('Whiskers').
for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    left_indices, right_indices = split_indices(x_train, i)
    i_gain = information_gain(x_train, y_train, left_indices, right_indices)
    # Gains are printed rounded to two decimals.
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")

Feature: Ear Shape, information gain if we split the root node using this feature: 0.09
Feature: Face Shape, information gain if we split the root node using this feature: 0.06
Feature: Whiskers, information gain if we split the root node using this feature: 0.09


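We split on the feature with the highest information gain. Here *Ear Shape* and *Whiskers* tie at 0.09, so either is an equally good choice by this criterion.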