In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import *

In [5]:
# Training inputs: one row per example, one-hot (0/1) encoded input features.
# Column order follows the feature indices used by split_indices:
# 0 = ear shape, 1 = face shape, 2 = whiskers.
X_train = np.array(
    [
        [1, 1, 1],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 1],
        [1, 1, 1],
        [1, 1, 0],
        [0, 0, 0],
        [1, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
    ]
)

# Binary target label for each row of X_train (same order, same length).
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [6]:
# For instance, the first training example (one row holding the three binary features):
X_train[0]

array([1, 1, 1])

In [7]:
# Entropy of a binary node

def entropy(p):
    """Return the binary entropy, in bits, for a class fraction ``p``.

    Computes ``-p*log2(p) - (1-p)*log2(1-p)``. When ``p`` is exactly 0 or 1
    the node is pure and the integer 0 is returned, which also avoids
    evaluating ``log2(0)``.
    """
    node_is_pure = (p == 0) or (p == 1)
    if not node_is_pure:
        q = 1 - p  # fraction of the other class
        return -p * np.log2(p) - q * np.log2(q)
    return 0


# Sanity check: an evenly mixed node has the maximum entropy of 1 bit.
print(entropy(0.5))

1.0


#### Using `enumerate`
Reference (in Chinese): https://www.runoob.com/python/python-func-enumerate.html

In [13]:
# Decide how to split the examples on one feature.
# Feature indices:
#   0 -> ear shape
#   1 -> face shape
#   2 -> whiskers
def split_indices(X,index_feature):
    """Partition the row positions of ``X`` by the value of one feature.

    Parameters
    ----------
    X : sequence of rows (e.g. a 2-D array); each row is indexed by feature.
    index_feature : position of the feature to split on.

    Returns
    -------
    (left_indices, right_indices) : two lists of row positions. Rows whose
    feature value equals 1 go left; every other row goes right.
    """
    left_indices, right_indices = [], []
    for position, sample in enumerate(X):
        # Pick the destination branch first, then record the row position.
        branch = left_indices if sample[index_feature] == 1 else right_indices
        branch.append(position)
    return left_indices, right_indices



In [14]:
split_indices(X_train,0) # split on feature 0 (ear shape): (rows with value 1, all other rows)

([0, 3, 4, 5, 7], [1, 2, 6, 8, 9])

In [18]:
# Weighted entropy of a split

def weighted_entropy(X,y, left_indices, right_indices):
    """Return the size-weighted average entropy of the two branches of a split.

    Parameters
    ----------
    X : the examples at the node; only ``len(X)`` is used, as the denominator
        of the branch weights.
    y : 0/1 labels aligned with ``X``; converted with ``np.asarray`` so it can
        be indexed positionally by the index lists.
    left_indices, right_indices : row positions assigned to each branch
        (as produced by ``split_indices``). They are expected to partition
        the rows of ``X``; otherwise the weights do not sum to 1.

    Returns
    -------
    ``w_left * entropy(p_left) + w_right * entropy(p_right)`` where ``w`` is
    the share of rows in a branch and ``p`` the fraction of 1-labels in it.
    An empty branch contributes 0 instead of raising ``ZeroDivisionError``
    (this happens whenever the chosen feature is constant over the node).
    """
    labels = np.asarray(y)
    n_samples = len(X)

    def branch_term(indices):
        # Contribution of one branch: (share of rows) * (entropy of its labels).
        n_branch = len(indices)
        if n_branch == 0:
            # No rows in this branch: zero weight, nothing to measure.
            return 0.0
        weight = n_branch / n_samples
        p_positive = np.sum(labels[indices]) / n_branch
        return weight * entropy(p_positive)

    return branch_term(left_indices) + branch_term(right_indices)

In [19]:
# Split the root on feature 0 (ear shape) and measure the weighted entropy of the two branches.
# left_indices / right_indices stay defined at notebook level and are used again further down.
left_indices, right_indices = split_indices(X_train, 0)
weighted_entropy(X_train, y_train, left_indices, right_indices)

0.7219280948873623

In [22]:
def information_gain(X,y,left_indices, right_indices):
    """Return the entropy reduction achieved by a split.

    The node entropy is computed from the fraction of 1-labels in ``y``
    (``sum(y) / len(y)``); the weighted entropy of the two branches given by
    ``left_indices`` and ``right_indices`` is then subtracted from it.
    """
    parent_entropy = entropy(sum(y) / len(y))
    children_entropy = weighted_entropy(X, y, left_indices, right_indices)
    return parent_entropy - children_entropy

In [23]:
# Information gain of the feature-0 (ear shape) split. This relies on the notebook-level
# left_indices / right_indices assigned by the earlier split_indices(X_train, 0) cell.
information_gain(X_train, y_train, left_indices, right_indices)

0.2780719051126377

#### Compute the information gain of splitting the root node on each feature

In [None]:
# Try each feature as the root split and report the information gain it yields.
# The list position of each name is used as the feature index passed to split_indices.
# NOTE: this loop rebinds the notebook-level left_indices / right_indices; once it
# finishes they hold the split for the last feature in the list ('Whiskers').
for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    left_indices, right_indices = split_indices(X_train, i)
    i_gain = information_gain(X_train, y_train, left_indices, right_indices)
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")
    