In [1]:
import numpy as np

|                                                     |   Ear Shape | Face Shape | Whiskers |   Cat  |
|:---------------------------------------------------:|:---------:|:-----------:|:---------:|:------:|
| <img src="images/0.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Present  |    1   |
| <img src="images/1.png" alt="drawing" width="50"/> |   Floppy   |  Not Round  |  Present  |    1   |
| <img src="images/2.png" alt="drawing" width="50"/> |   Floppy   |  Round      |  Absent   |    0   |
| <img src="images/3.png" alt="drawing" width="50"/> |   Pointy   |  Not Round  |  Present  |    0   |
| <img src="images/4.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Present  |    1   |
| <img src="images/5.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Absent   |    1   |
| <img src="images/6.png" alt="drawing" width="50"/> |   Floppy   |  Not Round  |  Absent   |    0   |
| <img src="images/7.png" alt="drawing" width="50"/> |   Pointy   |  Round      |  Absent   |    1   |
| <img src="images/8.png" alt="drawing" width="50"/> |    Floppy  |   Round     |  Absent   |    0   |
| <img src="images/9.png" alt="drawing" width="50"/> |   Floppy   |  Round      |  Absent   |    0   |


We will encode the categorical features numerically. Because every feature takes only two values, **one-hot encoding** reduces to a single 0/1 column per feature:

- Ear Shape: Pointy = 1, Floppy = 0
- Face Shape: Round = 1, Not Round = 0
- Whiskers: Present = 1, Absent = 0

Therefore, we have two arrays:

- `X_train`: for each example, contains 3 features:
    - Ear Shape (1 if pointy, 0 otherwise)
    - Face Shape (1 if round, 0 otherwise)
    - Whiskers (1 if present, 0 otherwise)

- `y_train`: whether the animal is a cat:
    - 1 if the animal is a cat
    - 0 otherwise

In [2]:
# Feature columns: [ear shape (1 = pointy), face shape (1 = round), whiskers (1 = present)].
# Rows follow the order of the table above (images/0.png ... images/9.png).
X_train = np.array(
    [
        [1, 1, 1],  # 0: pointy, round,     whiskers
        [0, 0, 1],  # 1: floppy, not round, whiskers
        [0, 1, 0],  # 2: floppy, round,     no whiskers
        [1, 0, 1],  # 3: pointy, not round, whiskers
        [1, 1, 1],  # 4: pointy, round,     whiskers
        [1, 1, 0],  # 5: pointy, round,     no whiskers
        [0, 0, 0],  # 6: floppy, not round, no whiskers
        [1, 1, 0],  # 7: pointy, round,     no whiskers
        [0, 1, 0],  # 8: floppy, round,     no whiskers
        [0, 1, 0],  # 9: floppy, round,     no whiskers
    ]
)

# Labels aligned row-for-row with X_train: 1 = cat, 0 = not a cat.
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [5]:
def entropy(p):
    """Return the binary entropy, in bits, of a node with positive fraction ``p``.

    Args:
        p: Fraction of positive examples in the node (a scalar).

    Returns:
        0 when ``p`` is exactly 0 or 1, otherwise
        ``-p*log2(p) - (1-p)*log2(1-p)``.
    """
    # Pure nodes are handled up front so log2(0) is never evaluated.
    if p in (0, 1):
        return 0

    q = 1 - p
    return -p * np.log2(p) - q * np.log2(q)

In [8]:
def split_indices(X,index_feature):
    """Partition the row positions of ``X`` by the value of one feature.

    Args:
        X: Iterable of rows; each row is indexable by ``index_feature``.
        index_feature: Position of the feature to split on.

    Returns:
        A ``(left_indices, right_indices)`` tuple of lists. ``left_indices``
        holds the positions of rows whose feature equals 1; every other row
        position goes to ``right_indices``.
    """
    left_indices, right_indices = [], []
    for position, row in enumerate(X):
        # Route the row position to whichever side its feature value selects.
        bucket = left_indices if row[index_feature] == 1 else right_indices
        bucket.append(position)
    return left_indices, right_indices

In [9]:
def weighted_entropy(X,y,left_indices,right_indices):
    """Return the weighted average entropy of the two branches of a split.

    Each branch contributes ``(branch size / len(X)) * entropy(p_branch)``,
    where ``p_branch`` is the fraction of positive labels in that branch.

    An empty branch (for example, when the feature has the same value for
    every sample) has zero weight and is skipped, so the split no longer
    raises ``ZeroDivisionError``. With no samples at all the result is 0.0.

    Args:
        X: Samples of the node; only ``len(X)`` is used, as the weight
            denominator.
        y: 0/1 labels, indexable by the positions in the index lists
            (a list or a NumPy array).
        left_indices: Positions of the samples sent to the left branch.
        right_indices: Positions of the samples sent to the right branch.

    Returns:
        The weighted entropy of the split, in bits.
    """
    n_samples = len(X)
    if n_samples == 0:
        return 0.0

    # Coerce once so plain Python lists can be indexed with a list of positions.
    labels = np.asarray(y)

    total = 0.0
    # Right branch first, then left: same summation order as the original formula.
    for branch in (right_indices, left_indices):
        if len(branch) == 0:
            # Zero weight: contributes nothing and has no defined fraction.
            continue
        p_branch = np.mean(labels[branch])
        total += (len(branch) / n_samples) * entropy(p_branch)
    return total


In [10]:
def information_gain(X,y,left_indices,right_indices):
    """Return the reduction in entropy obtained by splitting a node.

    Args:
        X: Samples of the node, forwarded to ``weighted_entropy``.
        y: 0/1 labels of the node.
        left_indices: Positions of the samples sent to the left branch.
        right_indices: Positions of the samples sent to the right branch.

    Returns:
        ``entropy(p_node) - weighted_entropy(...)``, where ``p_node`` is the
        fraction of positive labels in ``y``.
    """
    positive_fraction = sum(y) / len(y)
    entropy_before = entropy(positive_fraction)
    entropy_after = weighted_entropy(X, y, left_indices, right_indices)
    return entropy_before - entropy_after

In [13]:
# Evaluate each feature as a candidate split of the full training set and
# print the information gain it would achieve.
for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    # Column i of X_train is the 0/1 encoding of this feature; rows with a 1 go left.
    left_indices,right_indices = split_indices(X_train,i)
    ig = information_gain(X_train,y_train, left_indices,right_indices)
    print(f"Feature :{feature_name} has information gain : {ig:.5f}")

Feature :Ear Shape has information gain : 0.27807
Feature :Face Shape has information gain : 0.03485
Feature :Whiskers has information gain : 0.12451
