### Decision Trees (Calculate Entropy & Information Gain)

In [1]:
import pandas as pd
from math import log2

In [2]:
# Toy "Play Tennis" dataset, written row-wise: one tuple per observation,
# in the order given by `column_names`.
column_names = ['Outlook', 'Temp', 'Humidity', 'Wind', 'Play_Tennis']
observations = [
    ('Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('Sunny', 'Hot', 'High', 'Strong', 'No'),
    ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Cool', 'Normal', 'Weak', 'Yes'),
]

# Transpose the rows into the column -> list-of-values mapping
data = {name: list(values) for name, values in zip(column_names, zip(*observations))}

df = pd.DataFrame(data)

df

Unnamed: 0,Outlook,Temp,Humidity,Wind,Play_Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Weak,Yes


Apply Mapping

In [3]:
# Encode every column (features and target alike) as integer codes: each
# distinct value is numbered by its order of first appearance in the column.
for column in df.columns:
    codes = {label: code for code, label in enumerate(df[column].unique())}
    df[column] = df[column].map(codes)

df

Unnamed: 0,Outlook,Temp,Humidity,Wind,Play_Tennis
0,0,0,0,0,0
1,0,0,0,1,0
2,1,0,0,0,1
3,2,1,0,0,1
4,2,2,1,0,1
5,2,2,1,1,0
6,1,2,1,0,1


Get features and target

In [4]:
# Split the frame into the label column (y) and the predictor columns (x)
target = 'Play_Tennis'
features = df.columns.drop(target)
x, y = df[features], df[target]

Entropy of the Outcome -> E(S)

In [5]:
from math import log2

def entropy_target(target):
    """Compute the base-2 Shannon entropy E(S) of an outcome column.

    Parameters
    ----------
    target : pandas.Series
        Outcome labels, one per sample.

    Returns
    -------
    float
        ``sum(-p * log2(p))`` over the distinct outcomes, where ``p`` is the
        share of samples carrying that outcome. An empty Series yields 0.
    """
    total = len(target)
    counts = target.value_counts()
    # p(outcome) for each distinct outcome, in order of first appearance
    probabilities = [counts.get(outcome, 0) / total for outcome in target.unique()]
    # Terms with p == 0 are skipped: log2(0) is undefined and they add nothing
    return sum(-prob * log2(prob) for prob in probabilities if prob > 0)

# Baseline entropy E(S) of the label column
e_target = entropy_target(y)

Entropy of Feature

In [6]:
def entropy_feature(feature, target):
    """Return the entropy of ``target`` within each class of ``feature``.

    For every distinct value of ``feature`` the rows holding that value are
    selected and the base-2 Shannon entropy of the matching ``target`` values
    is computed.

    Rows are matched with boolean masks on the Series themselves rather than
    by looking columns up by ``.name``, so the function also works when the
    Series are unnamed or happen to share the same name.

    Parameters
    ----------
    feature : pandas.Series
        Values of a single feature, one per sample.
    target : pandas.Series
        Outcome labels, indexed like ``feature``.

    Returns
    -------
    list
        One entropy value per distinct class of ``feature``, in the order of
        ``feature.unique()`` (order of first appearance).
    """
    outcomes = target.unique()  # Distinct outcomes, fixed summation order
    entropy_list = []
    for feature_class in feature.unique():
        # Outcomes observed on the rows that belong to this class
        class_targets = target[feature == feature_class]
        num_class = len(class_targets)
        outcome_counts = class_targets.value_counts()
        # P(outcome | class). A class that matches no rows (e.g. NaN, which
        # never compares equal) gets probability 0 for every outcome.
        p_list = [
            outcome_counts.get(outcome, 0) / num_class if num_class != 0 else 0
            for outcome in outcomes
        ]
        # Terms with p == 0 are skipped: log2(0) is undefined
        entropy = sum(-p * log2(p) for p in p_list if p > 0)
        entropy_list.append(entropy)

    return entropy_list

# Showcase on a single feature: the result holds one entropy value per
# distinct 'Temp' class, in the order of x['Temp'].unique()
entropy_feature(x['Temp'], y)

[np.float64(0.9182958340544896),
 np.float64(0.0),
 np.float64(0.9182958340544896)]

Calculate the Weighted Average (class entropies weighted by class frequency)

In [10]:
e_target = entropy_target(y)
weight_average = []
for column in x.columns:
    feature = x[column]
    # Number of samples in each class, in the same order (feature.unique())
    # that entropy_feature uses for its per-class entropies
    class_counts = feature.value_counts()
    class_sizes = [class_counts.get(cls, 0) for cls in feature.unique()]
    class_entropies = entropy_feature(feature, y)
    # Weighted average: each class entropy scaled by its share of the samples
    weight = sum((size / len(x)) * class_entropy
                 for size, class_entropy in zip(class_sizes, class_entropies))
    print(f"Weight(Feature = {column}):", weight)
    weight_average.append(weight)

Weight(Feature = Outlook): 0.39355535745192405
Weight(Feature = Temp): 0.7871107149038481
Weight(Feature = Humidity): 0.9649839288804954
Weight(Feature = Wind): 0.5156629249195446


Information Gain

In [11]:
# Information gain of a feature = E(S) minus its weighted average entropy
information_gain = []
for column, weighted_entropy in zip(x.columns, weight_average):
    ig = e_target - weighted_entropy
    print(f"Information Gain(Feature = {column}):", ig)
    information_gain.append(ig)

Information Gain(Feature = Outlook): 0.5916727785823275
Information Gain(Feature = Temp): 0.19811742113040343
Information Gain(Feature = Humidity): 0.020244207153756077
Information Gain(Feature = Wind): 0.46956521111470695


Get the best feature for the Decision Tree (highest information gain)

In [12]:
# Pick the feature with the highest information gain. list.index returns the
# first position of the maximum, so ties resolve to the earliest column.
if information_gain:  # nothing to report when no gains were computed
    best_index = information_gain.index(max(information_gain))
    print("Best feature:", x.columns[best_index])

Best feature: Outlook
