In [1]:
import pandas as pd
from math import log2

In [2]:
# Toy training set: one row per person, with the columns in the order
# Age, Income, Married, Health, Class (Class is the label to predict).
data = [
    ["Young",  "High",   "No",  "Fair", "No"],
    ["Young",  "High",   "No",  "Good", "No"],
    ["Middle", "High",   "No",  "Fair", "Yes"],
    ["Old",    "Medium", "No",  "Fair", "Yes"],
    ["Old",    "Low",    "Yes", "Fair", "Yes"],
    ["Old",    "Low",    "Yes", "Good", "No"],
    ["Middle", "Low",    "Yes", "Good", "Yes"],
    ["Young",  "Medium", "No",  "Fair", "No"],
    ["Young",  "Low",    "Yes", "Fair", "Yes"],
    ["Old",    "Medium", "Yes", "Fair", "Yes"],
    ["Young",  "Medium", "Yes", "Good", "Yes"],
    ["Middle", "Medium", "No",  "Good", "Yes"],
    ["Middle", "High",   "Yes", "Fair", "Yes"],
    ["Old",    "Medium", "No",  "Good", "No"],
]

In [3]:
# Wrap the raw rows in a DataFrame so they can be grouped and counted by column.
df = pd.DataFrame(
    data,
    columns=["Age", "Income", "Married", "Health", "Class"],
)

# Frequency table for the Age attribute (one row per distinct age bucket).
print(
    df["Age"]
    .value_counts()
    .rename_axis("Age")
    .reset_index(name="Frequency")
)

      Age  Frequency
0   Young          5
1     Old          5
2  Middle          4


In [5]:
def entropy(counts):
    """Return the Shannon entropy, in bits, of a distribution given as counts.

    Parameters
    ----------
    counts : iterable of numbers
        Occurrence count per category. Any iterable is accepted, including
        one-shot iterators such as generators. Entries that are not
        strictly positive contribute no term to the entropy.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` with ``p = count / sum(counts)``. The result is
        ``0.0`` when there is nothing to sum (empty input or no positive
        count) and when a single category holds every observation.
    """
    # The counts are walked twice (once for the total, once for the terms),
    # so materialise them; otherwise a generator would be exhausted by sum()
    # and the entropy would silently come out as zero.
    counts = list(counts)
    total = sum(counts)
    bits = -sum((c / total) * log2(c / total) for c in counts if c > 0)
    # Negating a zero sum gives -0.0 (or the int 0 for empty input);
    # normalise both to a plain positive 0.0.
    return bits if bits else 0.0

In [6]:
# Entropy of the class label over the whole dataset, i.e. before any split.
class_counts = df["Class"].value_counts()
parent_entropy = entropy(class_counts.to_numpy())

In [8]:
# Entropy remaining after splitting on Age: each age group's class entropy,
# weighted by the fraction of all rows that fall into that group.
n = len(df)
weighted_entropy = 0.0
for _, labels in df.groupby("Age")["Class"]:
    share = len(labels) / n
    weighted_entropy += share * entropy(labels.value_counts().values)

In [9]:
# Information gain = class entropy before the split minus the weighted
# entropy that remains after splitting on Age.
info_gain = parent_entropy - weighted_entropy

for label, value in (
    ("Parent Entropy:", parent_entropy),
    ("Weighted Entropy:", weighted_entropy),
    ("Information Gain (Age):", info_gain),
):
    print(label, value)

Parent Entropy: 0.9402859586706311
Weighted Entropy: 0.6935361388961918
Information Gain (Age): 0.24674981977443933
