In [3]:
import pandas as pd
import numpy as np
from math import log2

def class_entropy(counts):
    """Return the Shannon entropy, in bits, of a class-count distribution.

    Args:
        counts: Iterable of non-negative occurrence counts, one per class.

    Returns:
        The entropy as a float. A class with a zero count contributes
        nothing (the ``0 * log2(0)`` term is taken as 0), and an empty or
        all-zero distribution has entropy 0.0.
    """
    counts = list(counts)
    n = sum(counts)
    bits = 0.0
    if n == 0:
        return bits
    for count in counts:
        # Skip empty classes: log2(0) would raise ValueError.
        if count > 0:
            p = count / n
            # Subtracting term by term keeps a pure distribution at +0.0
            # (negating a summed 0.0 would print as "-0.0000").
            bits -= p * log2(p)
    return bits


# Step 1: Create the dataset
data = {
    'Age': ['Young', 'Young', 'Middle', 'Old', 'Old', 'Old', 'Middle', 'Young', 'Young',
            'Old', 'Young', 'Middle', 'Middle', 'Old'],
    'Class': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
              'Yes', 'Yes', 'No']
}
df = pd.DataFrame(data)

# Step 2: Print frequency table for Age
print("Step 2: Frequency Table for Age")
freq_table = df['Age'].value_counts()
print(freq_table)
print("\n")

# Step 3: Calculate class distribution for each Age value
print("Step 3: Class Distribution for each Age value")
class_dist = df.groupby(['Age', 'Class']).size()
print(class_dist)
print("\n")

# Step 4: Calculate total entropy
total_yes = len(df[df['Class'] == 'Yes'])
total_no = len(df[df['Class'] == 'No'])
total = len(df)

# Dataset-level class probabilities; guarded so an empty frame does not
# divide by zero.
p_yes = total_yes / total if total else 0.0
p_no = total_no / total if total else 0.0
# The helper tolerates a single-class dataset, where a bare log2(0) would
# raise "math domain error".
total_entropy = class_entropy([total_yes, total_no])

print("Step 4: Total Entropy")
print(f"Total Entropy = {total_entropy:.4f}")
print("\n")

# Step 5: Calculate entropy for each Age value
print("Step 5: Entropy for each Age value")
weighted_entropy = 0

for age in df['Age'].unique():
    age_data = df[df['Age'] == age]
    age_total = len(age_data)

    yes_count = len(age_data[age_data['Class'] == 'Yes'])
    no_count = len(age_data[age_data['Class'] == 'No'])

    # Same entropy definition as Step 4, applied to this Age group only.
    # p_yes / p_no are deliberately not reassigned here so they keep their
    # dataset-level meaning from Step 4.
    entropy = class_entropy([yes_count, no_count])

    # Weight each group's entropy by the share of rows it covers.
    weight = age_total / total
    weighted_entropy += weight * entropy

    print(f"Age {age}:")
    print(f"Entropy: {entropy:.4f}")
    print()

# Step 6: Calculate information gain
# Information gain = entropy before the split minus the weighted entropy
# after splitting on Age.
info_gain = total_entropy - weighted_entropy

print("Step 6: Final Results")
print(f"Total Entropy: {total_entropy:.4f}")
print(f"Weighted Entropy: {weighted_entropy:.4f}")
print(f"Information Gain: {info_gain:.4f}")

Step 2: Frequency Table for Age
Age
Young     5
Old       5
Middle    4
Name: count, dtype: int64


Step 3: Class Distribution for each Age value
Age     Class
Middle  Yes      4
Old     No       2
        Yes      3
Young   No       3
        Yes      2
dtype: int64


Step 4: Total Entropy
Total Entropy = 0.9403


Step 5: Entropy for each Age value
Age Young:
Entropy: 0.9710

Age Middle:
Entropy: 0.0000

Age Old:
Entropy: 0.9710

Step 6: Final Results
Total Entropy: 0.9403
Weighted Entropy: 0.6935
Information Gain: 0.2467
