In [2]:
import pandas as pd
import math

In [3]:
# Toy purchase dataset, stored column-wise: each keyword is a column name and
# each list holds that column's values for the ten customers, in row order.
data = dict(
    Age=[
        'Young', 'Young', 'Middle-aged', 'Senior', 'Senior',
        'Middle-aged', 'Senior', 'Young', 'Middle-aged', 'Young',
    ],
    Income=[
        'High', 'Low', 'High', 'Medium', 'Low',
        'Low', 'High', 'Medium', 'Medium', 'Low',
    ],
    Purchase=[
        'No', 'Yes', 'No', 'Yes', 'Yes',
        'No', 'Yes', 'No', 'Yes', 'Yes',
    ],
)

In [4]:
# Turn the column-wise dict into a DataFrame (one column per key, one row per customer).
df = pd.DataFrame.from_dict(data)

In [8]:
# Calculate Information Gain for an attribute
def information_gain(attribute, target):
    """Return the information gain, in bits, of splitting ``target`` on ``attribute``.

    Computes ``H(S) - sum_v (|S_v| / |S|) * H(S_v)``, where ``H`` is the base-2
    Shannon entropy of the class-label distribution and ``S_v`` is the set of
    instances whose attribute value is ``v``.  Every distinct value found in
    ``target`` is treated as its own class, so the labels do not have to be
    ``'Yes'``/``'No'`` and there may be more than two of them.

    Parameters
    ----------
    attribute : pandas.Series
        Categorical feature value of each instance.  Missing values are
        grouped into a branch of their own.
    target : pandas.Series
        Class label of each instance.  When its index equals the index of
        ``attribute`` the rows are paired by position; otherwise the labels
        are looked up by the index labels of ``attribute``.

    Returns
    -------
    float
        The information gain in bits; ``0.0`` when there are no instances.

    Raises
    ------
    ValueError
        If the two series cannot be paired as one label per attribute row.
    """

    def entropy(labels):
        """Base-2 Shannon entropy of the class distribution in ``labels``."""
        counts = labels.value_counts(dropna=False)
        total = counts.sum()
        # Classes with a zero count or with probability 1 contribute nothing;
        # skipping them avoids log2(0) and a cosmetic "-0.0" result.
        return float(-sum(
            (count / total) * math.log2(count / total)
            for count in counts
            if 0 < count < total
        ))

    if not attribute.index.equals(target.index):
        # Different labels or row order: fetch each row's label by index label.
        target = target.loc[attribute.index]
    if len(attribute) != len(target):
        raise ValueError("attribute and target must describe the same instances")

    # From here on rows are matched purely by position, so repeated index
    # labels cannot duplicate rows when a subset is selected.
    values = attribute.reset_index(drop=True)
    labels = target.reset_index(drop=True)

    total_instances = len(labels)
    if total_instances == 0:
        return 0.0

    # One boolean mask per branch of the split, in order of first appearance.
    branches = [values.isin([value]) for value in values.dropna().unique()]
    missing = values.isna()
    if missing.any():
        branches.append(missing)

    weighted_entropy = 0.0
    for in_branch in branches:
        subset = labels[in_branch]
        weighted_entropy += len(subset) / total_instances * entropy(subset)

    return entropy(labels) - weighted_entropy

In [9]:
# Split the frame into the candidate split attributes (X) and the class label (y)
X = df.loc[:, ['Age', 'Income']]
y = df.loc[:, 'Purchase']

In [10]:
# Gain in bits from splitting the purchase labels on the Age attribute
ig_age = information_gain(attribute=X['Age'], target=y)

In [11]:
# Gain in bits from splitting the purchase labels on the Income attribute
ig_income = information_gain(attribute=X['Income'], target=y)

In [12]:
# Report both gains, one per line.
print(
    f"Information Gain for Age: {ig_age}",
    f"Information Gain for Income: {ig_income}",
    sep="\n",
)

Information Gain for Age: 0.29546184423832167
Information Gain for Income: 0.09546184423832171
