In [1]:
import pandas as pd
import math

# Create a DataFrame from the given classification data
data = {
    'OUTLOOK': ['Sunny', 'Sunny', 'Cloudy', 'Rainy', 'Rainy', 'Rainy', 'Cloudy', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Cloudy', 'Cloudy', 'Rainy'],
    'TEMPERATURE': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'HUMIDITY': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'WINDY': ['No', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes'],
    'PLAY': ["Don't Play", "Don't Play", 'Play', 'Play', 'Play', 'Play', 'Play', "Don't Play", 'Play', 'Play', 'Play', 'Play', 'Play', "Don't Play"]
}

df = pd.DataFrame(data)

# Calculate the entropy of the target variable 'PLAY'
play_counts = df['PLAY'].value_counts()
total_examples = play_counts.sum()
p_play = play_counts['Play'] / total_examples
p_dont_play = play_counts["Don't Play"] / total_examples
entropy_play = -p_play * math.log2(p_play) - p_dont_play * math.log2(p_dont_play)

print("Entropy of 'PLAY':", entropy_play)

# Calculate the information gain for each feature
def calculate_entropy(feature):
    feature_counts = df.groupby(feature)['PLAY'].value_counts()
    total_examples = feature_counts.sum()
    entropy = 0

    for value in df[feature].unique():
        value_counts = feature_counts[value]
        p_play = value_counts.get('Play', 0) / total_examples
        p_dont_play = value_counts.get("Don't Play", 0) / total_examples

        if p_play != 0 and p_dont_play != 0:
            entropy -= (p_play * math.log2(p_play) + p_dont_play * math.log2(p_dont_play))

    return entropy

def calculate_information_gain(feature):
    feature_entropy = calculate_entropy(feature)
    information_gain = entropy_play - feature_entropy
    return information_gain

# Calculate the information gain for each feature
features = ['OUTLOOK', 'TEMPERATURE', 'HUMIDITY', 'WINDY']
for feature in features:
    information_gain = calculate_information_gain(feature)
    print("Information Gain for", feature + ":", information_gain)


Entropy of 'PLAY': 0.863120568566631
Information Gain for OUTLOOK: -0.8024981256056956
Information Gain for TEMPERATURE: -0.8564186614745146
Information Gain for HUMIDITY: -0.1294934994504947
Information Gain for WINDY: -0.9792504246104774
