In [1]:
import numpy as np
import pandas as pd
from math import log2

In [2]:
# Play Tennis toy dataset, written one record per instance so each row can be
# read (and checked) on its own.
_columns = ['Instance', 'Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']
_records = [
    ('S1', 'Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('S2', 'Sunny', 'Hot', 'High', 'Strong', 'No'),
    ('S3', 'Overcast', 'Hot', 'High', 'Weak', 'Yes'),
    ('S4', 'Rain', 'Mild', 'High', 'Weak', 'Yes'),
    ('S5', 'Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('S6', 'Rain', 'Cool', 'Normal', 'Strong', 'No'),
    ('S7', 'Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
    ('S8', 'Sunny', 'Mild', 'High', 'Weak', 'No'),
    ('S9', 'Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('S10', 'Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
    ('S11', 'Sunny', 'Mild', 'Normal', 'Strong', 'Yes'),
    ('S12', 'Overcast', 'Mild', 'High', 'Strong', 'Yes'),
    ('S13', 'Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
    ('S14', 'Rain', 'Mild', 'High', 'Strong', 'No'),
]
data = pd.DataFrame(_records, columns=_columns)

In [3]:
def entropy(column):
    """Return the Shannon entropy, in bits, of the values in ``column``.

    The entropy is ``-sum(p * log2(p))`` taken over the relative frequency
    ``p`` of every distinct value that ``np.unique`` finds in ``column``.

    Parameters
    ----------
    column : sequence
        Collection of labels that supports ``len`` and ``np.unique``
        (for example a pandas Series or a list).

    Returns
    -------
    float
        Entropy in bits. ``0.0`` for an empty column or a column holding a
        single distinct value.
    """
    n_total = len(column)
    if n_total == 0:
        # Nothing observed: define the entropy as 0.0 rather than dividing
        # the counts by a zero length.
        return 0.0

    _, counts = np.unique(column, return_counts=True)
    probabilities = counts / n_total
    # np.unique only reports values that actually occur, so every p is > 0
    # and log2(p) is always defined.
    bits = -sum(p * log2(p) for p in probabilities)
    # A single-valued column yields -(1.0 * 0.0) == -0.0, which would be
    # formatted as "-0.0000"; hand back a plain positive zero instead.
    return 0.0 if bits == 0 else bits


def information_gain(data, target_column, feature_column):
    """Return the entropy reduction obtained by splitting on ``feature_column``.

    The result is the entropy of ``data[target_column]`` minus the
    size-weighted sum of the target entropies measured inside each group of
    rows that share one value of ``feature_column``.
    """
    # Uncertainty of the target before any split.
    baseline = entropy(data[target_column])

    # Distinct feature values and how many rows carry each of them.
    levels, level_counts = np.unique(data[feature_column], return_counts=True)
    n_rows = sum(level_counts)

    # Accumulate each group's target entropy, weighted by the group's share
    # of the rows.
    conditional = 0
    for level, level_count in zip(levels, level_counts):
        group_target = data[data[feature_column] == level][target_column]
        conditional += (level_count / n_rows) * entropy(group_target)

    return baseline - conditional

In [4]:
# Entropy of the class label over the whole dataset; kept in a variable so
# it can be reused after this cell.
target_entropy = entropy(data['Play Tennis'])
print("Entropies:", f"Entropy of Play Tennis: {target_entropy:.4f}", sep="\n")


Entropies:
Entropy of Play Tennis: 0.9403


In [5]:
# Attribute columns whose entropies are reported below.
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']

# Entropy of each attribute's own value distribution (not conditioned on the
# target), keyed by attribute name in the order listed above.
feature_entropies = {feature: entropy(data[feature]) for feature in features}

for feature, feature_entropy in feature_entropies.items():
    print(f"Entropy of {feature}: {feature_entropy:.4f}")

Entropy of Outlook: 1.5774
Entropy of Temperature: 1.5567
Entropy of Humidity: 1.0000
Entropy of Wind: 0.9852


In [6]:
# For every attribute, report how much splitting on it reduces the entropy
# of the 'Play Tennis' column.
print("\nInformation Gains:")
for feature in features:
    ig = information_gain(
        data,
        target_column='Play Tennis',
        feature_column=feature,
    )
    print(f"Information Gain for {feature}: {ig:.4f}")



Information Gains:
Information Gain for Outlook: 0.2467
Information Gain for Temperature: 0.0292
Information Gain for Humidity: 0.1518
Information Gain for Wind: 0.0481
