In [1]:
import math
from collections import Counter

def _partition(data, attribute):
    """Group the items of *data* by their value for *attribute*.

    Returns a dict mapping each distinct value to the list of items that
    carry it. Values appear in first-seen order and items keep their
    original relative order, so the result is deterministic for a given
    input.
    """
    groups = {}
    for item in data:
        groups.setdefault(item[attribute], []).append(item)
    return groups


def entropy(data, target_attribute='label'):
    """Calculate the Shannon entropy (in bits) of the class labels in a dataset.

    Args:
        data: Sequence of dict-like items.
        target_attribute: Key under which each item stores its class label.
            Defaults to 'label'.

    Returns:
        The entropy of the label distribution; 0 for an empty dataset.
    """
    total = len(data)
    label_counts = Counter(item[target_attribute] for item in data)
    result = 0
    for count in label_counts.values():
        probability = count / total
        result -= probability * math.log2(probability)
    return result


def information_gain(data, attribute, target_attribute='label'):
    """Calculate the information gain obtained by splitting on an attribute.

    Args:
        data: Sequence of dict-like items.
        attribute: Key of the attribute to split on.
        target_attribute: Key under which each item stores its class label.
            Defaults to 'label'.

    Returns:
        Entropy of the whole dataset minus the size-weighted entropy of the
        subsets produced by the split.
    """
    total = len(data)
    remainder = 0
    # Group once instead of re-filtering the whole dataset for every value.
    for subset in _partition(data, attribute).values():
        remainder += (len(subset) / total) * entropy(subset, target_attribute)
    return entropy(data, target_attribute) - remainder


def id3(data, features, target_attribute):
    """Build a decision tree with the ID3 algorithm.

    Args:
        data: Non-empty sequence of dict-like items.
        features: Keys of the attributes that may be used for splitting.
        target_attribute: Key under which each item stores its class label.

    Returns:
        A bare label for a leaf, otherwise a nested dict of the form
        ``{attribute: {value: subtree, ...}}``.

    Raises:
        ValueError: If *data* is empty.
    """
    if not data:
        raise ValueError("cannot build a decision tree from an empty dataset")

    labels = [item[target_attribute] for item in data]
    if len(set(labels)) == 1:  # Pure node: every item has the same label
        return labels[0]

    if not features:  # Nothing left to split on: fall back to a majority vote
        return Counter(labels).most_common(1)[0][0]

    # Score every candidate against the caller's target column; on ties,
    # max() keeps the earliest feature in `features`.
    best_attribute = max(
        features,
        key=lambda attribute: information_gain(data, attribute, target_attribute),
    )
    remaining_features = [f for f in features if f != best_attribute]

    # One branch per observed value of the chosen attribute.
    return {
        best_attribute: {
            value: id3(subset, remaining_features, target_attribute)
            for value, subset in _partition(data, best_attribute).items()
        }
    }

# Demo: grow a tree from a small weather / "play golf" dataset
if __name__ == "__main__":
    target_attribute = 'label'
    features = ['outlook', 'temperature', 'humidity', 'windy']

    # One tuple per observation: the four feature values, then the label.
    columns = (*features, target_attribute)
    observations = [
        ('sunny', 'hot', 'high', False, 'no'),
        ('sunny', 'hot', 'high', True, 'no'),
        ('overcast', 'hot', 'high', False, 'yes'),
        ('rainy', 'mild', 'high', False, 'yes'),
        ('rainy', 'cool', 'normal', False, 'yes'),
        ('rainy', 'cool', 'normal', True, 'no'),
        ('overcast', 'cool', 'normal', True, 'yes'),
        ('sunny', 'mild', 'high', False, 'no'),
        ('sunny', 'cool', 'normal', False, 'yes'),
        ('rainy', 'mild', 'normal', False, 'yes'),
        ('sunny', 'mild', 'normal', True, 'yes'),
        ('overcast', 'mild', 'high', True, 'yes'),
        ('overcast', 'hot', 'normal', False, 'yes'),
        ('rainy', 'mild', 'high', True, 'no'),
    ]
    # Expand each row into the dict form that id3 consumes.
    data = [dict(zip(columns, row)) for row in observations]

    decision_tree = id3(data, features, target_attribute)
    print("Decision Tree:", decision_tree, sep="\n")


Decision Tree:
{'outlook': {'rainy': {'windy': {False: 'yes', True: 'no'}}, 'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}, 'overcast': 'yes'}}
