In [7]:
# Import required libraries
import math
from collections import Counter, defaultdict

# 1. Entropy calculation functions
def entropy(class_probabilities):
    """Return the Shannon entropy, in bits, of a probability distribution.

    Args:
        class_probabilities: Iterable of class probabilities. Entries equal
            to zero are skipped, since their contribution to the entropy
            is zero and ``log2(0)`` is undefined.

    Returns:
        The sum of ``-p * log2(p)`` over the non-zero probabilities.
        An empty iterable yields 0.

    Raises:
        ValueError: If a probability is negative (logarithm domain error).
    """
    # math.log2 is documented as usually more accurate than math.log(p, 2),
    # which divides two rounded natural logarithms.
    return sum(-p * math.log2(p) for p in class_probabilities if p)

def class_probabilities(labels):
    """Return the relative frequency of each distinct label.

    The result is a list with one fraction per distinct label, ordered by
    the label's first appearance in ``labels``.
    """
    n_labels = len(labels)
    tallies = Counter(labels)
    return [tallies[label] / n_labels for label in tallies]

def data_entropy(labeled_data):
    """Return the entropy of the labels in a labeled dataset.

    ``labeled_data`` is an iterable of ``(features, label)`` pairs; only
    the label of each pair contributes to the result.
    """
    return entropy(
        class_probabilities([lbl for _features, lbl in labeled_data])
    )

# 2. Partitioning functions
def partition_entropy(subsets):
    """Return the size-weighted average entropy of a partition.

    Args:
        subsets: Iterable of labeled datasets (each a sized collection of
            ``(features, label)`` pairs). One-shot iterables such as
            generators are accepted.

    Returns:
        The sum over subsets of ``data_entropy(subset)`` weighted by the
        subset's share of the total number of examples. Returns 0.0 when
        there are no examples at all (no subsets, or only empty subsets).
    """
    # The subsets are traversed twice (once to count, once to weigh), so a
    # one-shot iterable must be materialized or the second pass sees nothing.
    subsets = list(subsets)
    total_count = sum(len(subset) for subset in subsets)

    # Nothing to weigh; also avoids dividing by zero for all-empty subsets.
    if total_count == 0:
        return 0.0

    return sum(data_entropy(subset) * len(subset) / total_count
               for subset in subsets)

def partition_by(inputs, attribute):
    """Group labeled examples by the value they hold for ``attribute``.

    Each example is a ``(features, label)`` pair whose first element is
    indexed with ``attribute``. Returns a ``defaultdict`` mapping each
    observed value to the list of examples carrying it, in input order.
    """
    buckets = defaultdict(list)
    for example in inputs:
        features = example[0]
        buckets[features[attribute]].append(example)
    return buckets

def partition_entropy_by(inputs, attribute):
    """Return the entropy left after splitting ``inputs`` on ``attribute``.

    The examples are grouped by their value for ``attribute`` and the
    weighted entropy of the resulting groups is returned.
    """
    return partition_entropy(partition_by(inputs, attribute).values())

# 3. Tree classification and building functions
def classify(tree, input):
    """Classify ``input`` by walking a decision tree down to a leaf.

    Args:
        tree: Either a leaf (a class label of any non-node type) or a
            decision node ``(attribute, subtree_dict)`` where
            ``subtree_dict`` maps attribute values to subtrees.
        input: Mapping of attribute name to value for the example to
            classify. Missing attributes are read as ``None``.

    Returns:
        The label stored at the leaf that is reached.

    Raises:
        KeyError: If the input's value is not a key of a node's
            ``subtree_dict`` and that dict has no ``None`` fallback entry.
    """
    # Recognize decision nodes by their structure instead of comparing
    # against the hard-coded labels 'Yes'/'No', so trees trained on any
    # label set (booleans, other strings, ...) are classified correctly.
    while (isinstance(tree, (tuple, list))
           and len(tree) == 2
           and isinstance(tree[1], dict)):
        attribute, subtree_dict = tree
        subtree_key = input.get(attribute)

        # Unseen or missing values fall back to the node's None branch.
        if subtree_key not in subtree_dict:
            subtree_key = None

        tree = subtree_dict[subtree_key]

    return tree

def build_tree_id3(inputs, split_attributes):
    """Grow a decision tree from labeled examples with the ID3 procedure.

    ``inputs`` is a sequence of ``(features, label)`` pairs and
    ``split_attributes`` lists the attribute names still available for
    splitting. The result is either a bare label (leaf) or a tuple
    ``(attribute, subtree_dict)`` whose dict maps each observed attribute
    value to a subtree, plus a ``None`` key holding the majority label.
    """
    labels = [lbl for _, lbl in inputs]

    # Pure node: every example agrees, so the label itself is the leaf.
    if len(set(labels)) == 1:
        return labels[0]

    # Nothing left to split on: fall back to the majority label.
    if not split_attributes:
        return Counter(labels).most_common(1)[0][0]

    # The attribute whose partition has the lowest entropy wins.
    chosen = min(split_attributes,
                 key=lambda attr: partition_entropy_by(inputs, attr))
    remaining = [attr for attr in split_attributes if attr != chosen]

    # One subtree per observed value of the chosen attribute.
    branches = {}
    for value, subset in partition_by(inputs, chosen).items():
        branches[value] = build_tree_id3(subset, remaining)

    # Values never seen during training resolve to the majority label.
    branches[None] = Counter(labels).most_common(1)[0][0]

    return (chosen, branches)

# 4. Sample dataset
# Training examples: a list of (features, label) tuples. Each features dict
# holds the four attributes 'Outlook', 'Temperature', 'Humidity' and 'Wind';
# each label is the string 'Yes' or 'No'.
inputs = [
    ({'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}, 'No'),
    ({'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Strong'}, 'No'),
    ({'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong'}, 'No'),
    ({'Outlook': 'Overcast', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong'}, 'Yes'),
    ({'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak'}, 'No'),
    ({'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Wind': 'Strong'}, 'Yes'),
    ({'Outlook': 'Overcast', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong'}, 'Yes'),
    ({'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'Normal', 'Wind': 'Weak'}, 'Yes'),
    ({'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong'}, 'No')
    # Add more (features, label) examples here if needed.
]

# Attributes the tree builder is allowed to split on; these are the keys
# of the feature dicts above.
split_attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind']

# 5. Build the tree
# The result is either a bare label or a nested (attribute, subtree_dict)
# tuple, as returned by build_tree_id3.
tree = build_tree_id3(inputs, split_attributes)

# 6. Make predictions
# Three feature dicts to classify; each uses the same four keys as the
# training examples.
test_sample = {
    'Outlook': 'Rain',
    'Temperature': 'Cool', 
    'Humidity': 'Normal',
    'Wind': 'Strong'
}

test_sample_2 = {
    'Outlook': 'Sunny',
    'Temperature': 'Hot', 
    'Humidity': 'High',
    'Wind': 'Weak'
}

test_sample_3 = {
    'Outlook': 'Overcast',
    'Temperature': 'Mild', 
    'Humidity': 'High',
    'Wind': 'Strong'
}

# Predict the label of each test sample with the tree built above.
predicted_label = classify(tree, test_sample)
predicted_label_2 = classify(tree, test_sample_2)
predicted_label_3 = classify(tree, test_sample_3)


# Print the results: the three predictions first, then the tree, the
# training data, the split attributes and the test samples, separated by
# dashed lines.
print(f"Predicted label 1 for {test_sample}: {predicted_label}")
print(f"Predicted label 2 for {test_sample_2}: {predicted_label_2}")
print(f"Predicted label 3 for {test_sample_3}: {predicted_label_3}")
print("--------------------------------------")
print(f"Tree: {tree}")
print("--------------------------------------")
print(f"Inputs: {inputs}")  
print("--------------------------------------")
print(f"Split attributes: {split_attributes}")
print("--------------------------------------")
print(f"Test sample 1: {test_sample}")
print(f"Test sample 2: {test_sample_2}")
print(f"Test sample 3: {test_sample_3}")




Predicted label 1 for {'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong'}: No
Predicted label 2 for {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}: No
Predicted label 3 for {'Outlook': 'Overcast', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong'}: Yes
--------------------------------------
Tree: ('Outlook', {'Sunny': ('Humidity', {'High': 'No', 'Normal': 'Yes', None: 'No'}), 'Overcast': 'Yes', 'Rain': ('Wind', {'Weak': 'Yes', 'Strong': 'No', None: 'Yes'}), None: 'Yes'})
--------------------------------------
Inputs: [({'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}, 'No'), ({'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Strong'}, 'No'), ({'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}, 'Yes'), ({'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak'}, 'Yes'), ({'Outlook': 'Rain', 'Temperature': 'Cool', 