In [4]:
import numpy as np
import pandas as pd
from collections import Counter

# Calculate Entropy
def entropy(data):
    """
    Calculate the Shannon entropy (in bits) of a collection of labels.
    Parameters:
        data: List or array of target labels (sized, with hashable items)
    Returns:
        entropy_value: Entropy of the labels as a float; 0.0 when the
                       collection is empty or holds a single class
    """
    total = len(data)
    if total == 0:
        # Nothing to measure; return a float like every other path does.
        return 0.0

    # Sum p * log2(p) over the classes, in first-seen order.
    weighted_log_sum = 0
    for count in Counter(data).values():
        probability = count / total
        weighted_log_sum += probability * np.log2(probability)

    entropy_value = -weighted_log_sum
    # A single-class collection gives -(1.0 * log2(1.0)) == -0.0, which is
    # falsy; report a plain 0.0 instead of a negative zero.
    return float(entropy_value) if entropy_value else 0.0

# Calculate Information Gain
def information_gain(data, feature_index, target):
    """
    Measure how much splitting on one feature column reduces label entropy.
    Parameters:
        data: 2-D array of feature values (indexed as data[:, feature_index])
        feature_index: Column index of the feature to evaluate
        target: Array of labels, one per row of data; must support
                boolean-mask indexing
    Returns:
        IG: Entropy of target minus the size-weighted entropy of the label
            subsets produced by each distinct value of the feature
    """
    baseline = entropy(target)
    column = data[:, feature_index]
    sample_count = len(target)
    distinct_values, occurrences = np.unique(column, return_counts=True)

    # Add up each subset's entropy, weighted by its share of the samples.
    remainder = 0
    for value, occurrence in zip(distinct_values, occurrences):
        subset_labels = target[column == value]
        remainder += (occurrence / sample_count) * entropy(subset_labels)

    return baseline - remainder

# ID3 Algorithm
def id3(data, target, features, depth=0, max_depth=None):
    """
    Recursively grow a decision tree with the ID3 algorithm.
    Parameters:
        data: Feature matrix (numpy array), one column per entry in features
        target: Target labels (array supporting boolean-mask indexing)
        features: List of feature names, in column order
        depth: Current depth of the tree (default: 0)
        max_depth: Maximum allowed depth of the tree (default: None, no limit)
    Returns:
        tree: Either a leaf label, or a dictionary of the form
              {feature_name: {feature_value: subtree, ...}}
    """
    # Leaf: every remaining sample carries the same label.
    if len(np.unique(target)) == 1:
        return target[0]

    # Leaf: nothing left to split on, or the depth budget is spent, so fall
    # back to the most common label.
    depth_exhausted = max_depth is not None and depth >= max_depth
    if len(features) == 0 or depth_exhausted:
        majority_label, _ = Counter(target).most_common(1)[0]
        return majority_label

    # Choose the column with the highest information gain (first one on ties).
    gains = [information_gain(data, column, target) for column, _ in enumerate(features)]
    split_index = np.argmax(gains)
    split_name = features[split_index]

    # The chosen feature is consumed: drop its name here and its column below.
    remaining_features = features[:split_index] + features[split_index + 1:]
    split_column = data[:, split_index]

    branches = {}
    for value in np.unique(split_column):
        rows = split_column == value
        branches[value] = id3(
            np.delete(data[rows], split_index, axis=1),
            target[rows],
            remaining_features,
            depth=depth + 1,
            max_depth=max_depth,
        )

    return {split_name: branches}

# Function to print the tree in a structured format
def print_tree(tree, depth=0):
    """
    Write the decision tree to stdout, one node per line, indented by depth.
    Parameters:
        tree: The decision tree (nested dictionary) or a leaf value
        depth: Current nesting level; each level adds two spaces of indent
    """
    indent = "  " * depth

    # Anything that is not a dictionary is a leaf label.
    if not isinstance(tree, dict):
        print(indent + f"--> {tree}")
        return

    # Keys are feature names or feature values; values are the nested subtree.
    for label, child in tree.items():
        print(indent + str(label))
        print_tree(child, depth + 1)

# Example Usage
if __name__ == "__main__":
    # Column names for the feature part of each sample row.
    feature_names = ["Outlook", "Temperature", "Humidity", "Wind"]
    features = feature_names

    # Fourteen samples; the last column of every row is the Yes/No label.
    data = np.array(
        [
            ["Sunny", "Hot", "High", "Weak", "No"],
            ["Sunny", "Hot", "High", "Strong", "No"],
            ["Overcast", "Hot", "High", "Weak", "Yes"],
            ["Rain", "Mild", "High", "Weak", "Yes"],
            ["Rain", "Cool", "Normal", "Weak", "Yes"],
            ["Rain", "Cool", "Normal", "Strong", "No"],
            ["Overcast", "Cool", "Normal", "Strong", "Yes"],
            ["Sunny", "Mild", "High", "Weak", "No"],
            ["Sunny", "Cool", "Normal", "Weak", "Yes"],
            ["Rain", "Mild", "Normal", "Weak", "Yes"],
            ["Sunny", "Mild", "Normal", "Strong", "Yes"],
            ["Overcast", "Mild", "High", "Strong", "Yes"],
            ["Overcast", "Hot", "Normal", "Weak", "Yes"],
            ["Rain", "Mild", "High", "Strong", "No"],
        ]
    )

    # Separate the label column from the feature columns, then fit the tree.
    target = data[:, -1]
    tree = id3(data[:, :-1], target, features)

    # Show the fitted tree.
    print("Structured Decision Tree:")
    print_tree(tree)


Structured Decision Tree:
Outlook
  Overcast
    --> Yes
  Rain
    Wind
      Strong
        --> No
      Weak
        --> Yes
  Sunny
    Humidity
      High
        --> No
      Normal
        --> Yes
