In [16]:
# import numpy as np
import pandas as pd
from collections import Counter

# Function to calculate entropy
def entropy(y):
    counts = np.bincount(y)
    probabilities = counts / len(y)
    ent = -np.sum([p * np.log2(p) for p in probabilities if p > 0])
    return ent

# Function to calculate information gain
def information_gain(X_column, y, feature_name):
    parent_entropy = entropy(y)
    values, counts = np.unique(X_column, return_counts=True)
    
    weighted_entropy = sum((counts[i] / sum(counts)) * entropy(y[X_column == values[i]]) for i in range(len(values)))
    gain = parent_entropy - weighted_entropy

    print(f"\n  📌 Feature: {feature_name}")
    print(f"  🔹 Parent Entropy: {parent_entropy:.4f}")
    for i in range(len(values)):
        subset_entropy = entropy(y[X_column == values[i]])
        print(f"    ├─ Value '{values[i]}' → Entropy: {subset_entropy:.4f}")
    print(f"  🔹 Information Gain: {gain:.4f}")

    return gain

# ID3 Algorithm
def id3(X, y, features, depth=0):
    indent = "    " * depth  # Indentation for clarity
    print(f"\n{indent}📍 ID3 Depth: {depth}, Features: {features}")

    # If all labels are the same, return that label (pure node)
    if len(set(y)) == 1:
        print(f"{indent}✅ Leaf Node → Class: {y[0]}")
        return y[0]

    # If no features left, return the majority class
    if len(features) == 0:
        majority_class = Counter(y).most_common(1)[0][0]
        print(f"{indent}✅ Leaf Node → Majority Class: {majority_class}")
        return majority_class

    # Calculate information gain for each feature
    gains = [information_gain(X[:, i], y, features[i]) for i in range(len(features))]
    best_feature = np.argmax(gains)

    print(f"\n{indent}🎯 Best Feature Selected: {features[best_feature]}")

    # Create the decision tree dictionary
    tree = {features[best_feature]: {}}
    unique_values = np.unique(X[:, best_feature])

    # Split based on the best feature
    for value in unique_values:
        sub_X = X[X[:, best_feature] == value]
        sub_y = y[X[:, best_feature] == value]

        print(f"{indent}🔀 Splitting on {features[best_feature]} = '{value}'")
        subtree = id3(sub_X, sub_y, np.delete(features, best_feature), depth + 1)
        tree[features[best_feature]][value] = subtree

    return tree

# User-defined dataset
data = {
    "Temperature": ["h", "h", "m", "m", "h", "m", "h", "m", "m", "h"],
    "Humidity": ["High", "High", "High", "High", "Medium", "High", "High", "High", "High", "Medium"],
    "Wind": ["s", "w", "w", "s", "w", "s", "w", "s", "s", "s"],
    "Rain": ["n", "y", "n", "n", "n", "y", "n", "n", "y", "n"],
    "Out": ["y", "y", "y", "n", "n", "n", "y", "y", "n", "y"]
}

df = pd.DataFrame(data)
X = df.iloc[:, :-1].values  # Features
y = np.array([1 if val == "y" else 0 for val in df["Out"]])  # Convert labels to 0/1
features = np.array(df.columns[:-1])

# Run the ID3 algorithm
decision_tree = id3(X, y, features)

print("\n🌳 Final Decision Tree:")
print(decision_tree)



📍 ID3 Depth: 0, Features: ['Temperature' 'Humidity' 'Wind' 'Rain']

  📌 Feature: Temperature
  🔹 Parent Entropy: 0.9710
    ├─ Value 'h' → Entropy: 0.7219
    ├─ Value 'm' → Entropy: 0.9710
  🔹 Information Gain: 0.1245

  📌 Feature: Humidity
  🔹 Parent Entropy: 0.9710
    ├─ Value 'High' → Entropy: 0.9544
    ├─ Value 'Medium' → Entropy: 1.0000
  🔹 Information Gain: 0.0074

  📌 Feature: Wind
  🔹 Parent Entropy: 0.9710
    ├─ Value 's' → Entropy: 1.0000
    ├─ Value 'w' → Entropy: 0.8113
  🔹 Information Gain: 0.0464

  📌 Feature: Rain
  🔹 Parent Entropy: 0.9710
    ├─ Value 'n' → Entropy: 0.8631
    ├─ Value 'y' → Entropy: 0.9183
  🔹 Information Gain: 0.0913

🎯 Best Feature Selected: Temperature
🔀 Splitting on Temperature = 'h'

    📍 ID3 Depth: 1, Features: ['Humidity' 'Wind' 'Rain']

  📌 Feature: Humidity
  🔹 Parent Entropy: 0.7219
    ├─ Value 'h' → Entropy: 0.7219
  🔹 Information Gain: 0.0000

  📌 Feature: Wind
  🔹 Parent Entropy: 0.7219
    ├─ Value 'High' → Entropy: -0.0000
    ├