<a href="https://colab.research.google.com/github/Nikhilesh-075/Machine-Learning/blob/main/ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
from collections import Counter

def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Every distinct label contributes ``-p * log2(p)``, where ``p`` is the
    fraction of entries in ``labels`` that carry that label.  An empty
    collection has no labels to iterate over, so the result is ``0.0``.
    """
    n_samples = len(labels)
    result = 0.0
    # Accumulate one term per distinct label, in first-seen order.
    for freq in Counter(labels).values():
        share = freq / n_samples
        result -= share * math.log2(share)
    return result

def information_gain(parent_labels, subsets):
    """Return the entropy reduction achieved by partitioning the labels.

    ``parent_labels`` holds the labels before the split and ``subsets`` is an
    iterable of label lists, one per partition.  The gain is the parent
    entropy minus the size-weighted sum of the partition entropies, where
    each weight is ``len(partition) / len(parent_labels)``.
    """
    before = entropy(parent_labels)
    n_parent = len(parent_labels)
    after = sum(
        (len(part) / n_parent) * entropy(part)
        for part in subsets
    )
    return before - after

def split_dataset(features, labels, feature_values):
    """Group labels by the feature value found at the same position.

    Returns a dict with one key per entry of ``feature_values``.  Each key
    maps to the list of ``labels[i]`` for which ``features[i]`` equals that
    value, in their original order; a value that matches nothing maps to
    an empty list.
    """
    return {
        wanted: [labels[pos] for pos, seen in enumerate(features) if seen == wanted]
        for wanted in feature_values
    }

def unique_values(column):
    """Return the distinct values of ``column`` in first-seen order.

    Args:
        column: Iterable of hashable values.

    Returns:
        A list containing each distinct value exactly once, ordered by its
        first occurrence in ``column``.
    """
    # dict keys keep insertion order.  Iterating a set of strings instead
    # yields an order that changes between interpreter runs (hash
    # randomization), which made results built on this list irreproducible.
    return list(dict.fromkeys(column))

def id3(data, labels, features, depth=0):
    """Recursively build an ID3 decision tree, printing a trace of each step.

    ``data`` maps each feature name to its column of values, ``labels`` is
    the list of class labels aligned with those columns, and ``features``
    lists the feature names still available for splitting.  ``depth`` is the
    recursion level and only controls the indentation of the printed trace.

    Returns a bare label for a leaf, otherwise a nested dict of the form
    ``{feature: {value: subtree, ...}}``.
    """
    pad = "  " * depth
    first_label = labels[0]

    # Pure node: every label equals the first one.
    if labels.count(first_label) == len(labels):
        print(pad + f"Leaf: {first_label}")
        return first_label

    # Nothing left to split on: fall back to the most common label.
    if not features:
        winner = Counter(labels).most_common(1)[0][0]
        print(pad + f"Leaf (majority): {winner}")
        return winner

    print(f"{pad}Current Entropy: {entropy(labels):.4f}")

    # Score every candidate feature by the information gain of splitting on it.
    scored = []
    for name in features:
        column = data[name]
        partitions = [
            [label for row, label in enumerate(labels) if column[row] == candidate]
            for candidate in unique_values(column)
        ]
        score = information_gain(labels, partitions)
        scored.append((score, name))
        print(f"{pad}Info Gain for '{name}': {score:.4f}")

    # max() keeps the first of several equally good features.
    best_feature = max(scored, key=lambda pair: pair[0])[1]
    print(pad + f"Best Feature: {best_feature}")

    best_column = data[best_feature]
    # The features handed to every child are the same, so build the list once.
    remaining = [name for name in features if name != best_feature]

    branches = {}
    for value in unique_values(best_column):
        # Row positions whose best-feature value selects this branch.
        rows = [row for row, cell in enumerate(best_column) if cell == value]
        sub_data = {name: [data[name][row] for row in rows] for name in remaining}
        sub_labels = [
            label for row, label in enumerate(labels) if best_column[row] == value
        ]

        print(pad + f"Branch: {best_feature} = {value}")
        branches[value] = id3(sub_data, sub_labels, remaining, depth + 1)

    return {best_feature: branches}

def print_tree(tree, indent=""):
    """Print a nested decision tree, one line per branch and per leaf.

    A dict node of the form ``{feature: {value: subtree}}`` prints a
    ``feature = value:`` line for each branch and then its subtree two spaces
    deeper.  Anything that is not a dict is treated as a leaf and printed as
    ``--> leaf``.  ``indent`` is the prefix put in front of every line.
    """
    # Leaf: print it and stop recursing.
    if not isinstance(tree, dict):
        print(indent + f"--> {tree}")
        return

    child_indent = indent + "  "
    for feature, branches in tree.items():
        for value, child in branches.items():
            print(indent + f"{feature} = {value}:")
            print_tree(child, child_indent)

# Toy weather data set: one list per column, rows aligned by position.
Outlook = [
    "Sunny", "Sunny", "Overcast",
    "Rain", "Rain", "Rain",
]
Temperature = [
    "Hot", "Hot", "Hot",
    "Mild", "Cool", "Cool",
]
PlayTennis = [
    "No", "No", "Yes",
    "Yes", "Yes", "No",
]

# Feature columns keyed by name; the target column is passed separately.
dataset = {"Outlook": Outlook, "Temperature": Temperature}
features = list(dataset)

# Build the tree (id3 prints its own trace while it works).
print("\n--- Building Decision Tree ---\n")
decision_tree = id3(dataset, PlayTennis, features)

# Show the finished tree.
print("\n--- Final Decision Tree ---")
print_tree(decision_tree)



--- Building Decision Tree ---

Current Entropy: 1.0000
Info Gain for 'Outlook': 0.5409
Info Gain for 'Temperature': 0.2075
Best Feature: Outlook
Branch: Outlook = Sunny
  Leaf: No
Branch: Outlook = Overcast
  Leaf: Yes
Branch: Outlook = Rain
  Current Entropy: 0.9183
  Info Gain for 'Temperature': 0.2516
  Best Feature: Temperature
  Branch: Temperature = Mild
    Leaf: Yes
  Branch: Temperature = Cool
    Leaf (majority): Yes

--- Final Decision Tree ---
Outlook = Sunny:
  --> No
Outlook = Overcast:
  --> Yes
Outlook = Rain:
  Temperature = Mild:
    --> Yes
  Temperature = Cool:
    --> Yes
