In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import math

# Shannon entropy of a collection of labels
def entropy(y):
    """Return the Shannon entropy (in bits) of the labels in *y*.

    Each distinct label contributes ``-p * log2(p)``, where ``p`` is the
    fraction of entries in *y* carrying that label. An empty *y* has no
    labels to sum over and therefore yields 0.
    """
    n_samples = len(y)
    accumulated = 0
    for occurrences in Counter(y).values():
        p = occurrences / n_samples
        accumulated += p * math.log2(p)
    # Every term above is <= 0, so negate to get a non-negative entropy.
    return -accumulated

# Information gain obtained by splitting on a single feature
def information_gain(data, feature, target):
    """Return the entropy reduction from partitioning *data* by *feature*.

    The result is the entropy of ``data[target]`` minus the size-weighted
    average entropy of ``target`` within each partition, where a partition
    is the set of rows sharing one distinct value of ``data[feature]``.
    """
    n_rows = len(data)
    feature_column = data[feature]

    conditional_entropy = 0
    for v in feature_column.unique():
        partition = data[feature_column == v]
        weight = len(partition) / n_rows
        conditional_entropy += weight * entropy(partition[target])

    return entropy(data[target]) - conditional_entropy

# Build a decision tree recursively with the ID3 algorithm
def id3(data, features, target):
    """Return an ID3 decision tree for predicting *target* from *features*.

    The tree is either a bare label (a leaf) or a nested dict of the form
    ``{feature: {feature_value: subtree, ...}}``.

    Recursion stops when every row carries the same target label (that
    label is returned) or when *features* is empty (the most frequent
    label, as given by ``Series.mode()``, is returned). Otherwise the
    feature with the highest information gain is chosen; ties go to the
    feature that appears first in *features*.
    """
    labels = data[target]

    # Pure node: nothing left to decide.
    if len(set(labels)) == 1:
        return labels.iloc[0]

    # Out of features: fall back to the majority label.
    if len(features) == 0:
        return labels.mode()[0]

    # Pick the split that reduces entropy the most.
    best_feature = max(
        features,
        key=lambda candidate: information_gain(data, candidate, target),
    )

    # The chosen feature is not reused further down this branch.
    remaining_features = [f for f in features if f != best_feature]

    # One child subtree per distinct value of the chosen feature.
    branches = {
        value: id3(data[data[best_feature] == value], remaining_features, target)
        for value in data[best_feature].unique()
    }
    return {best_feature: branches}

# Pretty-print a decision tree built by id3
def print_tree(tree, indent=""):
    """Print *tree* to stdout as an indented outline.

    A dict is treated as an internal node: each key is printed at the
    current indent, each of its branch values is printed on a ``└─`` line,
    and the branch's subtree is printed four spaces deeper. Anything that
    is not a dict is treated as a leaf and printed after an arrow.
    """
    if isinstance(tree, dict):
        deeper = indent + "    "
        for node, branches in tree.items():
            print(f"{indent}{node!s}")
            for branch_value, subtree in branches.items():
                print(f"{indent} └─ {branch_value!s}")
                print_tree(subtree, deeper)
    else:
        print(f"{indent}→ {tree!s}")

# Load dataset
file_path = '/content/tennis.csv'
data = pd.read_csv(file_path)

# Apply ID3 algorithm
target = 'play'
# Select features by excluding the target by name rather than by position,
# so the target is never used as a split feature (and no real feature is
# dropped) when it is not the last column of the CSV.
features = [column for column in data.columns if column != target]
decision_tree = id3(data, features, target)

# Print the decision tree
print_tree(decision_tree)

outlook
 └─ sunny
    humidity
     └─ high
        → no
     └─ normal
        → yes
 └─ overcast
    → yes
 └─ rainy
    windy
     └─ False
        → yes
     └─ True
        → no
