# In [1]:
import numpy as np
import pandas as pd

# Calculate the Entropy of a dataset
def entropy(data):
    """Return the Shannon entropy, in bits, of the last column of *data*.

    The last column is treated as the class label. Each distinct label's
    count is divided by the total number of rows to obtain its probability,
    and the entropy is ``-sum(p * log2(p))`` over those probabilities.

    Args:
        data: A pandas DataFrame whose final column holds the class labels.

    Returns:
        The entropy as a NumPy float; a single-class column yields zero.
    """
    labels = data.iloc[:, -1]
    # Relative frequency of every distinct label, measured against row count.
    shares = labels.value_counts() / len(data)
    weighted_logs = shares * np.log2(shares)
    return -weighted_logs.sum()

# Calculate Information Gain of a feature
def information_gain(data, feature):
    """Return the information gain obtained by splitting *data* on *feature*.

    The gain is the entropy of the whole dataset minus the weighted average
    entropy of the subsets produced by each distinct value of the feature.
    Each subset is weighted by its share of the rows in *data*.

    Args:
        data: A pandas DataFrame whose final column holds the class labels.
        feature: Name of the column to evaluate as a split candidate.

    Returns:
        The reduction in entropy achieved by the split.
    """
    row_total = len(data)
    column = data[feature]

    # Weighted entropy of the partitions, visited in value_counts() order.
    split_entropy = sum(
        (occurrences / row_total) * entropy(data[column == level])
        for level, occurrences in column.value_counts().items()
    )

    return entropy(data) - split_entropy

# ID3 Algorithm to create a Decision Tree
def id3(data, features):
    """Recursively build a decision tree with the ID3 algorithm.

    The last column of *data* is treated as the class label. At each node
    the feature with the highest information gain is chosen (the first such
    feature in *features* wins a tie), the rows are partitioned by that
    feature's distinct values, and a subtree is built for every partition
    using the remaining features.

    Args:
        data: A pandas DataFrame whose final column holds the class labels.
        features: Column names that may still be used for splitting.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of
        the form ``{feature: {value: subtree_or_label, ...}}``.
    """
    target = data.iloc[:, -1]

    # Pure node: every row carries the same class, so emit it as a leaf.
    if len(target.unique()) == 1:
        return target.iloc[0]

    # No features left to split on: fall back to the majority class.
    if len(features) == 0:
        return target.mode()[0]

    # Pick the feature with the highest information gain.
    gains = {feature: information_gain(data, feature) for feature in features}
    best_feature = max(gains, key=gains.get)

    remaining_features = [feature for feature in features if feature != best_feature]

    branches = {}
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        if len(subset) == 0:
            # unique() reports missing values (NaN/None), but an equality
            # comparison never matches them, so the partition is empty.
            # Recursing on it would raise or yield an empty subtree; use the
            # parent's majority class as the leaf, as ID3 prescribes for
            # empty partitions.
            branches[value] = target.mode()[0]
        else:
            branches[value] = id3(subset, remaining_features)

    return {best_feature: branches}

# Example dataset (replace with your own data)
# One tuple per observation; the final field is the class label.
data = pd.DataFrame(
    [
        ('Sunny', 'Hot', 'High', 'Weak', 'No'),
        ('Sunny', 'Hot', 'High', 'Strong', 'No'),
        ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Cool', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Cool', 'Low', 'Weak', 'No'),
        ('Overcast', 'Mild', 'Low', 'Strong', 'Yes'),
        ('Sunny', 'Mild', 'Low', 'Strong', 'Yes'),
        ('Sunny', 'Cool', 'Low', 'Weak', 'Yes'),
        ('Rainy', 'Mild', 'High', 'Strong', 'No'),
    ],
    columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'],
)

# Every column except the last (the target) is a candidate split feature.
features = list(data.columns[:-1])

# Grow the tree with ID3 and display it.
tree = id3(data, features)
print("Decision Tree:", tree, sep="\n")


# Output:
# Decision Tree:
# {'Outlook': {'Sunny': {'Temperature': {'Hot': 'No', 'Mild': 'Yes', 'Cool': 'Yes'}}, 'Overcast': 'Yes', 'Rainy': {'Humidity': {'High': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}, 'Low': 'No'}}}}
