In [2]:
import numpy as np


In [3]:
class TreeNode:
    def __init__(self, feature=None, value=None, leaf_class=None):
        self.feature = feature
        self.value = value
        self.leaf_class = leaf_class
        self.children = {}

    def add_child(self, value, child):
        self.children[value] = child

In [4]:
def entropy(y):
    classes, counts = np.unique(y, return_counts=True)
    entropy = 0
    total_samples = len(y)
    for count in counts:
        p = count / total_samples
        entropy -= p * np.log2(p)
    return entropy

In [5]:
def information_gain(X, y, feature_idx):
    total_entropy = entropy(y)
    feature_values, counts = np.unique(X[:, feature_idx], return_counts=True)
    weighted_entropy = 0
    total_samples = len(y)
    for value, count in zip(feature_values, counts):
        subset_y = y[X[:, feature_idx] == value]
        weighted_entropy += (count / total_samples) * entropy(subset_y)
    return total_entropy - weighted_entropy

In [6]:
def id3(X, y, features):
    if len(set(y)) == 1:
        return TreeNode(leaf_class=y[0])
    if len(features) == 0:
        return TreeNode(leaf_class=np.argmax(np.bincount(y)))

    best_feature_idx = max(features, key=lambda feature_idx: information_gain(X, y, feature_idx))
    best_feature_values = np.unique(X[:, best_feature_idx])

    root = TreeNode(feature=best_feature_idx)

    new_features = [feature for feature in features if feature != best_feature_idx]

    for value in best_feature_values:
        subset_X = X[X[:, best_feature_idx] == value]
        subset_y = y[X[:, best_feature_idx] == value]
        child_node = id3(subset_X, subset_y, new_features)
        root.add_child(value, child_node)

    return root

In [7]:
def predict(tree, x):
    if tree.leaf_class is not None:
        return tree.leaf_class
    value = x[tree.feature]
    if value in tree.children:
        return predict(tree.children[value], x)
    else:
        return np.argmax(np.bincount(y))

In [36]:
# Example usage:
data = np.array([
    ['Sunny', 'Hot','Weak', 'No'],
    ['Sunny', 'Hot','Strong', 'No'],
    ['Overcast', 'Hot','Weak', 'Yes'],
    ['Rainy', 'Mild','Weak','Yes'],
    ['Rainy', 'Cool','Weak', 'Yes'],
    ['Rainy','Cool','Strong','No'],
    ['Overcast','Cool','Strong','Yes'],
    ['Sunny','Mild','Weak','No'],
    ['Sunny','Cool','Weak','Yes'],
    ['Rainy','Mild','Weak','Yes'],
    ['Sunny','Mild','Strong','Yes'],
    ['Overcast','Mild','Strong','Yes'],
    ['Overcast','Hot','Weak','Yes'],
    ['Rainy','Mild','Storng','No'],
])

X = data[:, :-1]
y = np.array([1 if label == 'Yes' else 0 for label in data[:, -1]])

features = list(range(X.shape[1]))

tree = id3(X, y, features)



In [44]:
# Predict on new data
new_data = np.array([
    ['Overcast', 'Mild','Strong'],
    ['Rainy', 'Hot','Strong'],
])

In [45]:
for sample in new_data:
    print(f"Prediction for {sample}: {'yes' if predict(tree, sample) == 1 else 'no'}")


Prediction for ['Overcast' 'Mild' 'Strong']: yes
Prediction for ['Rainy' 'Hot' 'Strong']: no


In [39]:
def print_tree(tree, indent=''):
    if tree.leaf_class is not None:
        print(indent + 'Predict:', tree.leaf_class)
    else:
        print(indent + str(tree.feature))
        for value, child in tree.children.items():
            print(indent + '|__ ' + str(value))
            print_tree(child, indent + '|   ')

# Usage Example
print_tree(tree)


0
|__ Overcast
|   Predict: 1
|__ Rainy
|   2
|   |__ Storng
|   |   Predict: 0
|   |__ Strong
|   |   Predict: 0
|   |__ Weak
|   |   Predict: 1
|__ Sunny
|   1
|   |__ Cool
|   |   Predict: 1
|   |__ Hot
|   |   Predict: 0
|   |__ Mild
|   |   2
|   |   |__ Strong
|   |   |   Predict: 1
|   |   |__ Weak
|   |   |   Predict: 0
