In [19]:
import pandas as pd
import math

In [20]:
def entropy(data):
    """Return the base-2 Shannon entropy of the ``'Decision'`` column.

    Args:
        data: pandas DataFrame that contains a ``'Decision'`` column.

    Returns:
        The entropy in bits. The integer ``0`` is returned when ``data``
        has no rows, because there are no class counts to accumulate.
    """
    class_counts = data['Decision'].value_counts()
    n_rows = len(data)

    impurity = 0
    for count in class_counts.to_numpy():
        # Fraction of rows that carry this class label.
        share = count / n_rows
        impurity -= share * math.log2(share)
    return impurity

In [21]:
def information_gain(data, feature):
    """Return how much splitting on ``feature`` lowers the entropy of ``data``.

    Args:
        data: pandas DataFrame holding ``feature`` and the ``'Decision'``
            column that ``entropy`` reads.
        feature: name of the column to partition on.

    Returns:
        ``entropy(data)`` minus the size-weighted sum of the entropies of
        the partitions, one partition per distinct value of ``feature``.
    """
    column = data[feature]
    n_rows = len(data)

    # Each branch contributes its entropy weighted by the share of rows it holds.
    branches = (data[column == level] for level in column.unique())
    weighted_branch_entropy = sum(
        (len(branch) / n_rows) * entropy(branch) for branch in branches
    )

    return entropy(data) - weighted_branch_entropy

In [22]:
def find_best_split(data):
    """Return the feature whose split yields the highest information gain.

    The target column is excluded by name (``'Decision'``), matching how the
    other functions address it, so the result does not depend on where that
    column sits in the frame.

    Args:
        data: pandas DataFrame with feature columns and a ``'Decision'`` column.

    Returns:
        The name of the best feature, or ``None`` when no feature has a
        strictly positive information gain. On ties the first feature in
        column order is kept.
    """
    best_information_gain = 0
    best_feature = None

    for feature in data.columns:
        if feature == 'Decision':
            # The target itself must never be offered as a split candidate.
            continue
        ig = information_gain(data, feature)
        # Strict comparison: a zero-gain feature is never selected.
        if ig > best_information_gain:
            best_information_gain = ig
            best_feature = feature

    return best_feature

In [23]:
def build_id3_tree(data, depth=0, max_depth=3):
    """Recursively grow a decision tree over ``data``.

    Args:
        data: pandas DataFrame with feature columns and a ``'Decision'`` column.
        depth: depth of the node being built; the root is ``0``.
        max_depth: depth at which recursion stops and a leaf is emitted.

    Returns:
        A leaf (the most frequent ``'Decision'`` value of ``data``) or a dict
        ``{'feature': name, 'subtrees': {value: subtree, ...}}``.
    """
    decisions = data['Decision']

    # A node becomes a leaf when the depth budget is spent, when only one
    # class remains, or when no feature offers a positive information gain.
    stop_here = depth == max_depth or decisions.nunique() == 1
    split_feature = None if stop_here else find_best_split(data)
    if split_feature is None:
        return decisions.mode().iloc[0]

    split_column = data[split_feature]
    return {
        'feature': split_feature,
        'subtrees': {
            level: build_id3_tree(data[split_column == level], depth + 1, max_depth)
            for level in split_column.unique()
        },
    }

In [24]:
def _majority_leaf(tree):
    """Return the most frequent leaf label beneath ``tree`` (first seen wins ties)."""
    tally = {}
    pending = [tree]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            # Reversed so that popping visits the branches in insertion order.
            pending.extend(reversed(list(node['subtrees'].values())))
        else:
            tally[node] = tally.get(node, 0) + 1
    return max(tally, key=tally.get) if tally else None


def classify(instance, tree, default=None):
    """Predict the class of ``instance`` by walking ``tree``.

    Args:
        instance: mapping from feature name to value (anything that supports
            ``instance[feature]``).
        tree: a leaf label or a node dict of the form
            ``{'feature': name, 'subtrees': {value: subtree, ...}}``.
        default: label to return when the instance carries a value that the
            current node has no branch for. When ``None``, the most frequent
            leaf label beneath that node is returned instead.

    Returns:
        The predicted class label.

    Raises:
        KeyError: if ``instance`` lacks a feature that the tree splits on.
    """
    node = tree
    # Anything that is not a node dict is a leaf, whatever its type.
    while isinstance(node, dict):
        branches = node['subtrees']
        value = instance[node['feature']]
        if value not in branches:
            # The fallback comes from the tree itself, so this function does
            # not depend on any notebook-level variable.
            if default is not None:
                return default
            return _majority_leaf(node)
        node = branches[value]
    return node

In [25]:
def id3(train_data_m, label):
    """Build a tree for ``label`` by delegating to ``make_tree``.

    Args:
        train_data_m: training DataFrame; it is copied before being passed on.
        label: name of the target column.

    Returns:
        The dict that was handed to ``make_tree``.

    NOTE(review): ``make_tree`` is not defined in the visible part of this
    notebook. Presumably it fills the dict in place; confirm it exists,
    otherwise calling this function raises NameError.
    """
    working_copy = train_data_m.copy()
    # Distinct values of the target column.
    classes = working_copy[label].unique()
    root = {}
    make_tree(root, None, working_copy, label, classes)
    return root

In [26]:
# Load the observations and discard the "Day" column so it is never offered
# as a split feature.
data = pd.read_csv("../datasets/tree.csv").drop(columns="Day")

In [27]:
# Discretise the numeric readings into three labelled bands each. With
# pd.cut's defaults the bins are right-closed: (0, 70], (70, 80], (80, 100].
data = data.assign(
    Temp=pd.cut(x=data["Temp"], bins=[0, 70, 80, 100], labels=["Cool", "Mild", "Hot"]),
    Humidity=pd.cut(x=data["Humidity"], bins=[0, 70, 80, 100], labels=["Low", "Normal", "High"]),
)

In [28]:
# Grow the decision tree from the prepared frame; max_depth=3 caps how deep
# the recursion may go before a leaf is forced.
id3_tree = build_id3_tree(data, max_depth=3)

In [29]:
# Keys must match the training frame's column names ("Temp", not
# "Temperature"), because classify looks up each split feature in the
# instance by that name.
new_instance = {'Outlook': 'Sunny', 'Temp': 'Mild', 'Humidity': 'High', 'Wind': 'Weak'}
predicted_class = classify(new_instance, id3_tree)
print("Predicted class:", predicted_class)

Predicted class: No


In [30]:
def print_tree(tree, indent=""):
    """Print ``tree`` as an indented outline.

    Args:
        tree: a leaf label or a node dict of the form
            ``{'feature': name, 'subtrees': {value: subtree, ...}}``.
        indent: string prefixed to every line printed at this level.

    Each node prints its feature, then every branch value two spaces in,
    with the branch's subtree four spaces in. Leaves print as ``Class: ...``.
    """
    # Leaves are anything that is not a node dict.
    if not isinstance(tree, dict):
        print(indent + f"Class: {tree}")
        return

    split_feature = tree['feature']
    print(indent + f"Feature: {split_feature}")

    child_indent = indent + "    "
    for branch_value, child in tree['subtrees'].items():
        print(indent + f"  Value: {branch_value}")
        print_tree(child, child_indent)

In [31]:
# Show the fitted tree as an indented outline of features, branch values,
# and leaf classes.
print_tree(id3_tree)

Feature: Outlook
  Value: Sunny
    Feature: Humidity
      Value: High
        Class: No
      Value: Low
        Class: Yes
  Value: Overcast
    Class: Yes
  Value: Rain
    Feature: Wind
      Value: Weak
        Class: Yes
      Value: Strong
        Class: No
