In [1]:
import pandas as pd
import math
import numpy as np

# Read the training examples from the CSV file.
data = pd.read_csv("3-dataset.csv")

# Every column except the "answer" target is a candidate split attribute.
features = list(data.columns)
features.remove("answer")

# Define Node class
class Node:
    """A single node of the decision tree built by ID3.

    The same class is used for attribute nodes, branch nodes and leaves;
    which role a node plays is determined by how its fields are filled in.
    """

    def __init__(self):
        # Attribute name (for a split node) or attribute value (for a
        # branch / leaf node).
        self.value = ""
        # Predicted answer; only filled in for leaves.
        self.pred = ""
        # True when this node carries a prediction instead of children.
        self.isLeaf = False
        # Child nodes, in the order they were appended.
        self.children = []

# Entropy function
def entropy(examples):
    """Return the binary entropy, in bits, of the "answer" column.

    Rows whose "answer" equals "yes" are counted as positive; every other
    row is counted as negative.

    Args:
        examples: DataFrame containing an "answer" column.

    Returns:
        float: 0.0 when ``examples`` is empty or all rows fall into a single
        class, otherwise ``-(p*log2(p) + n*log2(n))`` for the positive and
        negative proportions ``p`` and ``n``.
    """
    # Count with one vectorised comparison instead of a row-by-row
    # iterrows() loop; float() keeps plain Python floats in the arithmetic.
    pos = float((examples["answer"] == "yes").sum())
    neg = float(len(examples)) - pos

    # A pure (or empty) set has no uncertainty; this also avoids log(0).
    if pos == 0.0 or neg == 0.0:
        return 0.0

    total = pos + neg
    p = pos / total
    n = neg / total
    return -(p * math.log(p, 2) + n * math.log(n, 2))

# Information Gain
def info_gain(examples, attr):
    """Return the information gain obtained by splitting on ``attr``.

    The gain is the entropy of ``examples`` minus the size-weighted entropy
    of each subset that shares one distinct value of ``attr``.

    Args:
        examples: DataFrame containing ``attr`` and an "answer" column.
        attr: Name of the column to evaluate as a split.

    Returns:
        The entropy reduction achieved by the split.
    """
    levels = np.unique(examples[attr])
    total_rows = len(examples)
    remaining = entropy(examples)
    for level in levels:
        subset = examples[examples[attr] == level]
        # Weight each subset's entropy by its share of the rows.
        weight = len(subset) / total_rows
        remaining -= weight * entropy(subset)
    return remaining

# ID3 Algorithm
def ID3(examples, attrs):
    """Recursively build a decision tree with the ID3 algorithm.

    Args:
        examples: DataFrame holding the attribute columns named in ``attrs``
            and an "answer" column with the target label.
        attrs: Iterable of column names still available for splitting.

    Returns:
        Node: When some attribute has positive information gain, a node whose
        ``value`` is that attribute's name and whose children are one branch
        node per distinct attribute value. A branch node is either a leaf
        (``isLeaf`` set, ``pred`` filled in) or has a single child subtree.
        When no attribute has positive gain, a leaf whose ``pred`` is the
        most common answer in ``examples`` (``value`` stays empty); if
        ``examples`` has no answers at all, a blank node.
    """
    attrs = list(attrs)  # allow any iterable; it is traversed more than once
    root = Node()
    max_gain = 0
    max_feat = None

    # Keep the attribute with the strictly largest positive gain; on ties the
    # earliest attribute in ``attrs`` wins.
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature

    if max_feat is None:
        # Nothing separates the examples any further (attributes exhausted,
        # or every remaining split has zero gain). Predict the most common
        # answer instead of returning a node that carries no prediction.
        answers = examples["answer"].mode()
        if not answers.empty:
            root.isLeaf = True
            root.pred = answers.iloc[0]
        return root

    root.value = max_feat
    # Same for every branch, so compute it once outside the loop.
    remaining_attrs = [attr for attr in attrs if attr != max_feat]

    for u in np.unique(examples[max_feat]):
        subdata = examples[examples[max_feat] == u]
        branch = Node()
        branch.value = u
        if entropy(subdata) == 0.0:
            # Pure subset: every row shares one answer, so take the first.
            branch.isLeaf = True
            branch.pred = subdata["answer"].iloc[0]
        else:
            branch.children.append(ID3(subdata, remaining_attrs))
        root.children.append(branch)

    return root

# Tree printing function
def printTree(root: Node, depth=0):
    """Print the tree rooted at ``root``, one node per line.

    Each line is indented with ``depth`` tab characters. A leaf is printed as
    ``value -> pred``; any other node is printed as ``[value]`` and followed
    by its children one level deeper.

    Args:
        root: Node to print.
        depth: Indentation level of ``root``.
    """
    indent = "\t" * depth
    label = f"{root.value} -> {root.pred}" if root.isLeaf else f"[{root.value}]"
    print(indent + label)
    if not root.isLeaf:
        for child in root.children:
            printTree(child, depth + 1)

# Build the decision tree from the full dataset using every feature column,
# then print it.
root = ID3(data, features)
printTree(root)


[outlook]
	overcast -> yes
	[rainy]
		[windy]
			False -> yes
			True -> no
	[sunny]
		[humidity]
			high -> no
			normal -> yes
