In [5]:
import math
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = {}
        self.answer = ""

def entropy(probabilities):
    return sum(-p * math.log2(p) for p in probabilities if p != 0)

def entropy(examples, target_attr):
    value_freq = {}
    data_entropy = 0.0

    for entry in examples:
        if entry[target_attr] in value_freq:
            value_freq[entry[target_attr]] += 1.0
        else:
            value_freq[entry[target_attr]]  = 1.0

    for freq in value_freq.values():
        data_entropy += (-freq/len(examples)) * math.log(freq/len(examples), 2)

    return data_entropy

def gain(examples, attr, target_attr):
    val_freq = {}
    subset_entropy = 0.0

    for entry in examples:
        if entry[attr] in val_freq:
            val_freq[entry[attr]] += 1.0
        else:
            val_freq[entry[attr]]  = 1.0

    for val in val_freq.keys():
        val_prob        = val_freq[val] / sum(val_freq.values())
        data_subset     = [entry for entry in examples if entry[attr] == val]
        subset_entropy += val_prob * entropy(data_subset, target_attr)

    return (entropy(examples, target_attr) - subset_entropy)

def choose_attribute(examples, attributes, target_attr):
    best_gain = 0.0
    best_attr = None

    for attr in attributes:
        new_gain = gain(examples, attr, target_attr)
        if new_gain > best_gain:
            best_gain = new_gain
            best_attr = attr

    return best_attr

def get_values(examples, attr):
    return list(set(entry[attr] for entry in examples))

def get_examples(examples, attr, value):
    return [entry for entry in examples if entry[attr] == value]

def build_tree(examples, attributes, target_attr):
    root = Node(None)

    if all(entry[target_attr] == 'Yes' for entry in examples):
        root.answer = 'Yes'
        return root

    if all(entry[target_attr] == 'No' for entry in examples):
        root.answer = 'No'
        return root

    if not attributes:
        root.answer = max(set(entry[target_attr] for entry in examples), key=list(entry[target_attr] for entry in examples).count)
        return root

    else:
        best = choose_attribute(examples, attributes, target_attr)
        root.attribute = best

        tree = Node(best)

        for val in get_values(examples, best):
            subtree = build_tree(
                get_examples(examples, best, val),
                [attr for attr in attributes if attr != best],
                target_attr)

            tree.children[val] = subtree

        return tree

def print_tree(tree, str):
    if tree.answer != "":
        print(str + " -> " + tree.answer)
        return
    for value in tree.children.keys():
        subtree = tree.children[value]
        print_tree(subtree, str + " -> (" + tree.attribute + " = " + value + ")")

dataset = [
    {'Day': 'D1', 'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'Play ball': 'No'},
    {'Day': 'D2', 'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Strong', 'Play ball': 'No'},
    {'Day': 'D3', 'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'Play ball': 'Yes'},
    {'Day': 'D4', 'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak', 'Play ball': 'Yes'},
    {'Day': 'D5', 'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong', 'Play ball': 'No'},
    {'Day': 'D6', 'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong', 'Play ball': 'No'},
    {'Day': 'D7', 'Outlook': 'Overcast', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong', 'Play ball': 'Yes'},
    {'Day': 'D8', 'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak', 'Play ball': 'No'},
    {'Day': 'D9', 'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Weak', 'Play ball': 'Yes'},
    {'Day': 'D10', 'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Wind': 'Weak', 'Play ball': 'Yes'},
    {'Day': 'D11', 'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Wind': 'Strong', 'Play ball': 'Yes'},
    {'Day': 'D12', 'Outlook': 'Overcast', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong', 'Play ball': 'Yes'},
    {'Day': 'D13', 'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'Normal', 'Wind': 'Weak', 'Play ball': 'Yes'},
    {'Day': 'D14', 'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong', 'Play ball': 'No'}
]

attributes = ['Day', 'Outlook', 'Temperature', 'Humidity', 'Wind']

target_attr = 'Play ball'

decision_tree = build_tree(dataset, attributes, target_attr)

print_tree(decision_tree, "")

def entropy(probabilities):
    return sum(-p * math.log2(p) for p in probabilities if p != 0)

def entropy(examples, target_attr):
    value_freq = {}
    data_entropy = 0.0

    for entry in examples:
        if entry[target_attr] in value_freq:
            value_freq[entry[target_attr]] += 1.0
        else:
            value_freq[entry[target_attr]]  = 1.0

    for freq in value_freq.values():
        data_entropy += (-freq/len(examples)) * math.log(freq/len(examples), 2)

    return data_entropy

entropy_sweak = entropy(get_examples(dataset, 'Wind', 'Weak'), target_attr='Play ball')
entropy_sstrong = entropy(get_examples(dataset, 'Wind', 'Strong'), target_attr='Play ball')

entropy_s = 0.940

gain_s_wind = entropy_s - (8/14)*entropy_sweak - (6/14)*entropy_sstrong

print(f"Gain(S,Wind) = Entropy(S) - (8/14)*Entropy(Sweak) - (6/14)*Entropy(Sstrong)")
print(f"= {entropy_s} - (8/14)*{entropy_sweak} - (6/14)*{entropy_sstrong}")
print(f"= {gain_s_wind:.3f}")

print(f"Entropy(Sweak) = - (6/8)*log2(6/8) - (2/8)*log2(2/8) = {entropy_sweak:.3f}")
print(f"Entropy(Sstrong) = - (3/6)*log2(3/6) - (3/6)*log2(3/6) = {entropy_sstrong:.3f}")

 -> (Day = D6) -> No
 -> (Day = D14) -> No
 -> (Day = D13) -> Yes
 -> (Day = D10) -> Yes
 -> (Day = D1) -> No
 -> (Day = D5) -> No
 -> (Day = D4) -> Yes
 -> (Day = D3) -> Yes
 -> (Day = D2) -> No
 -> (Day = D7) -> Yes
 -> (Day = D9) -> Yes
 -> (Day = D8) -> No
 -> (Day = D12) -> Yes
 -> (Day = D11) -> Yes
Gain(S,Wind) = Entropy(S) - (8/14)*Entropy(Sweak) - (6/14)*Entropy(Sstrong)
= 0.94 - (8/14)*0.863120568566631 - (6/14)*0.9852281360342516
= 0.025
Entropy(Sweak) = - (6/8)*log2(6/8) - (2/8)*log2(2/8) = 0.863
Entropy(Sstrong) = - (3/6)*log2(3/6) - (3/6)*log2(3/6) = 0.985


In [6]:
def classify(sample, decision_tree_rules):
    for rule in decision_tree_rules:
        if all(sample[attr] == value for attr, value in rule['conditions'].items()):
            return rule['outcome']
    return "Unknown"

decision_tree_rules = [
    {'conditions': {'Outlook': 'Sunny', 'Humidity': 'High'}, 'outcome': 'No'},
    {'conditions': {'Outlook': 'Sunny', 'Humidity': 'Normal'}, 'outcome': 'Yes'},
    {'conditions': {'Outlook': 'Overcast'}, 'outcome': 'Yes'},
    {'conditions': {'Outlook': 'Rain', 'Wind': 'Weak'}, 'outcome': 'Yes'},
    {'conditions': {'Outlook': 'Rain', 'Wind': 'Strong'}, 'outcome': 'No'},
]

new_sample = {'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Strong'}
classification = classify(new_sample, decision_tree_rules)
print(f"The classification for the new sample is: {classification}")

The classification for the new sample is: No
