# Exam Bonus Problem

## Problem 12.6

Implement Algorithm 4 for binary attributes and run it on a training set of your choice. You need to implement the actual learning algorithm on your own; you are not allowed to use libraries such as scikit-learn for that part.

In [None]:
class DecisionTreeNode:
    """Node of a binary decision tree.

    A node is either a leaf that carries a ``classification`` or an internal
    node that tests ``attribute`` and holds one subtree per outcome:
    ``left`` for False and ``right`` for True.
    """

    def __init__(self, attribute=None, classification=None):
        self.left = None  # Subtree for the False outcome
        self.right = None  # Subtree for the True outcome
        self.attribute = attribute  # The attribute used to split
        self.classification = classification  # For Leaf Nodes: True/False

    def is_leaf(self):
        """Return True when this node carries a classification."""
        # Compare against None explicitly: a leaf classified as False is
        # still a leaf.
        return self.classification is not None

    def add_branch(self, value, subtree):
        """Attach ``subtree`` under the branch selected by ``value``.

        A truthy ``value`` selects the right (True) branch; anything else
        selects the left (False) branch.
        """
        if value:
            self.right = subtree
        else:
            self.left = subtree

    def print_tree(self, depth=0, branch_label=""):
        """Print the subtree rooted at this node to stdout.

        Args:
            depth: Nesting level of this node; 0 for the root.
            branch_label: Label of the branch leading to this node. It is
                only printed when ``depth`` is greater than 0.

        A missing child is rendered as a branch leading to ``→ None``, at the
        same indentation a real child would use.
        """
        step = "│   "
        indent = step * depth

        # The branch label sits one level above the node's own content.
        if depth > 0:
            print(f"{step * (depth - 1)}├── {branch_label}")

        if self.is_leaf():
            print(f"{indent}→ {self.classification}")
            return

        print(f"{indent}Attribute {self.attribute}")

        # False branch (left) first, then True branch (right).
        for label, child in (("False", self.left), ("True", self.right)):
            if child is not None:
                child.print_tree(depth + 1, label)
            else:
                print(f"{indent}├── {label}")
                print(f"{indent}{step}→ None")

In [53]:
import math
import random


def entropy(examples):
    """Return the binary entropy, in bits, of the labels in ``examples``.

    Each example is a sequence whose last element is its label. Labels are
    interpreted by truthiness, matching how ``plurality_val`` counts them, so
    ``1``/``0`` labels work the same as ``True``/``False``.

    Returns 0 for an empty collection and for a collection whose labels are
    all positive or all negative.
    """
    if not examples:
        return 0

    positive_count = sum(1 for example in examples if example[-1])
    q = positive_count / len(examples)

    # A pure set carries no uncertainty; this also avoids log2(0).
    if q == 0 or q == 1:
        return 0

    return -(q * math.log2(q) + (1 - q) * math.log2(1 - q))

In [54]:
def plurality_val(examples):
    """Return the most common label among ``examples``.

    The label is the last element of each example and is judged by
    truthiness. When positives and negatives are equally frequent (including
    the case of no examples at all), the result is chosen at random.
    """
    votes = [bool(row[-1]) for row in examples]
    yes_votes = sum(votes)
    no_votes = len(votes) - yes_votes

    if yes_votes == no_votes:
        # Tie: break it randomly.
        return random.choice([True, False])
    return yes_votes > no_votes

In [55]:
def importance(attribute, examples):
    """Return the information gain of splitting ``examples`` on ``attribute``.

    The gain is the entropy of ``examples`` minus the size-weighted entropies
    of the two subsets obtained by partitioning on the truthiness of
    ``example[attribute]``.

    Args:
        attribute: Index of the attribute to evaluate within each example.
        examples: Sequence of examples; the last element of each is its label.

    Returns:
        The information gain, or 0 when ``examples`` is empty.
    """
    if not examples:
        # Nothing to split; without this guard the weights below would
        # divide by zero.
        return 0

    subset_false = [a for a in examples if not a[attribute]]
    subset_true = [a for a in examples if a[attribute]]
    total = len(examples)

    return (
        entropy(examples)
        - len(subset_false) / total * entropy(subset_false)
        - len(subset_true) / total * entropy(subset_true)
    )

In [56]:
def argmax(attributes, examples, importance):
    """Return the attribute with the highest ``importance`` score.

    Args:
        attributes: Iterable of candidate attributes.
        examples: Examples passed through to ``importance``.
        importance: Callable ``(attribute, examples) -> score``.

    Returns:
        The first attribute, in iteration order, whose score is strictly
        greater than every score seen before it, or ``None`` when no
        attribute scores above negative infinity.
    """
    winner = None
    winning_score = float("-inf")

    for candidate in attributes:
        score = importance(candidate, examples)
        # Only a strictly better score replaces the current winner, so the
        # earliest candidate wins ties.
        if not score > winning_score:
            continue
        winner, winning_score = candidate, score

    return winner

In [68]:
def dt_learning(examples, attributes, parent_examples):
    """Recursively learn a decision tree over binary attributes.

    Args:
        examples: List of examples; the last element of each is its label.
        attributes: Set of attribute indices still available for splitting.
        parent_examples: Examples of the parent node, used to choose a label
            when ``examples`` is empty.

    Returns:
        The root ``DecisionTreeNode`` of the learned (sub)tree.
    """
    # No examples reach this branch: fall back to the parent's majority label.
    if len(examples) == 0:
        return DecisionTreeNode(classification=plurality_val(parent_examples))

    # Every example agrees on the label: emit a leaf with that label.
    first_label = examples[0][-1]
    if all(row[-1] == first_label for row in examples):
        return DecisionTreeNode(classification=first_label)

    # Nothing left to split on: settle for the majority label.
    if len(attributes) == 0:
        return DecisionTreeNode(classification=plurality_val(examples))

    best = argmax(attributes, examples, importance)
    node = DecisionTreeNode(attribute=best)
    remaining = attributes - {best}

    # Build the False branch first, then the True branch.
    for branch_value in (False, True):
        matching = [row for row in examples if row[best] == branch_value]
        node.add_branch(branch_value, dt_learning(matching, remaining, examples))

    return node

### Test Dataset for decision tree learning

Format: [weather_sunny, weekend, holiday, go_outside]\
Attributes:\
    0 = Weather sunny\
    1 = Weekend\
    2 = Holiday\
Goal:\
    3 = Go_Outside



In [None]:
# Each row is [weather_sunny, weekend, holiday, go_outside].
examples1 = [
    [True, True, False, True],  # Sunny, Weekend, Not Holiday -> Go Outside
    [True, False, False, False],  # Sunny, Weekday, Not Holiday -> Stay Inside
    [True, False, False, False],  # Sunny, Weekday, Not Holiday -> Stay Inside
    [False, False, False, False],  # Rainy, Weekday, Not Holiday -> Stay Inside
    [True, True, True, True],  # Sunny, Weekend, Holiday -> Go Outside
    [True, False, True, True],  # Sunny, Weekday, Holiday -> Go Outside
    [False, True, True, True],  # Rainy, Weekend, Holiday -> Go Outside
    [False, False, True, False],  # Rainy, Weekday, Holiday -> Stay Inside
    [True, True, False, True],  # Sunny, Weekend, Not Holiday -> Go Outside
]

# Indices of the input attributes (the label at index 3 is excluded).
attributes = {0, 1, 2}  # Weather_Sunny, Weekend, Holiday

columns = ["Weather_Sunny", "Weekend", "Holiday", "Go_Outside"]

# Render the dataset as a simple text table.
print("Dataset 1:")
print("  " + "  |  ".join(columns))
print("-" * 64)
for example in examples1:
    # Pad every cell to the width of its column title.
    weather, weekend, holiday, go_out = (
        str(cell).ljust(len(title)) for cell, title in zip(example, columns)
    )
    print(
        "      {}      |     {}     |    {}   |   {}".format(
            weather, weekend, holiday, go_out
        )
    )

Dataset:
  Weather_Sunny  |  Weekend  |  Holiday  |  Go_Outside
----------------------------------------------------------------
      True               |     True        |    False     |   True      
      True               |     False       |    False     |   False     
      True               |     False       |    False     |   False     
      False              |     False       |    False     |   False     
      True               |     True        |    True      |   True      
      True               |     False       |    True      |   True      
      False              |     True        |    True      |   True      
      False              |     False       |    True      |   False     
      True               |     True        |    False     |   True      


In [None]:
# Learn a tree from dataset 1; for the initial call the examples also serve
# as their own parent_examples.
tree = dt_learning(examples1, attributes, examples1)

# Print the learned tree to stdout.
tree.print_tree()

Attribute 1
├── False
│   Attribute 2
│   ├── False
│   │   → False
│   ├── True
│   │   Attribute 0
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   → True
├── True
│   → True


In [95]:
# Each row is
# [good_weather, have_time, feel_energetic, equipment_available,
#  friend_available, do_sports].
sports_examples = [
    [True, True, True, True, True, True],  # Perfect conditions -> Do Sports
    # Good weather, time, energy, equipment -> Do Sports
    [True, True, True, True, False, True],
    [True, True, True, False, True, False],  # No equipment -> No Sports
    [True, True, True, False, False, False],  # No equipment, no friend -> No Sports
    [True, True, False, True, True, False],  # No energy -> No Sports
    [True, True, False, True, False, False],  # No energy, no friend -> No Sports
    [True, True, False, False, True, False],  # No energy, no equipment -> No Sports
    # No energy, no equipment, no friend -> No Sports
    [True, True, False, False, False, False],
    [True, False, True, True, True, False],  # No time -> No Sports
    [True, False, True, True, False, False],  # No time -> No Sports
    [True, False, True, False, True, False],  # No time, no equipment -> No Sports
    # No time, no equipment, no friend -> No Sports
    [True, False, True, False, False, False],
    [True, False, False, True, True, False],  # No time, no energy -> No Sports
    [True, False, False, True, False, False],  # No time, no energy -> No Sports
    # No time, no energy, no equipment -> No Sports
    [True, False, False, False, True, False],
    # No time, no energy, no equipment, no friend -> No Sports
    [True, False, False, False, False, False],
    [False, True, True, True, True, False],  # Bad weather -> No Sports
    [False, True, True, True, False, False],  # Bad weather -> No Sports
    [False, True, True, False, True, False],  # Bad weather, no equipment -> No Sports
    [False, True, True, False, False, False],  # Bad weather, no equipment -> No Sports
    [False, True, False, True, True, False],  # Bad weather, no energy -> No Sports
    [False, True, False, True, False, False],  # Bad weather, no energy -> No Sports
    # Bad weather, no energy, no equipment -> No Sports
    [False, True, False, False, True, False],
    # Bad weather, no energy, no equipment, no friend -> No Sports
    [False, True, False, False, False, False],
    [False, False, True, True, True, False],  # Bad weather, no time -> No Sports
    [False, False, True, True, False, False],  # Bad weather, no time -> No Sports
    # Bad weather, no time, no equipment -> No Sports
    [False, False, True, False, True, False],
    # Bad weather, no time, no equipment, no friend -> No Sports
    [False, False, True, False, False, False],
    # Bad weather, no time, no energy -> No Sports
    [False, False, False, True, True, False],
    # Bad weather, no time, no energy -> No Sports
    [False, False, False, True, False, False],
    # Bad weather, no time, no energy, no equipment -> No Sports
    [False, False, False, False, True, False],
    [False, False, False, False, False, False],  # Everything bad -> No Sports
    # Some additional realistic scenarios
    [True, True, True, True, True, True],  # Duplicate perfect day
    [True, True, True, True, False, True],  # Can do solo sports
    [False, True, True, True, True, True],  # Indoor sports on bad weather day
    [False, True, True, True, False, True],  # Indoor solo sports
    [True, False, True, True, True, True],  # Quick sports session
    [True, True, False, True, True, True],  # Light sports when tired
    [True, True, True, False, False, True],  # Bodyweight exercises
    [False, False, False, False, False, False],  # Lazy day
]

# Indices of the input attributes (the label at index 5 is excluded):
# Good_Weather, Have_Time, Feel_Energetic, Equipment_Available, Friend_Available
sports_attributes = {0, 1, 2, 3, 4}

# Column titles used when displaying the dataset
columns = [
    "Good_Weather",
    "Have_Time",
    "Feel_Energetic",
    "Equipment_Available",
    "Friend_Available",
    "Do_Sports",
]

# Render the dataset as a simple text table.
print("Sports Decision Dataset:")
print("  " + "  |  ".join(columns))
print("-" * 120)
for example in sports_examples:
    # Pad every cell to the width of its column title.
    weather, time, energy, equipment, friend, sports = (
        str(cell).ljust(len(title)) for cell, title in zip(example, columns)
    )
    print(
        "  {}           |  {}        |  {}         |  {}            |  {}         |  {}".format(
            weather, time, energy, equipment, friend, sports
        )
    )

# Summary statistics over the label column.
labels = [row[-1] for row in sports_examples]
print(f"\nTotal examples: {len(sports_examples)}")
print(f"Positive examples (Do Sports): {labels.count(True)}")
print(f"Negative examples (No Sports): {labels.count(False)}")


Sports Decision Dataset:
  Good_Weather  |  Have_Time  |  Feel_Energetic  |  Equipment_Available  |  Friend_Available  |  Do_Sports
------------------------------------------------------------------------------------------------------------------------
  True                   |  True             |  True                   |  True                           |  True                     |  True     
  True                   |  True             |  True                   |  True                           |  False                    |  True     
  True                   |  True             |  True                   |  False                          |  True                     |  False    
  True                   |  True             |  True                   |  False                          |  False                    |  False    
  True                   |  True             |  False                  |  True                           |  True                     |  False    
  True           

In [88]:
# Learn a tree from the sports dataset; for the initial call the examples
# also serve as their own parent_examples.
tree = dt_learning(sports_examples, sports_attributes, sports_examples)

# Print the learned tree to stdout.
tree.print_tree()

Attribute 1
├── False
│   Attribute 0
│   ├── False
│   │   → False
│   ├── True
│   │   Attribute 2
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   Attribute 3
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Attribute 4
│   │   │   │   ├── False
│   │   │   │   │   → False
│   │   │   │   ├── True
│   │   │   │   │   → False
├── True
│   Attribute 2
│   ├── False
│   │   Attribute 0
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   Attribute 3
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Attribute 4
│   │   │   │   ├── False
│   │   │   │   │   → False
│   │   │   │   ├── True
│   │   │   │   │   → True
│   ├── True
│   │   Attribute 3
│   │   ├── False
│   │   │   Attribute 0
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Attribute 4
│   │   │   │   ├── False
│   │   │   │   │   → False
│   │   │   │   ├── True
│   │   │   │   │   → False
│ 