# Exam Bonus Problem

## Problem 12.6

Implement Algorithm 4 for binary attributes and run it on a training set of your choice. You need to implement the actual learning algorithm on your own; you are not allowed to use libraries such as scikit-learn for that part.

___

New decision tree nodes: internal nodes store the attribute used for the decision, and leaf nodes store the resulting value as their classification:

In [119]:
import math
import random

In [None]:
class DecisionTreeNode:
    """Node of a binary decision tree.

    An internal node stores the attribute it splits on and holds a ``left``
    child (attribute is False) and a ``right`` child (attribute is True).
    A leaf node stores a classification instead.
    """

    def __init__(self, attribute=None, classification=None):
        """Create a node without children.

        Args:
            attribute: The attribute used to split at this node.
            classification: The label stored at a leaf (True/False).
                ``None`` means the node is not a leaf.
        """
        self.left = None  # Subtree followed when the attribute is False
        self.right = None  # Subtree followed when the attribute is True
        self.attribute = attribute  # The attribute used to split
        self.classification = classification  # For Leaf Nodes: True/False

    def is_leaf(self):
        """Return True if this node carries a classification."""
        return self.classification is not None

    def add_branch(self, value, subtree):
        """Attach ``subtree`` as the right child if ``value`` is truthy, else as the left child."""
        # True is Right subbranch
        if value:
            self.right = subtree
        else:
            self.left = subtree

    # New print method that visualizes the tree *1
    def print_tree(self, depth=0, branch_label="", attribute_names=None):
        """Print the subtree rooted at this node as an indented text diagram.

        Args:
            depth: Nesting level of this node; the root is printed with 0.
            branch_label: Label of the branch leading to this node. It is
                only printed when ``depth`` is greater than 0.
            attribute_names: Optional mapping from attribute to a display
                name. Attributes missing from the mapping are shown as
                ``Attribute_<attribute>``; without a mapping every attribute
                is shown as ``Attribute <attribute>``.
        """
        indent = "│   " * depth

        if depth > 0:
            # The branch label is drawn one level above this node's content.
            print(f"{indent[:-4]}├── {branch_label}")

        if self.is_leaf():
            print(f"{indent}→ {self.classification}")
            return

        # Use attribute name if mapping is provided, otherwise use index
        if attribute_names:
            attr_display = attribute_names.get(
                self.attribute, f"Attribute_{self.attribute}"
            )
        else:
            attr_display = f"Attribute {self.attribute}"
        print(f"{indent}{attr_display}")

        # Print False branch first (left), then True branch (right)
        for child, label in ((self.left, "False"), (self.right, "True")):
            if child:
                child.print_tree(depth + 1, label, attribute_names)
            else:
                # Missing branch: use the same columns a real child at
                # depth + 1 would use, so the diagram stays aligned.
                print(f"{indent}├── {label}")
                print(f"{indent}│   → None")

In [120]:
def entropy(examples):
    """Return the binary entropy, in bits, of the class labels in ``examples``.

    The label of an example is its last element and is interpreted by
    truthiness, so ``True``/``False`` and ``1``/``0`` labels are treated
    alike (the same way the attribute and label tests elsewhere in this
    file partition examples).

    Args:
        examples: Sequence of examples; each example is a sequence whose
            last element is the class label.

    Returns:
        0 for an empty or single-class set, otherwise the entropy of the
        positive/negative split (at most 1).
    """
    if not examples:
        return 0

    positive_count = sum(1 for example in examples if example[-1])
    q = positive_count / len(examples)

    # A pure set carries no uncertainty; returning early also avoids log(0).
    if q == 0 or q == 1:
        return 0

    return -(q * math.log(q, 2) + (1 - q) * math.log(1 - q, 2))

In [104]:
def plurality_val(examples):
    """Return the most common class label (True or False) in ``examples``.

    The label of an example is its last element, judged by truthiness.
    When both classes are equally frequent (including the empty case) the
    result is chosen at random.
    """
    votes_true = sum(1 for row in examples if row[-1])
    votes_false = sum(1 for row in examples if not row[-1])

    if votes_true == votes_false:
        # No majority: break the tie randomly
        return random.choice([True, False])
    return votes_true > votes_false

In [None]:
def importance(attribute, examples):
    """Return the information gain of splitting ``examples`` on ``attribute``.

    The gain is the entropy of ``examples`` minus the size-weighted entropy
    of the two subsets obtained by testing the attribute for truthiness.

    Args:
        attribute: Index of the attribute inside each example.
        examples: Sequence of examples (the last element is the label).

    Returns:
        The information gain, or 0.0 when ``examples`` is empty (there is
        nothing to split, and the weights would otherwise divide by zero).
    """
    if len(examples) == 0:
        return 0.0

    subset_false = [a for a in examples if not a[attribute]]
    subset_true = [a for a in examples if a[attribute]]

    return (
        entropy(examples)
        - len(subset_false) / len(examples) * entropy(subset_false)
        - len(subset_true) / len(examples) * entropy(subset_true)
    )

In [106]:
def argmax(attributes, examples, importance):
    """Return the attribute with the highest ``importance(attribute, examples)``.

    Attributes are visited in iteration order and only a strictly larger
    score replaces the current best, so the first attribute wins a tie.
    Returns ``None`` when no attribute scores above negative infinity
    (for example when ``attributes`` is empty).
    """
    best = (float("-inf"), None)  # (score, attribute) of the leader so far
    for candidate in attributes:
        score = importance(candidate, examples)
        if score > best[0]:
            best = (score, candidate)
    return best[1]

In [107]:
def dt_learning(examples, attributes, parent_examples):
    """Recursively learn a binary decision tree from ``examples``.

    Args:
        examples: List of examples; each example is a sequence of attribute
            values followed by the class label as its last element.
        attributes: Collection of attribute indices that may still be used
            for splitting. Any iterable of hashable indices is accepted.
        parent_examples: The examples of the parent call; used to pick a
            label when ``examples`` is empty.

    Returns:
        The root ``DecisionTreeNode`` of the learned (sub)tree.
    """
    if not examples:
        # No data for this branch: fall back to the parent's majority label.
        return DecisionTreeNode(classification=plurality_val(parent_examples))

    first_label = examples[0][-1]
    if all(example[-1] == first_label for example in examples):
        return DecisionTreeNode(classification=first_label)

    if not attributes:
        # Labels still disagree but nothing is left to split on.
        return DecisionTreeNode(classification=plurality_val(examples))

    A = argmax(attributes, examples, importance)
    remaining = set(attributes) - {A}
    tree = DecisionTreeNode(attribute=A)
    for value in [False, True]:
        # Partition by truthiness, the same test `importance` uses, so no
        # example is dropped when attribute values are not strict booleans.
        new_examples = [e for e in examples if bool(e[A]) == value]
        subtree = dt_learning(new_examples, remaining, examples)
        tree.add_branch(value, subtree)
    return tree

### Dataset 1: Outdoor Activity Decision

A simple dataset predicting whether to go outside based on weather and calendar factors.
Columns:

- Weather_Sunny (0): `True` if sunny weather
- Weekend (1): `True` if weekend day
- Holiday (2): `True` if holiday
- Go_Outside (3): Target - `True` if decide to go outside

Size: 9 examples

*1


In [None]:
# Each row: [Weather_Sunny, Weekend, Holiday, Go_Outside]
weather_examples = [
    [True, True, False, True],  # Sunny, Weekend, Not Holiday -> Go Outside
    [True, False, False, False],  # Sunny, Weekday, Not Holiday -> Stay Inside
    [True, False, False, False],  # Sunny, Weekday, Not Holiday -> Stay Inside
    [False, False, False, False],  # Rainy, Weekday, Not Holiday -> Stay Inside
    [True, True, True, True],  # Sunny, Weekend, Holiday -> Go Outside
    [True, False, True, True],  # Sunny, Weekday, Holiday -> Go Outside
    [False, True, True, True],  # Rainy, Weekend, Holiday -> Go Outside
    [False, False, True, False],  # Rainy, Weekday, Holiday -> Stay Inside
    [True, True, False, True],  # Sunny, Weekend, Not Holiday -> Go Outside
]

# Indices of the columns that may be used for splitting (the target is excluded)
weather_attributes = {0, 1, 2}  # Weather_Sunny, Weekend, Holiday

# Display name for every column index, including the target column
weather_attribute_names = {
    0: "Weather_Sunny",
    1: "Weekend",
    2: "Holiday",
    3: "Go_Outside",
}

# Print the dataset as a simple table: title, header row, separator, data rows
print("Dataset 1 (-> Go Outside):")
print("  " + "  |  ".join(weather_attribute_names[i] for i in range(4)))
print("-" * 64)
for example in weather_examples:
    # Pad every cell to the width of its column name
    weather, weekend, holiday, go_out = (
        str(example[i]).ljust(len(weather_attribute_names[i])) for i in range(4)
    )
    print(f"      {weather}      |     {weekend}     |    {holiday}   |   {go_out}")

Dataset 1 (-> Go Outside):
  Weather_Sunny  |  Weekend  |  Holiday  |  Go_Outside
----------------------------------------------------------------
      True               |     True        |    False     |   True      
      True               |     False       |    False     |   False     
      True               |     False       |    False     |   False     
      False              |     False       |    False     |   False     
      True               |     True        |    True      |   True      
      True               |     False       |    True      |   True      
      False              |     True        |    True      |   True      
      False              |     False       |    True      |   False     
      True               |     True        |    False     |   True      


In [None]:
# At the root, the full dataset also serves as its own "parent" examples.
tree = dt_learning(
    examples=weather_examples,
    attributes=weather_attributes,
    parent_examples=weather_examples,
)

# Show the learned tree with readable attribute names
tree.print_tree(attribute_names=weather_attribute_names)

Weekend
├── False
│   Holiday
│   ├── False
│   │   → False
│   ├── True
│   │   Weather_Sunny
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   → True
├── True
│   → True


### Dataset 2: Sports Activity Decision

A dataset predicting whether to do sports based on various personal and environmental factors.
Columns:

- Good_Weather (0): `True` if weather is good for sports
- Have_Time (1): `True` if have available time
- Feel_Energetic (2): `True` if feeling energetic
- Equipment_Available (3): `True` if sports equipment is available
- Friend_Available (4): `True` if friend is available to join
- Do_Sports (5): Target - `True` if decide to do sports

Size: 40 examples

*1

In [None]:
# Each row: [Good_Weather, Have_Time, Feel_Energetic, Equipment_Available,
#            Friend_Available, Do_Sports]
# fmt: off
sports_examples = [
    [True, True, True, True, True, True],
    [True, True, True, True, False, True],
    [True, True, True, False, True, False],
    [True, True, True, False, False, False],
    [True, True, False, True, True, False],
    [True, True, False, True, False, False],
    [True, True, False, False, True, False],
    [True, True, False, False, False, False],
    [True, False, True, True, True, False],
    [True, False, True, True, False, False],
    [True, False, True, False, True, False],
    [True, False, True, False, False, False],
    [True, False, False, True, True, False],
    [True, False, False, True, False, False],
    [True, False, False, False, True, False],
    [True, False, False, False, False, False],
    [False, True, True, True, True, False],
    [False, True, True, True, False, False],
    [False, True, True, False, True, False],
    [False, True, True, False, False, False],
    [False, True, False, True, True, False],
    [False, True, False, True, False, False],
    [False, True, False, False, True, False],
    [False, True, False, False, False, False],
    [False, False, True, True, True, False],
    [False, False, True, True, False, False],
    [False, False, True, False, True, False],
    [False, False, True, False, False, False],
    [False, False, False, True, True, False],
    [False, False, False, True, False, False],
    [False, False, False, False, True, False],
    [False, False, False, False, False, False],
    [True, True, True, True, True, True],
    [True, True, True, True, False, True],
    [False, True, True, True, True, True],
    [False, True, True, True, False, True],
    [True, False, True, True, True, True],
    [True, True, False, True, True, True],
    [True, True, True, False, False, True],
    [False, False, False, False, False, False],
]
# fmt: on

# Indices of the columns that may be used for splitting (the target is excluded):
# Good_Weather, Have_Time, Feel_Energetic, Equipment_Available, Friend_Available
sports_attributes = {0, 1, 2, 3, 4}

# Display name for every column index, including the target column
sports_attribute_names = {
    0: "Good_Weather",
    1: "Have_Time",
    2: "Feel_Energetic",
    3: "Equipment_Available",
    4: "Friend_Available",
    5: "Do Sports",
}

# Print the dataset as a simple table: title, header row, separator, data rows
print("Sports Decision Dataset:")
print("  " + "  |  ".join(sports_attribute_names[i] for i in range(6)))
print("-" * 120)
for example in sports_examples:
    # Pad every cell to the width of its column name
    weather, time, energy, equipment, friend, sports = (
        str(example[i]).ljust(len(sports_attribute_names[i])) for i in range(6)
    )
    print(
        f"  {weather}           |  {time}        |  {energy}         |  {equipment}            |  {friend}         |  {sports}"
    )

# Summary of the class balance
print(f"\nTotal examples: {len(sports_examples)}")
print(
    f"Positive examples (Do Sports): {[row[-1] for row in sports_examples].count(True)}"
)
print(
    f"Negative examples (No Sports): {[row[-1] for row in sports_examples].count(False)}"
)


Sports Decision Dataset:
  Good_Weather  |  Have_Time  |  Feel_Energetic  |  Equipment_Available  |  Friend_Available  |  Do Sports
------------------------------------------------------------------------------------------------------------------------
  True                   |  True             |  True                   |  True                           |  True                     |  True     
  True                   |  True             |  True                   |  True                           |  False                    |  True     
  True                   |  True             |  True                   |  False                          |  True                     |  False    
  True                   |  True             |  True                   |  False                          |  False                    |  False    
  True                   |  True             |  False                  |  True                           |  True                     |  False    
  True           

In [116]:
# At the root, the full dataset also serves as its own "parent" examples.
tree = dt_learning(
    examples=sports_examples,
    attributes=sports_attributes,
    parent_examples=sports_examples,
)

# Show the learned tree with readable attribute names
tree.print_tree(attribute_names=sports_attribute_names)

Have_Time
├── False
│   Good_Weather
│   ├── False
│   │   → False
│   ├── True
│   │   Feel_Energetic
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   Equipment_Available
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Friend_Available
│   │   │   │   ├── False
│   │   │   │   │   → False
│   │   │   │   ├── True
│   │   │   │   │   → True
├── True
│   Feel_Energetic
│   ├── False
│   │   Good_Weather
│   │   ├── False
│   │   │   → False
│   │   ├── True
│   │   │   Equipment_Available
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Friend_Available
│   │   │   │   ├── False
│   │   │   │   │   → False
│   │   │   │   ├── True
│   │   │   │   │   → False
│   ├── True
│   │   Equipment_Available
│   │   ├── False
│   │   │   Good_Weather
│   │   │   ├── False
│   │   │   │   → False
│   │   │   ├── True
│   │   │   │   Friend_Available
│   │   │   │   ├── False
│   │   │   │   │   → True
│   │   │ 

**Notes:**

*1 = This code/dataset was generated with an LLM (Claude Sonnet 4). This applies only to the tree printing and the datasets.