In [3]:
import numpy as np
import csv
import math

def read_data(filename):
    """Load a CSV file into a header row and an array of data rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(headers, rows)`` tuple. ``headers`` is the first CSV row as a
        list of strings; ``rows`` is a NumPy ``object`` array built from all
        remaining rows.

    Raises:
        StopIteration: If the file contains no rows at all (no header line).
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, "\r\n" sequences embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile)
        headers = next(datareader)
        return headers, np.array(list(datareader), dtype=object)

class Node:
    """A single node of the decision tree.

    Every instance starts with the ``attribute`` given to the constructor
    (empty string by default), its own empty ``children`` list, and an empty
    ``answer`` string.
    """

    def __init__(self, attribute=""):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def entropy(S):
    """Return the base-2 Shannon entropy of the values in ``S``.

    ``S`` must expose a ``size`` attribute (e.g. a NumPy array). When ``S``
    holds at most one distinct value the result is the integer ``0``.
    """
    distinct, freq = np.unique(S, return_counts=True)
    p = freq / S.size
    if len(distinct) <= 1:
        return 0
    return -sum(p * np.log2(p))

def subtables(data, col, delete=True):
    """Partition the rows of ``data`` by the value in column ``col``.

    Returns a dict keyed by each distinct value of that column, in the order
    produced by ``np.unique``; each entry holds the rows carrying that value.
    When ``delete`` is true, column ``col`` is removed from every partition.
    """
    groups = {}
    for value in np.unique(data[:, col]):
        rows = data[data[:, col] == value]
        groups[value] = np.delete(rows, col, 1) if delete else rows
    return groups

def gain_ratio(data, col):
    """Return the gain ratio of splitting ``data`` on column ``col``.

    The last column of ``data`` is treated as the class label. The information
    gain of the split is divided by the split's intrinsic value; when that
    intrinsic value is zero the function returns ``0``.
    """
    partitions = subtables(data, col, delete=False).values()
    base_entropy = entropy(data[:, -1])

    # Intrinsic value: entropy of the partition sizes themselves.
    split_info = sum(
        -(len(part) / len(data)) * math.log2(len(part) / len(data))
        for part in partitions
        if len(part)
    )
    if not split_info:
        return 0

    # Size-weighted label entropy that remains after the split.
    remainder = sum(
        len(part) / len(data) * entropy(part[:, -1]) for part in partitions
    )
    return (base_entropy - remainder) / split_info

def create_node(data, metadata):
    """Recursively build a decision tree from ``data`` using gain ratio.

    Args:
        data: 2-D array whose last column is the class label and whose other
            columns are attributes.
        metadata: Sequence of column names aligned with the columns of
            ``data``.

    Returns:
        The root ``Node`` of the (sub)tree. A leaf carries a class label in
        ``answer``; an internal node carries the chosen column name in
        ``attribute`` and ``(value, child)`` pairs in ``children``.
    """
    labels, counts = np.unique(data[:, -1], return_counts=True)
    if len(labels) == 1:
        node = Node()
        node.answer = data[0, -1]
        return node
    if data.shape[1] <= 1:
        # No attribute columns are left but the labels still disagree (rows
        # with identical attributes and different classes). Splitting is
        # impossible and np.argmax([]) would raise, so emit a majority-class
        # leaf; ties go to the first label in np.unique's sorted order.
        node = Node()
        node.answer = labels[np.argmax(counts)]
        return node
    split = np.argmax([gain_ratio(data, col) for col in range(data.shape[1] - 1)])
    node = Node(metadata[split])
    # Every child sees the same reduced header list, so compute it once.
    remaining = np.delete(metadata, split, 0)
    for val, subset in subtables(data, split).items():
        node.children.append((val, create_node(subset, remaining)))
    return node

def print_tree(node, level=0):
    """Print the subtree rooted at ``node`` as an indented outline.

    A node with a truthy ``answer`` is shown as ``-> answer``; otherwise its
    ``attribute`` is shown. Each branch value is printed one level deeper than
    the node, and the corresponding child two levels deeper.
    """
    indent = "   " * level
    if node.answer:
        print(indent, "->", node.answer)
    else:
        # The empty middle field keeps the two-separator spacing that the
        # answer form has.
        print(indent, "", node.attribute)

    branch_indent = "   " * (level + 1)
    for branch_value, subtree in node.children:
        print(branch_indent, branch_value)
        print_tree(subtree, level + 2)

# Load the PlayTennis CSV, build the decision tree from it, and print the tree.
metadata, data = read_data(r"C:\Users\sarah\Downloads\PlayTennis.csv")
decision_tree = create_node(data, metadata)
print_tree(decision_tree)



  Outlook
    Overcast
       -> Yes
    Rain
        Wind
          Strong
             -> No
          Weak
             -> Yes
    Sunny
        Humidity
          High
             -> No
          Normal
             -> Yes
