In [None]:
import pandas as pd
import numpy as np
import math

class Node:
    """One vertex of the decision tree built by ``ID3``.

    A node is used in two roles: as a "feature" node, whose ``value`` is
    the name of the column being split on, and as a "value" node, whose
    ``value`` is one concrete value of the parent's feature.  A value node
    is either a leaf carrying a prediction or holds one child subtree.
    """

    def __init__(self):
        # Sub-nodes hanging below this node; a fresh list per instance.
        self.children: list = []
        # Feature name or feature value, filled in by the tree builder.
        self.value = ""
        # True only for nodes that carry a final prediction in ``pred``.
        self.isLeaf: bool = False
        # Predicted label(s); only meaningful when ``isLeaf`` is True.
        self.pred = ""

def entropy(data, target="Play Tennis"):
    """Return the Shannon entropy, in bits, of the class labels in ``data``.

    The labels are read from the ``target`` column.  Any number of distinct
    classes is supported (not only "Yes"/"No"); rows whose label is missing
    are ignored.

    Args:
        data: pandas DataFrame containing the ``target`` column.
        target: name of the label column; defaults to "Play Tennis".

    Returns:
        The entropy as a float.  It is 0.0 when ``data`` has no labelled
        rows or when all labelled rows share one class.
    """
    # value_counts() tallies every class in one vectorized pass and skips
    # missing labels.  Zero counts (possible for categorical columns) are
    # dropped so that log(0) is never evaluated.
    counts = [float(c) for c in data[target].value_counts() if c > 0]
    if len(counts) < 2:
        return 0.0
    total = sum(counts)
    return -sum((c / total) * math.log(c / total, 2) for c in counts)

def info_gain(dataset, feature):
    """Return the information gain obtained by splitting on ``feature``.

    The gain is the entropy of ``dataset`` minus the size-weighted entropy
    of each partition induced by the distinct values of ``feature``.

    Args:
        dataset: pandas DataFrame holding the examples.
        feature: name of the column to evaluate as a split attribute.

    Returns:
        The information gain as a number.
    """
    column = dataset[feature]
    total_rows = float(len(dataset))
    remaining = entropy(dataset)
    for level in np.unique(column):
        partition = dataset[column == level]
        # Weight each partition's entropy by its share of the rows.
        weight = float(len(partition)) / total_rows
        remaining -= weight * entropy(partition)
    return remaining

def ID3(dataset, features):
    """Build a decision tree for the "Play Tennis" label with ID3.

    The returned node is a feature node: its ``value`` is the name of the
    feature with the highest information gain, and it has one child per
    distinct value of that feature.  Each child is a value node that is
    either a leaf (``isLeaf`` True, ``pred`` holding the label array) or
    holds exactly one child, the subtree built for that partition.

    Args:
        dataset: pandas DataFrame with the feature columns and a
            "Play Tennis" label column.
        features: sequence of column names that may still be split on.

    Returns:
        The root ``Node`` of the (sub)tree.  When there is nothing to split
        on (no features given, or ``dataset`` has zero entropy) the root
        itself is returned as a leaf whose ``pred`` holds the most common
        label.
    """
    root = Node()

    # Nothing to split on: answer with the majority label instead of
    # indexing the frame with an empty feature name.
    if len(features) == 0 or entropy(dataset) == 0.0:
        root.isLeaf = True
        root.pred = dataset["Play Tennis"].mode().iloc[:1].to_numpy()
        return root

    # Pick the first feature with the highest gain.  Starting below any
    # possible gain guarantees a feature is chosen even when every gain
    # is zero (e.g. XOR-like data).
    best_feature = None
    best_gain = float("-inf")
    for feature in features:
        gain = info_gain(dataset, feature)
        if gain > best_gain:
            best_gain = gain
            best_feature = feature
    root.value = best_feature

    # Features still available below this node; computed once, not per branch.
    remaining = [f for f in features if f != best_feature]

    column = dataset[best_feature]
    for a in np.unique(column):
        subdata = dataset[column == a]
        branch = Node()
        branch.value = a
        if entropy(subdata) == 0.0:
            # Pure partition: the leaf predicts its label(s) directly.
            branch.isLeaf = True
            branch.pred = np.unique(subdata["Play Tennis"])
        elif not remaining:
            # Mixed labels but no feature left to split on: majority vote.
            branch.isLeaf = True
            branch.pred = subdata["Play Tennis"].mode().iloc[:1].to_numpy()
        else:
            branch.children.append(ID3(subdata, remaining))
        root.children.append(branch)
    return root

def printTree(root: Node, depth=0):
    """Print the tree rooted at ``root``, one node per line.

    Each node is indented with one tab per level of ``depth``.  Leaf nodes
    additionally show `` -> `` followed by their prediction.  Because the
    closing ``print()`` runs for every node, a leaf line is followed by an
    empty line.

    Args:
        root: node to print, followed recursively by its children.
        depth: indentation level of ``root``; callers normally omit it.
    """
    print("\t" * depth + str(root.value), end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for subtree in root.children:
        printTree(subtree, depth + 1)

def classify(root: Node, new):
    """Predict the label of ``new`` by walking the decision tree.

    The prediction is printed and also returned, so callers can use it
    programmatically.

    Args:
        root: feature node (or leaf) at which to start.
        new: mapping from feature name to the example's value for it;
            it is indexed as ``new[root.value]``.

    Returns:
        The ``pred`` of the leaf that was reached, or ``None`` when no
        branch of the tree matches the example's value for a feature
        (in which case nothing is printed).
    """
    if root.isLeaf:
        # The start node may itself already carry a prediction.
        print("Predicted Label for new example", new, " is:", root.pred)
        return root.pred
    for child in root.children:
        if child.value != new[root.value]:
            continue
        if child.isLeaf:
            print("Predicted Label for new example", new, " is:", child.pred)
            return child.pred
        # A non-leaf value node holds its subtree as its only child.
        return classify(child.children[0], new)
    return None

# Load the training examples and echo them.
dataset = pd.read_csv("PlayTennis.csv")
print("The DATASET", dataset, sep="\n")
print("----------------------------")

# Every column except the label is a candidate split attribute.
features = [column for column in dataset.columns if column != "Play Tennis"]

# Build the tree and display it.
root = ID3(dataset, features)
print("Decision Tree is:")
printTree(root)
print("----------------------------")




The DATASET
     Outlook Temperature Humidity    Wind Play Tennis
0      Sunny         Hot     High    Weak          No
1      Sunny         Hot     High  Strong          No
2   Overcast         Hot     High    Weak         Yes
3       Rain        Mild     High    Weak         Yes
4       Rain        Cool   Normal    Weak         Yes
5       Rain        Cool   Normal  Strong          No
6   Overcast        Cool   Normal  Strong         Yes
7      Sunny        Mild     High    Weak          No
8      Sunny        Cool   Normal    Weak         Yes
9       Rain        Mild   Normal    Weak         Yes
10     Sunny        Mild   Normal  Strong         Yes
11  Overcast        Mild     High  Strong         Yes
12  Overcast         Hot   Normal    Weak         Yes
13      Rain        Mild     High  Strong          No
----------------------------
Decision Tree is:
Outlook
	Overcast ->  ['Yes']

	Rain
		Wind
			Strong ->  ['No']

			Weak ->  ['Yes']

	Sunny
		Humidity
			High ->  ['No']

			Nor