In [189]:
import pandas as pd
import math
import numpy as np

# Load the training table; every column (label included) is listed in `features` for now.
data = pd.read_csv("PlayTennis.csv")
features = list(data.columns)


In [190]:
data

Unnamed: 0,PlayTennis,Outlook,Temperature,Humidity,Wind
0,no,Sunny,Hot,High,Weak
1,no,Sunny,Hot,High,Strong
2,yes,Overcast,Hot,High,Weak
3,yes,Rain,Mild,High,Weak
4,yes,Rain,Cool,Normal,Weak
5,no,Rain,Cool,Normal,Strong
6,yes,Overcast,Cool,Normal,Strong
7,no,Sunny,Mild,High,Weak
8,yes,Sunny,Cool,Normal,Weak
9,yes,Rain,Mild,Normal,Weak


In [191]:
features

['PlayTennis', 'Outlook', 'Temperature', 'Humidity', 'Wind']

In [192]:
# Drop the label column from the candidate attributes. Filtering (instead of
# list.remove) keeps this cell idempotent: re-running it no longer raises
# ValueError once "PlayTennis" has already been removed.
features = [feat for feat in features if feat != "PlayTennis"]

In [193]:
features

['Outlook', 'Temperature', 'Humidity', 'Wind']

In [194]:
class Node:
    """A node of the decision tree built by ID3 in this notebook.

    The tree builder uses the same class for attribute nodes (``value`` is the
    attribute name), for branch nodes (``value`` is one value of the parent's
    attribute) and for leaves (``isLeaf`` is True and ``pred`` holds the
    predicted label(s)).
    """

    def __init__(self):
        # Child nodes; a fresh list per instance so nodes never share children.
        self.children = []
        # Attribute name or attribute value, depending on the node's role.
        self.value = ""
        # True when the node carries a prediction in `pred`.
        self.isLeaf = False
        # Prediction stored on leaves. Defined here so every node has the
        # attribute instead of it being attached ad hoc by the tree builder.
        self.pred = None

In [195]:
# for understanding
import pandas as pd

# Tiny throw-away table used only to illustrate DataFrame.iterrows().
dataset = {
    "name": ["Dhivya", "Mary", "Sunil"],
    "age": [20, 21, 22],
}

df = pd.DataFrame(dataset)

# Each iteration yields an (index, row) pair; the row is indexed by column name.
for index, row in df.iterrows():
    print(row["age"])

20
21
22


In [196]:
df

Unnamed: 0,name,age
0,Dhivya,20
1,Mary,21
2,Sunil,22


The `iterrows()` method returns an iterator over the rows of the DataFrame,
allowing us to loop over the rows one at a time.

Each iteration produces an index object and a row object (a Pandas Series object).

In [197]:
def entropy(examples, target="PlayTennis", positive="yes"):
    """Return the binary entropy, in bits, of the label column of `examples`.

    Rows whose `target` value equals `positive` are counted as positive and
    every other row as negative.

    Parameters
    ----------
    examples : pandas.DataFrame
        Table holding the label column.
    target : str, default "PlayTennis"
        Name of the label column.
    positive : default "yes"
        Label value treated as the positive class.

    Returns
    -------
    float
        0.0 when `examples` is empty or contains only one of the two classes,
        otherwise ``-(p*log2(p) + n*log2(n))``.
    """
    total = len(examples)
    if total == 0:
        # Nothing to measure; also avoids a column lookup on an empty frame.
        return 0.0

    # Vectorised count instead of a Python-level iterrows() loop.
    pos = float((examples[target] == positive).sum())
    neg = float(total) - pos
    if pos == 0.0 or neg == 0.0:
        # A pure set has no uncertainty (and log(0) must be avoided).
        return 0.0

    p = pos / (pos + neg)
    n = neg / (pos + neg)
    return -(p * math.log(p, 2) + n * math.log(n, 2))

In [198]:
def info_gain(examples, attr):
    """Return the information gain obtained by splitting `examples` on `attr`.

    Starts from the entropy of the whole set and subtracts, for each distinct
    value of `attr`, the entropy of the matching subset weighted by that
    subset's share of the rows. Intermediate values are printed as a trace.
    """
    values = np.unique(examples[attr])
    print("\nunique:", values)

    remaining = entropy(examples)
    print("\ngain:", remaining)

    total_rows = float(len(examples))
    for value in values:
        subset = examples[examples[attr] == value]
        print("\nsubdata:", subset)
        weight = float(len(subset)) / total_rows
        remaining -= weight * entropy(subset)
        print("\ngain:", remaining)

    return remaining

In [199]:
def ID3(examples, attrs):
    """Recursively build an ID3 decision tree for the "PlayTennis" label.

    Parameters
    ----------
    examples : pandas.DataFrame
        Training rows; must contain the "PlayTennis" column and every column
        named in `attrs`.
    attrs : list
        Candidate attribute (column) names still available for splitting.
        The list is not modified.

    Returns
    -------
    Node
        An attribute node whose `value` is the chosen attribute and whose
        children are one node per observed value of that attribute. When no
        split is possible (no attributes left, or `examples` is already pure)
        a leaf node holding the majority label is returned instead.
    """
    root = Node()

    # Nothing to split on, or nothing left to separate: answer with the most
    # frequent label (first one in sorted order on ties). Without this guard
    # the selection loop below would leave the chosen attribute empty and the
    # column lookup would raise KeyError.
    if len(attrs) == 0 or entropy(examples) == 0.0:
        root.isLeaf = True
        root.pred = examples["PlayTennis"].mode().to_numpy()[:1]
        return root

    # Pick the attribute with the highest gain. Starting from None (rather
    # than 0) guarantees an attribute is chosen even when every gain is zero.
    max_gain = None
    max_feat = ""
    for feature in attrs:
        print ("\n",examples)
        gain = info_gain(examples, feature)
        if max_gain is None or gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat
    print ("\nMax feature attr",max_feat)

    # Attributes available below this node; identical for every branch, so it
    # is computed once instead of once per attribute value.
    new_attrs = attrs.copy()
    new_attrs.remove(max_feat)

    for u in np.unique(examples[max_feat]):
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata) == 0.0:
            # Pure subset: this branch ends in a leaf.
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["PlayTennis"])
            root.children.append(newNode)
        else:
            child = ID3(subdata, new_attrs)
            if child.isLeaf:
                # No further split was possible: reuse the majority-label leaf
                # as this branch's node so tree walkers see an ordinary leaf.
                child.value = u
                root.children.append(child)
            else:
                # Branch node for value `u`, with the sub-tree as only child.
                dummyNode = Node()
                dummyNode.value = u
                dummyNode.children.append(child)
                root.children.append(dummyNode)

    return root

In [200]:
def printTree(root: Node, depth=0):
    """Print the tree rooted at `root`, indenting each level by one tab.

    Each node's `value` is printed on its own line; a leaf additionally shows
    " -> " followed by its `pred`. Because the line-ending print() runs for
    every node, a leaf line is followed by one blank line.
    """
    # Indentation and node value are written without a trailing newline.
    print("\t" * depth, root.value, sep="", end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for node in root.children:
        printTree(node, depth + 1)

In [201]:
def classify(root: "Node", new):
    """Walk the decision tree and return the predicted label for `new`.

    Parameters
    ----------
    root : Node
        Attribute node whose `value` names the attribute to test and whose
        children each carry one value of that attribute.
    new : mapping
        The example to classify, indexed by attribute name.

    Returns
    -------
    The `pred` of the leaf that was reached (also printed), or None when no
    branch matches the example's value for the tested attribute.
    """
    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print ("Predicted Label for new example", new," is:", child.pred)
                # The original had a bare `exit` here, which is a no-op
                # expression; returning stops the search and hands the
                # prediction back to the caller.
                return child.pred
            # A branch node holds the next attribute node as its only child.
            return classify(child.children[0], new)
    return None

In [202]:
# Build the tree from the full table and display it.
root = ID3(data, features)
print("Decision Tree is:")
printTree(root)
print("------------------")

# Classify a single unseen example.
new = {
    "Outlook": "Sunny",
    "Temperature": "Hot",
    "Humidity": "Normal",
    "Wind": "Strong",
}
classify(root, new)


    PlayTennis   Outlook Temperature Humidity    Wind
0          no     Sunny         Hot     High    Weak
1          no     Sunny         Hot     High  Strong
2         yes  Overcast         Hot     High    Weak
3         yes      Rain        Mild     High    Weak
4         yes      Rain        Cool   Normal    Weak
5          no      Rain        Cool   Normal  Strong
6         yes  Overcast        Cool   Normal  Strong
7          no     Sunny        Mild     High    Weak
8         yes     Sunny        Cool   Normal    Weak
9         yes      Rain        Mild   Normal    Weak
10        yes     Sunny        Mild   Normal  Strong
11        yes  Overcast        Mild     High  Strong
12        yes  Overcast         Hot   Normal    Weak
13         no      Rain        Mild     High  Strong

unique: ['Overcast' 'Rain' 'Sunny']

gain: 0.9402859586706309

subdata:    PlayTennis   Outlook Temperature Humidity    Wind
2         yes  Overcast         Hot     High    Weak
6         yes  Overcast 