In [1]:
import numpy as np
import pandas as pd

def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in ``target_col``.

    Args:
        target_col: Any array-like accepted by ``np.unique`` (list, ndarray,
            pandas Series) holding the class labels.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value. A column with a single distinct value (or no values
        at all) has entropy 0.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # Every count returned by np.unique is >= 1, so log2 never sees zero.
    probabilities = counts / counts.sum()
    return -np.sum(probabilities * np.log2(probabilities))

def infoGain(data,features,target):
    """Return the information gain from splitting ``data`` on one column.

    Args:
        data: DataFrame holding both the split column and the target column.
        features: Name of the single column to split on (despite the plural).
        target: Name of the class-label column.

    Returns:
        Entropy of ``data[target]`` minus the size-weighted average entropy
        of ``target`` within each distinct value of ``data[features]``.
    """
    total_entropy = entropy(data[target])

    levels, level_counts = np.unique(data[features], return_counts=True)
    n_rows = sum(level_counts)

    # Accumulate each partition's entropy, weighted by its share of the rows.
    weighted_entropy = 0
    for level, count in zip(levels, level_counts):
        partition_target = data[data[features] == level][target]
        weighted_entropy += (count / n_rows) * entropy(partition_target)

    return total_entropy - weighted_entropy

def ID3(data,features,target,pnode):
    """Recursively build an ID3 decision tree as nested dicts.

    Args:
        data: DataFrame with the candidate feature columns and the target.
        features: Sequence of column names still available for splitting.
        target: Name of the class-label column.
        pnode: Majority class of the parent node; used as the leaf label
            when this node cannot be split further. Pass ``None`` at the root.

    Returns:
        A class label (leaf), or a dict ``{column: {value: subtree, ...}}``
        where each subtree is again a label or a dict of the same shape.
    """
    # An empty partition has no labels to vote with; defer to the parent.
    if len(data) == 0:
        return pnode

    classes, class_counts = np.unique(data[target], return_counts=True)
    if len(classes) == 1:
        return classes[0]
    if len(features) == 0:
        return pnode

    # Majority class of this node. The counts must come from
    # return_counts=True; indexing the unique labels themselves would always
    # select the first label in sort order.
    pnode = classes[np.argmax(class_counts)]

    # Split on the feature with the highest information gain
    # (first one wins on ties, per np.argmax).
    gains = [infoGain(data, f, target) for f in features]
    col = features[np.argmax(gains)]
    remaining = [f for f in features if f != col]

    tree = {col: {}}
    for val in np.unique(data[col]):
        sub_data = data[data[col] == val].dropna()
        tree[col][val] = ID3(sub_data, remaining, target, pnode)
    return tree

def predict(query,tree,default = 1):
    """Classify ``query`` by walking a nested-dict decision tree.

    Args:
        query: Mapping of attribute name to the value observed for it.
        tree: A tree shaped like ``{attribute: {value: subtree_or_label}}``,
            or a bare label when the tree is a single leaf.
        default: Value returned whenever the walk cannot continue: the query
            has none of the attributes the current node splits on, or its
            value for that attribute has no branch in the tree.

    Returns:
        The leaf label reached, or ``default``.
    """
    node = tree
    while isinstance(node, dict):
        # Find the attribute this node splits on among the query's keys.
        for attribute in query:
            if attribute in node:
                break
        else:
            return default

        try:
            node = node[attribute][query[attribute]]
        except (KeyError, TypeError):
            # Unseen (or unhashable) attribute value: no branch to follow.
            return default
    return node


# Driver: load the dataset, hold out a random sample, train an ID3 tree on
# the remaining rows, and compare one held-out row's label to the prediction.
data = pd.read_csv('tennis.csv')
# Random 10% hold-out; no random_state is given, so the chosen rows differ
# from run to run.
testData = data.sample(frac = 0.1)
# Only the first held-out row is used as the query. The dict still contains
# the target column, which is also read below as the actual value.
test = testData.to_dict('records')[0]

# Remove the held-out rows from the training data (in place, by index label).
data.drop(testData.index, inplace=True)
print(data)

target = 'PlayTennis'
# Every column other than the target is a candidate split feature.
features = data.columns[data.columns!=target]
# The root has no parent, hence None for the parent-majority argument.
tree = ID3(data,features,target,None)
print()
print(tree)

print('Actual value =>', test['PlayTennis'])

print(test)
# 1.0 is the fallback that predict returns when the lookup of the query's
# value in the tree fails.
print('predicted value => ', predict(test, tree, 1.0) )

     Outlook Temperature Humidity  Windy PlayTennis
0      Sunny         Hot     High  False         No
1      Sunny         Hot     High   True         No
2   Overcast         Hot     High  False        Yes
3      Rainy        Mild     High  False        Yes
4      Rainy        Cool   Normal  False        Yes
5      Rainy        Cool   Normal   True         No
6   Overcast        Cool   Normal   True        Yes
7      Sunny        Mild     High  False         No
8      Sunny        Cool   Normal  False        Yes
10     Sunny        Mild   Normal   True        Yes
11  Overcast        Mild     High   True        Yes
12  Overcast         Hot   Normal  False        Yes
13     Rainy        Mild     High   True         No

{'Outlook': {'Overcast': 'Yes', 'Rainy': {'Windy': {False: 'Yes', True: 'No'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
Actual value => Yes
{'Outlook': 'Rainy', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Windy': False, 'PlayTennis': 'Yes'}
predicted v