In [11]:
import pandas as pd
from collections import Counter
import math
from pprint import pprint

# Load the training examples. Every column except the "PlayTennis" label is
# a candidate attribute for the tree to split on.
df_tennis = pd.read_csv("p-tennis.csv")
attribute_names = df_tennis.columns.tolist()
attribute_names.remove("PlayTennis")
print(attribute_names)

def entropy_of_list(lst):
    """Return the entropy of the empirical distribution of the items in *lst*.

    Each distinct item's relative frequency (its count divided by the number
    of items) is used as its probability; the resulting list of
    probabilities is passed to ``entropy``.
    """
    frequencies = Counter(iter(lst))
    total = len(lst)
    proportions = []
    for occurrences in frequencies.values():
        proportions.append(occurrences / total)
    return entropy(proportions)

def entropy(probs):
    """Return the Shannon entropy, in bits, of a probability distribution.

    Args:
        probs: Iterable of probabilities.

    Returns:
        The sum of ``-p * log2(p)`` over the non-zero probabilities; ``0``
        for an empty iterable.

    Raises:
        ValueError: If a probability is negative (``math.log`` domain error).
    """
    # A zero probability contributes nothing (p * log(p) -> 0 as p -> 0), but
    # math.log(0, 2) raises ValueError, so those terms are skipped explicitly.
    # Negative values are deliberately NOT skipped so invalid input still fails.
    return sum(-prob * math.log(prob, 2) for prob in probs if prob != 0)

def info_gain(df, split_attribute, target_attribute):
    """Compute, print and return the information gain of a single split.

    The gain is the entropy of ``target_attribute`` over all rows of ``df``
    minus the weighted entropy of ``target_attribute`` inside each group of
    rows sharing a ``split_attribute`` value, where each group is weighted by
    its share of the rows.
    """
    total_rows = len(df.index)

    def share_of_rows(group):
        # Fraction of all rows that fall into this group.
        return len(group) / total_rows

    # One row per distinct split value: the entropy of the target within
    # that group and the fraction of the observations the group holds.
    per_value = df.groupby(split_attribute).agg(
        {target_attribute: [entropy_of_list, share_of_rows]}
    )
    per_value.columns = ['Entropy', 'propobservations']

    entropy_before = entropy_of_list(df[target_attribute])
    entropy_after = sum(per_value['Entropy'] * per_value['propobservations'])
    gain = entropy_before - entropy_after
    print(split_attribute, 'IG :', gain)
    return gain

def id3(df, target_attribute, attribute_names, default_class=0):
    """Recursively build an ID3 decision tree from the rows of ``df``.

    Args:
        df: pandas DataFrame holding the training examples.
        target_attribute: Name of the column containing the class label.
        attribute_names: Names of the columns still available to split on.
        default_class: Value returned when ``df`` contains no examples.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of
        the form ``{attribute: {attribute_value: subtree_or_label}}``.
    """
    label_counts = Counter(label for label in df[target_attribute])

    # Every example carries the same label: pure leaf.
    if len(label_counts) == 1:
        return next(iter(label_counts))

    # No examples at all, so there is nothing to vote with.
    if df.empty:
        return default_class

    # Attributes are exhausted but the labels are still mixed: predict the
    # most common label of the examples at this node rather than
    # ``default_class``, which need not even be a valid class label.
    if not attribute_names:
        return label_counts.most_common(1)[0][0]

    # Split on the attribute with the highest information gain; on ties the
    # earliest attribute in ``attribute_names`` wins.
    gains = [info_gain(df, attr, target_attribute) for attr in attribute_names]
    best_attr = attribute_names[gains.index(max(gains))]
    remaining_attr = [attr for attr in attribute_names if attr != best_attr]

    # Grow one branch per value of the chosen attribute.
    branches = {}
    for attr_val, data_subset in df.groupby(best_attr):
        branches[attr_val] = id3(
            data_subset, target_attribute, remaining_attr, default_class
        )
    return {best_attr: branches}
    
# Build the decision tree from the full data set, using every non-label
# column as a candidate attribute, then pretty-print the nested dict.
tree = id3(
    df_tennis,
    target_attribute='PlayTennis',
    attribute_names=attribute_names,
)
print("\n\nResultant Tree is : ")
pprint(tree)

['Outlook', 'Temperature', 'Humidity', 'Windy']
Outlook IG : 0.2467498197744391
Temperature IG : 0.029222565658954647
Humidity IG : 0.15183550136234136
Windy IG : 0.04812703040826927
Temperature IG : 0.01997309402197489
Humidity IG : 0.01997309402197489
Windy IG : 0.9709505944546686
Temperature IG : 0.5709505944546686
Humidity IG : 0.9709505944546686
Windy IG : 0.01997309402197489


Resultant Tree is : 
{'Outlook': {'Overcast': 'yes',
             'Rainy': {'Windy': {False: 'yes', True: 'no'}},
             'Sunny': {'Humidity': {'High': 'no', 'Normal': 'yes'}}}}
