In [1]:
import pprint

import binarytree as btree
import numpy as np
import pandas as pd
from numpy import log2 as log

eps = np.finfo(float).eps

In [2]:
# Toy dataset: twelve observations, three attribute columns and the
# 'Shopped' column last (the functions below treat the last column as the class).
d = {
    'Supplies': [
        'Low', 'High', 'Med', 'Low', 'Low', 'High',
        'High', 'Med', 'Low', 'Low', 'Med', 'High',
    ],
    'Weather': [
        'Sunny', 'Sunny', 'Cloudy', 'Raining', 'Cloudy', 'Sunny',
        'Raining', 'Cloudy', 'Raining', 'Raining', 'Sunny', 'Sunny',
    ],
    'Worked': [
        'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No',
        'No', 'Yes', 'Yes', 'No', 'No', 'Yes',
    ],
    'Shopped': [
        'Yes', 'No', 'No', 'No', 'Yes', 'No',
        'No', 'No', 'No', 'Yes', 'Yes', 'No',
    ],
}

# Build the DataFrame from the dictionary: one column per key, in key order.
df = pd.DataFrame(d)

In [3]:
# Base-2 entropy of the 'Shopped' column over the whole dataset.
shopped = df.Shopped
shopped_counts = shopped.value_counts()  # counted once, looked up per label below
values = shopped.unique()  # distinct labels, in order of first appearance
entropy_node = 0
for value in values:
    fraction = shopped_counts[value] / len(shopped)  # share of rows with this label
    entropy_node -= fraction * np.log2(fraction)
    
def ent(df, attribute, target='Shopped'):
    """Return the weighted entropy of ``target`` after splitting ``df`` on ``attribute``.

    For each distinct value of ``df[attribute]`` the base-2 entropy of the
    ``target`` labels inside that group is computed; the group entropies are
    then combined, each weighted by the fraction of rows in its group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the attribute column and the target column.
    attribute : column label
        Column whose distinct values define the groups.
    target : column label, optional
        Column holding the class labels. Defaults to ``'Shopped'``, the column
        this function previously had hard-coded.

    Returns
    -------
    number
        Absolute value of the weighted sum. Because machine epsilon (``eps``)
        is added to the denominator and inside the logarithm, the result is an
        approximation accurate to roughly ``eps`` rather than an exact entropy.
    """
    labels = df[target]
    column = df[attribute]
    target_variables = labels.unique()  # every class label present in df

    entropy_attribute = 0
    for variable in column.unique():
        group = column[column == variable]  # rows where the attribute equals this value
        den = len(group)
        entropy_each_feature = 0
        for target_variable in target_variables:
            num = len(group[labels == target_variable])  # rows of the group with this class
            fraction = num / (den + eps)
            # eps keeps log() away from 0 when a class is absent from the group.
            entropy_each_feature += -fraction * log(fraction + eps)
        fraction2 = den / len(df)  # weight of this group
        # The weighted sum is accumulated with a negative sign; abs() below undoes it.
        entropy_attribute += -fraction2 * entropy_each_feature

    return abs(entropy_attribute)

def find_entropy(df):
    """Return the base-2 entropy of the class column of ``df``.

    The class column is taken to be the last column of ``df``. Labels are
    visited in order of first appearance; an empty column yields 0.
    """
    labels = df[df.keys()[-1]]  # last column holds the class labels
    label_counts = labels.value_counts()
    n_rows = len(labels)

    total = 0
    for label in labels.unique():
        share = label_counts[label] / n_rows
        total -= share * np.log2(share)
    return total
  
  
def find_entropy_attribute(df,attribute):
    """Return the weighted entropy of the class column after splitting on ``attribute``.

    The class column is the last column of ``df``. For every distinct value of
    ``df[attribute]`` the base-2 entropy of the class labels in that group is
    computed and weighted by the group's share of the rows. ``eps`` is added
    to the denominator and inside the logarithm, so the value is approximate
    to about machine epsilon. The absolute value of the weighted sum is returned.
    """
    labels = df[df.keys()[-1]]  # class labels live in the last column
    column = df[attribute]
    classes = labels.unique()
    total_rows = len(df)

    weighted_sum = 0
    for level in column.unique():
        members = column[column == level]  # rows whose attribute equals this level
        group_size = len(members)
        group_entropy = 0
        for cls in classes:
            hits = len(members[labels == cls])  # members carrying this class label
            share = hits / (group_size + eps)
            group_entropy -= share * log(share + eps)
        # Accumulated with a negative sign, exactly as before; abs() flips it back.
        weighted_sum -= (group_size / total_rows) * group_entropy
    return abs(weighted_sum)


def find_winner(df):
    """Return the label of the attribute column with the highest information gain.

    Every column except the last is a candidate attribute; the last column is
    the class. The gain of a candidate is
    ``find_entropy(df) - find_entropy_attribute(df, candidate)``. When several
    candidates share the maximum, the earliest column wins because
    ``np.argmax`` returns the first maximal index.

    Raises
    ------
    ValueError
        If ``df`` has no attribute column besides the class column.
    """
    attributes = df.keys()[:-1]
    if len(attributes) == 0:
        # np.argmax on an empty list would also raise ValueError, with a less helpful message.
        raise ValueError("find_winner needs at least one attribute column before the class column")

    # The dataset entropy does not depend on the candidate, so compute it once.
    base_entropy = find_entropy(df)
    gains = [base_entropy - find_entropy_attribute(df, key) for key in attributes]
    return attributes[np.argmax(gains)]
  
  
def get_subtable(df, node,value):
    """Return the rows of ``df`` whose ``node`` column equals ``value``.

    The result is a new DataFrame whose index is renumbered from 0.
    """
    matches = df[node] == value
    subset = df[matches]
    return subset.reset_index(drop=True)


def buildTree(df,tree=None):
    """Recursively build an ID3-style decision tree as nested dictionaries.

    The last column of ``df`` is the class; every other column is an
    attribute. The attribute chosen by ``find_winner`` becomes the key of the
    returned dict, mapping each of its distinct values either to a class label
    (leaf) or to a nested dict built from the matching rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Training rows; the class labels are in the last column.
    tree : dict, optional
        Existing dictionary to add this node to. A new one is created when
        omitted. The recursive calls always start from a fresh dictionary.

    Returns
    -------
    dict
        ``{attribute: {value: label_or_subtree, ...}}``.
    """
    Class = df.keys()[-1]   # class column is the last one, whatever its name

    # Attribute with maximum information gain becomes this node.
    node = find_winner(df)

    # Distinct values of that attribute, e.g. 'High', 'Low', 'Med' for 'Supplies'.
    attValue = np.unique(df[node])

    if tree is None:
        tree = {}
    # Also covers a caller-supplied dict that has no entry for this node yet.
    tree.setdefault(node, {})

    # One branch per attribute value: a leaf when the subset is pure,
    # otherwise a subtree built from the subset.
    for value in attValue:

        subtable = get_subtable(df,node,value)
        clValue,counts = np.unique(subtable[Class],return_counts=True)

        if len(counts)==1:  # pure subset -> leaf with its only class
            tree[node][value] = clValue[0]
        elif len(subtable) == len(df):
            # The split removed no rows, so recursing would repeat this exact
            # call forever. Fall back to the most frequent class (first in
            # sorted order on a tie).
            tree[node][value] = clValue[np.argmax(counts)]
        else:
            tree[node][value] = buildTree(subtable)  # strictly smaller table

    return tree

In [4]:
# Build the decision tree for the full dataset and pretty-print the nested dict.
t = buildTree(df)
pprint.pprint(t)

{'Supplies': {'High': 'No',
              'Low': {'Worked': {'No': 'Yes',
                                 'Yes': {'Weather': {'Raining': 'No',
                                                     'Sunny': 'Yes'}}}},
              'Med': {'Weather': {'Cloudy': 'No', 'Sunny': 'Yes'}}}}


In [None]:
# Reference:
# https://medium.com/@lope.ai/decision-trees-from-scratch-using-id3-python-coding-it-up-6b79e3458de4
# The same script was used; only the DataFrame was changed.