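# **Decision Tree**

Builds a decision tree for the PlayTennis dataset with the ID3 algorithm, choosing each split by entropy-based information gain.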

In [2]:
import numpy as np
import pandas as pd

In [4]:
# Load the PlayTennis table and keep every column except "day".
data = pd.read_csv("PlayTennis.csv").drop(columns="day")

display(data)

Unnamed: 0,outlook,temperature,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [5]:
def Entropy(Sample, target="play"):
    """Return the base-2 Shannon entropy of the labels in ``Sample[target]``.

    Parameters
    ----------
    Sample : pandas.DataFrame
        Rows whose label distribution is measured. Must contain ``target``.
    target : str, default "play"
        Name of the label column. The default keeps existing one-argument
        calls working unchanged.

    Returns
    -------
    number
        ``0`` when ``Sample`` is empty or holds at most one distinct label;
        otherwise ``-sum(p * log2(p))`` over the observed labels, where ``p``
        is the label's count divided by the number of rows in ``Sample``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of a non-empty ``Sample``.
    """
    total = Sample.shape[0]
    if total == 0:
        return 0

    counts = Sample[target].value_counts()
    # Categorical columns report unused categories with a count of zero;
    # log2(0) is undefined, so drop them before computing probabilities.
    counts = counts[counts > 0]
    if len(counts) <= 1:
        return 0

    entropy = 0.0
    for count in counts:
        p = count / total
        entropy -= p * np.log2(p)

    return entropy

def Gain(Sample, Attribute):
    """Return the information gain from splitting ``Sample`` on ``Attribute``.

    Starts from ``Entropy(Sample)`` and subtracts, for every distinct value of
    the ``Attribute`` column, the entropy of the rows carrying that value
    weighted by the fraction of rows they represent.
    """
    remaining = Entropy(Sample)
    total_rows = Sample.shape[0]
    column = Sample[Attribute]

    for level in column.unique():
        partition = Sample[column == level]
        weight = partition.shape[0] / total_rows
        remaining -= weight * Entropy(partition)

    return remaining

def ID3(data, target, attributes):
    """Recursively build an ID3 decision tree for "Yes"/"No" labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows for the current node.
    target : str
        Name of the label column in ``data``.
    attributes : sequence of str
        Candidate column names to split on. The sequence is not modified.

    Returns
    -------
    str or dict
        ``"Yes"`` or ``"No"`` for a leaf, otherwise
        ``{best_attribute: {value: subtree, ...}}`` where each subtree has the
        same form.

    Notes
    -----
    The split is chosen with ``Gain(data, a)``, which is not passed ``target``.
    """
    labels = data[target]
    targetCounts = labels.value_counts()

    # A node whose rows all share one label is a leaf.
    if labels.eq("Yes").all():
        return "Yes"
    if labels.eq("No").all():
        return "No"

    # Majority label of this node ("No" on a tie); .get avoids a KeyError when
    # one of the two labels is absent from the counts.
    majority = "Yes" if targetCounts.get("Yes", 0) > targetCounts.get("No", 0) else "No"

    if len(attributes) == 0:
        return majority

    Gains = [Gain(data, a) for a in attributes]
    best = attributes[Gains.index(max(Gains))]

    # Build a fresh list instead of removing from `attributes`: mutating the
    # shared list would alter the caller's variable and would hide attributes
    # consumed in one branch from all of its sibling branches.
    remaining = [a for a in attributes if a != best]

    branches = {}
    for v in data[best].unique():
        xv = data.loc[data[best] == v]
        if len(xv) == 0:
            # No rows match this value (e.g. a NaN entry, which never compares
            # equal): fall back to the majority label rather than recursing.
            branches[v] = majority
        else:
            branches[v] = ID3(xv, target, remaining)

    return {best: branches}

def visualize(tree, indent=2):
    """Print a nested-dict decision tree as an indented outline.

    Parameters
    ----------
    tree : dict or any
        A mapping is printed one ``key:`` line per entry, with each value
        rendered recursively two spaces deeper. Anything else is treated as a
        leaf and printed with ``repr``.
    indent : int, default 2
        Number of leading spaces for this level.
    """
    pad = " " * indent
    # isinstance (rather than an exact type comparison) lets dict subclasses
    # such as OrderedDict be walked as trees instead of dumped as one repr.
    if isinstance(tree, dict):
        for k, v in tree.items():
            print(pad + f"{k}:")
            visualize(v, indent + 2)
    else:
        print(pad + repr(tree))

In [6]:
target = "play"
# Candidate split attributes are all columns except the label. Selecting by
# name instead of slicing off the last column stays correct even when the
# label column is not positioned last.
attributes = [column for column in data.columns if column != target]

tree = ID3(data, target, attributes)

In [7]:
# Show the tree returned by ID3 in its raw nested-dict form.
display(tree)

{'outlook': {'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}},
  'Overcast': 'Yes',
  'Rain': {'wind': {'Weak': 'Yes', 'Strong': 'No'}}}}

In [8]:
# Print the same tree as an indented outline, one key or leaf per line.
visualize(tree)

  outlook:
    Sunny:
      humidity:
        High:
          'No'
        Normal:
          'Yes'
    Overcast:
      'Yes'
    Rain:
      wind:
        Weak:
          'Yes'
        Strong:
          'No'
