In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the golf data set and keep positional rows 1..13 of the file.
# NOTE(review): row 0 is excluded by the slice -- confirm this is intentional.
golf = pd.read_csv('./golf.csv').iloc[1:14]
print(golf.head())

    Outlook  Temp_Num Temp_Nom  Hum_Num Hum_Nom  Windy Play
1  overcast        64     cool       65  normal   True  yes
2  overcast        72     mild       90    high   True  yes
3  overcast        81      hot       75  normal  False  yes
4     rainy        70     mild       96    high  False  yes
5     rainy        68     cool       80  normal  False  yes


In [3]:
# Keep only the nominal (categorical) features plus the target column "Play".
columns = [
    "Outlook",
    "Temp_Nom",
    "Hum_Nom",
    "Windy",
    "Play",
]
golf = golf.loc[:, columns]
print(golf)

     Outlook Temp_Nom Hum_Nom  Windy Play
1   overcast     cool  normal   True  yes
2   overcast     mild    high   True  yes
3   overcast      hot  normal  False  yes
4      rainy     mild    high  False  yes
5      rainy     cool  normal  False  yes
6      rainy     cool  normal   True   no
7      rainy     mild  normal  False  yes
8      rainy     mild    high   True   no
9      sunny      hot    high  False   no
10     sunny      hot    high   True   no
11     sunny     mild    high  False   no
12     sunny     cool  normal  False  yes
13     sunny     mild  normal   True  yes


In [4]:
def entropy(y):
    """Return the Shannon entropy, in bits, of the class labels in ``y``.

    Parameters
    ----------
    y : array-like
        Class labels (for example the ``Play`` column). Any set of hashable
        labels is supported, not only ``'yes'`` / ``'no'``. Missing values
        are ignored when counting.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the observed labels. ``0.0`` when ``y`` is
        empty or contains a single distinct label.
    """
    # Count every label in one pass instead of re-scanning the data per term.
    counts = pd.Series(y).value_counts()
    # Categorical data can report unobserved categories with a zero count;
    # drop them so log2(0) is never evaluated.
    counts = counts[counts > 0]
    if len(counts) < 2:
        # Empty or pure node: there is no uncertainty.
        return 0.0
    probs = counts / counts.sum()
    return float(-(probs * np.log2(probs)).sum())

In [5]:
# Entropy of the target column over the whole (unsplit) data set.
print(entropy(golf["Play"]))

0.9612366047228759


In [6]:
def informationGain(left, right, verbose=True):
    """Return the information gain of partitioning the rows into two groups.

    The gain is the entropy of the ``Play`` column over all rows (``left``
    and ``right`` combined) minus the size-weighted entropies of the two
    groups.

    Parameters
    ----------
    left, right : pandas.DataFrame
        The two sides of a candidate split. Each must have a ``Play`` column.
        Either side may be empty.
    verbose : bool, default True
        When true, print the intermediate entropies (the original behaviour).
        Pass ``False`` to compute the gain silently.

    Returns
    -------
    float
        ``entropy(before) - (weighted entropy(left) + weighted entropy(right))``.
    """
    n_left = len(left.Play)
    n_right = len(right.Play)
    n_total = n_left + n_right

    eBefore = entropy(pd.concat([left.Play, right.Play]))

    # An empty side contributes nothing; skipping it also avoids dividing by
    # zero when both sides are empty.
    eLeft = entropy(left.Play) * (n_left / n_total) if n_left != 0 else 0
    eRight = entropy(right.Play) * (n_right / n_total) if n_right != 0 else 0

    eAfter = eLeft + eRight
    IG = eBefore - eAfter

    if verbose:
        print("Entropy Before:", eBefore)
        print("Entropy of Left:", eLeft)
        print("Entropy of Right:", eRight)
        print("Entropy After:", eAfter)
    return IG

In [7]:
# Partition the rows on the boolean "Windy" feature and report the gain.
left = golf.loc[golf["Windy"] == True]
right = golf.loc[golf["Windy"] == False]
print(informationGain(left, right))

Entropy Before: 0.9612366047228759
Entropy of Left: 0.46153846153846156
Entropy of Right: 0.4647572292281859
Entropy After: 0.9262956907666475
0.03494091395622834


In [8]:
def split(df):
    """Greedily build a nested list of binary splits that separate ``Play``.

    Every ``(feature, value)`` pair is tried as a binary test
    (``df[feature] == value`` versus ``!=``). The pair with the largest
    information gain is chosen, and each resulting side that is non-empty and
    still impure is split recursively.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to split. Must contain a ``Play`` column; every other column is
        treated as a candidate feature.

    Returns
    -------
    list
        The sub-lists returned for the impure children (left side first, then
        right side), followed by the chosen ``(feature, value)`` tuple as the
        last element. An empty list is returned when no candidate split has a
        positive information gain.
    """
    features = [column for column in df.columns if column != "Play"]
    IG = 0
    # Stays None when no candidate improves on a gain of zero.
    bestSplit = None
    for feature in features:
        for value in df[feature].unique():
            print(value)
            print("Best IG:", IG)
            left = df[df[feature] == value]
            right = df[df[feature] != value]
            temp_ig = informationGain(left, right)
            print("Temp IG:", temp_ig)
            if temp_ig > IG:
                bestSplit = (feature, value)
                IG = temp_ig
                print("New Best Information Gain:", IG)
                print(bestSplit)

    splits = []
    if bestSplit is None:
        # Nothing separates the labels any further (e.g. no feature columns,
        # no rows, or identical feature values with mixed labels). Stop here
        # instead of recursing on leftover loop variables, which could recurse
        # forever or fail on the unbound ``bestSplit``.
        return splits

    best_feature, best_value = bestSplit
    left = df[df[best_feature] == best_value]
    right = df[df[best_feature] != best_value]
    for child in (left, right):
        # Only non-empty, impure children need further splitting.
        if len(child.Play) != 0 and entropy(child.Play) != 0:
            subtree = split(child)
            if subtree:
                splits.append(subtree)
    splits.append(bestSplit)
    return splits

In [9]:
# Build the nested split list for the full data set; the cell displays the result.
split(df=golf)

overcast
Best IG: 0
Entropy Before: 0.9612366047228759
Entropy of Left: 0.0
Entropy of Right: 0.7692307692307693
Entropy After: 0.7692307692307693
Temp IG: 0.1920058354921066
New Best Information Gain: 0.1920058354921066
('Outlook', 'overcast')
rainy
Best IG: 0.1920058354921066
Entropy Before: 0.9612366047228759
Entropy of Left: 0.3734425363287187
Entropy of Right: 0.5873440017999785
Entropy After: 0.9607865381286972
Temp IG: 0.00045006659417867745
sunny
Best IG: 0.1920058354921066
Entropy Before: 0.9612366047228759
Entropy of Left: 0.3734425363287187
Entropy of Right: 0.4992480765902356
Entropy After: 0.8726906129189543
Temp IG: 0.08854599180392153
cool
Best IG: 0.1920058354921066
Entropy Before: 0.9612366047228759
Entropy of Left: 0.2496240382951178
Entropy of Right: 0.6861295798879999
Entropy After: 0.9357536181831178
Temp IG: 0.025482986539758112
mild
Best IG: 0.1920058354921066
Entropy Before: 0.9612366047228759
Entropy of Left: 0.4238288464866875
Entropy of Right: 0.5305074578645

[[[('Windy', False)], ('Outlook', 'rainy')],
 [[[('Windy', False)], ('Outlook', 'overcast')], ('Outlook', 'sunny')],
 ('Hum_Nom', 'normal')]