In [35]:
import pandas as pd
import numpy as np
from math import log2

# Load the Play Tennis dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("play_tennis.csv")

# Print a caption, then leave `df.head()` as the cell's last expression so the
# notebook renders the first five rows as the cell output.
print("Dataset Preview:")
df.head()


Dataset Preview:


Unnamed: 0,day,outlook,temp,humidity,wind,play
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes


In [36]:
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Parameters
    ----------
    target_col : array-like
        Class labels in any form ``np.unique`` accepts (the notebook passes a
        pandas Series; a plain list also works).

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is each
        label's relative frequency. ``0.0`` when the input is empty or holds
        a single distinct label.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # The denominator is loop-invariant: compute it once instead of re-summing
    # the counts array for every label.
    total = counts.sum()

    # Start from a float so the return type is a float even when the loop body
    # never runs (empty input).
    entropy_value = 0.0
    for count in counts:
        # np.unique only reports labels that occur, so prob > 0 and log2 is safe.
        prob = count / total
        entropy_value -= prob * log2(prob)

    return entropy_value


In [37]:
# Entropy of the target column `play` over the whole dataset (displayed as the cell output).
entropy(df["play"])


0.9402859586706311

In [38]:
def information_gain(data, feature, target):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    The gain is the entropy of ``data[target]`` minus the size-weighted
    average of the target entropy inside each subset of rows that share one
    value of ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate; must contain the ``feature`` and ``target`` columns.
    feature : str
        Column whose distinct values define the candidate split.
    target : str
        Column holding the class label.

    Returns
    -------
    float
        Entropy before the split minus the weighted entropy after it.
    """
    total_entropy = entropy(data[target])
    values, counts = np.unique(data[feature], return_counts=True)
    # The denominator is loop-invariant: compute it once instead of re-summing
    # the counts array for every feature value.
    n_rows = counts.sum()

    weighted_entropy = 0.0
    for value, count in zip(values, counts):
        # Rows that take this particular value of the feature.
        subset = data[data[feature] == value]
        # Weight each subset's entropy by its share of the rows.
        weighted_entropy += (count / n_rows) * entropy(subset[target])

    return total_entropy - weighted_entropy


In [39]:
# Candidate attributes to split on; `features` is also passed to id3() in a later cell.
features = ['outlook', 'temp', 'humidity', 'wind']

# Report the information gain of each attribute with respect to the `play` column.
for feature in features:
    print(feature, ":", information_gain(df, feature, "play"))


outlook : 0.24674981977443933
temp : 0.02922256565895487
humidity : 0.15183550136234159
wind : 0.04812703040826949


In [40]:
def id3(data, target, features):
    """Recursively build an ID3 decision tree represented as nested dicts.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows for this node; must contain ``target`` and every
        column named in ``features``.
    target : str
        Column holding the class label.
    features : list of str
        Columns still available for splitting at this node. The list is not
        modified.

    Returns
    -------
    dict or label
        A leaf is a bare class label. An internal node has the form
        ``{feature: {value: subtree, ...}}`` with one entry per distinct
        value of the chosen feature present in ``data``.
    """
    # Computed once and reused for both the purity check and the leaf label.
    classes = np.unique(data[target])

    # Stop if only one class left
    if len(classes) == 1:
        return classes[0]

    # Stop if no features left: fall back to the most frequent label
    # (the first entry returned by mode() if several labels tie).
    if len(features) == 0:
        return data[target].mode()[0]

    # Split on the feature with the highest information gain; np.argmax
    # returns the first maximum, so ties go to the earliest feature in the list.
    gains = [information_gain(data, feature, target) for feature in features]
    best_feature = features[np.argmax(gains)]

    # Identical for every branch below, so build it once outside the loop.
    remaining_features = [f for f in features if f != best_feature]

    tree = {best_feature: {}}
    for value in np.unique(data[best_feature]):
        # Recurse on the rows that take this value of the chosen feature.
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = id3(subset, target, remaining_features)

    return tree


In [41]:
# Build the tree on the full dataset, then display the resulting nested dict
# as the cell output.
decision_tree = id3(df, "play", features)
decision_tree


{'outlook': {'Overcast': 'Yes',
  'Rain': {'wind': {'Strong': 'No', 'Weak': 'Yes'}},
  'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}

In [42]:
# Plain-text dump of `decision_tree` under a caption (single-line dict repr).
print("Decision Tree using ID3 Algorithm:\n")
print(decision_tree)


Decision Tree using ID3 Algorithm:

{'outlook': {'Overcast': 'Yes', 'Rain': {'wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}
