In [1]:
import pandas as pd
import math


In [2]:
# Toy weather data set stored column-wise: 14 observations, four categorical
# attributes (Outlook, Temperature, Humidity, Wind) and the "PlayTennis" label.
# Every list has 14 entries; position i across the lists forms row i.
data = {
    "Outlook": ["Sunny","Sunny","Overcast","Rain","Rain","Rain","Overcast","Sunny",
                "Sunny","Rain","Sunny","Overcast","Overcast","Rain"],
    "Temperature": ["Hot","Hot","Hot","Mild","Cool","Cool","Mild","Cool",
                    "Mild","Mild","Mild","Mild","Hot","Mild"],
    "Humidity": ["High","High","High","High","Normal","Normal","High","Normal",
                 "Normal","Normal","High","Normal","Normal","High"],
    "Wind": ["Weak","Strong","Weak","Weak","Weak","Strong","Strong","Weak",
             "Weak","Weak","Strong","Strong","Weak","Strong"],
    # Label column; it is listed last, so it ends up as the last column of `df`.
    "PlayTennis": ["No","No","Yes","Yes","Yes","No","Yes","No",
                   "Yes","Yes","Yes","Yes","Yes","No"]
}

# Build the frame; the bare `df` on the last line makes the notebook render it
# as this cell's output.
df = pd.DataFrame(data)
df


Unnamed: 0,Outlook,Temperature,Humidity,Wind,PlayTennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Mild,High,Strong,Yes
7,Sunny,Cool,Normal,Weak,No
8,Sunny,Mild,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [3]:
def entropy(target_col):
    """Return the base-2 Shannon entropy of the labels in ``target_col``.

    Parameters
    ----------
    target_col : pandas.Series
        Column of class labels.

    Returns
    -------
    number
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        label's count divided by ``len(target_col)``. An empty column yields 0.

    Notes
    -----
    Labels with a count of zero are skipped. ``value_counts()`` reports such
    entries for categorical columns whose declared categories do not all
    occur (typical after filtering a frame), and ``log2(0)`` would otherwise
    raise ``ValueError``. By the usual convention ``0 * log2(0)`` contributes
    nothing to the entropy.
    """
    counts = target_col.value_counts()
    total = len(target_col)
    ent = 0
    for count in counts:
        if count == 0:
            # Unused category: contributes 0 and must not reach math.log2.
            continue
        p = count / total
        ent -= p * math.log2(p)
    return ent


In [4]:
def information_gain(data, feature, target):
    """Return the entropy reduction obtained by splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate; must contain both ``feature`` and ``target`` columns.
    feature : str
        Name of the column whose distinct values define the partitions.
    target : str
        Name of the label column.

    Returns
    -------
    number
        Entropy of the whole label column minus the size-weighted sum of the
        label entropies inside each partition.
    """
    baseline = entropy(data[target])
    levels = data[feature].unique()
    row_count = len(data)

    # Accumulate each partition's entropy, weighted by its share of the rows.
    remainder = 0
    for level in levels:
        partition = data[data[feature] == level]
        share = len(partition) / row_count
        remainder += share * entropy(partition[target])

    return baseline - remainder


In [5]:
def find_best_feature(data, target):
    """Return the name of the feature column with the highest information gain.

    Every column of ``data`` other than ``target`` is a candidate, regardless
    of where the target column sits in the frame. Selecting candidates by
    position (all but the last column) would treat the label itself as a
    feature whenever it is not the final column.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate; must contain the ``target`` column.
    target : str
        Name of the label column.

    Returns
    -------
    str
        Column name with the largest gain. On a tie, the candidate that
        appears first in ``data.columns`` wins.

    Raises
    ------
    ValueError
        If ``data`` has no column other than ``target``.
    """
    candidates = [column for column in data.columns if column != target]
    if not candidates:
        raise ValueError("no feature columns left to split on")

    # Dict preserves column order, so max() breaks ties in favour of the
    # earliest column.
    gains = {
        feature: information_gain(data, feature, target)
        for feature in candidates
    }
    return max(gains, key=gains.get)


In [6]:
def id3(data, target):
    """Recursively build an ID3 decision tree from ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows; contains the ``target`` column plus any remaining
        feature columns.
    target : str
        Name of the label column.

    Returns
    -------
    dict or label
        A leaf is a bare label value. An internal node is a one-key dict
        ``{feature: {feature_value: subtree, ...}}`` with one branch per
        distinct value of the chosen feature, in order of first appearance.
    """
    labels = data[target]

    # Pure node: every row carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Only one column is left, so there is nothing to split on: majority label.
    if len(data.columns) == 1:
        return labels.mode()[0]

    split_on = find_best_feature(data, target)
    split_column = data[split_on]

    # One subtree per observed value; the chosen feature is removed from the
    # rows handed down so it cannot be selected again below this node.
    branches = {
        level: id3(data[split_column == level].drop(columns=[split_on]), target)
        for level in split_column.unique()
    }
    return {split_on: branches}


In [7]:
# Grow the tree from the full frame, using "PlayTennis" as the label column.
# The bare name on the last line displays the resulting nested dict.
decision_tree = id3(df, "PlayTennis")
decision_tree


{'Outlook': {'Sunny': {'Temperature': {'Hot': 'No',
    'Cool': 'No',
    'Mild': 'Yes'}},
  'Overcast': 'Yes',
  'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}

In [8]:
def classify(tree, sample):
    """Predict a label for ``sample`` by walking ``tree`` from the root.

    Parameters
    ----------
    tree : dict or label
        Either a leaf (any non-dict value) or a node of the form
        ``{feature: {feature_value: subtree, ...}}``.
    sample : mapping
        Maps feature names to the sample's values; indexed as
        ``sample[feature]`` for each feature tested along the path.

    Returns
    -------
    label or str
        The leaf reached, or the string ``"Unknown"`` when the sample's value
        for a tested feature has no branch in the tree.
    """
    node = tree
    while isinstance(node, dict):
        # A node has a single key: the feature it tests.
        feature = next(iter(node))
        branches = node[feature]
        value = sample[feature]

        if value not in branches:
            return "Unknown"
        node = branches[value]

    return node


In [9]:
# One unlabeled observation: a value for each of the four attributes and no
# "PlayTennis" entry.
new_sample = {
    "Outlook": "Sunny",
    "Temperature": "Cool",
    "Humidity": "High",
    "Wind": "Strong"
}

# Walk the learned tree with this sample and print the predicted label.
result = classify(decision_tree, new_sample)
print("Prediction for new sample:", result)


Prediction for new sample: No
