In [None]:
import pandas as pd
import numpy as np
import math

# Read the weather table from 'weather.csv' (path is relative to the working directory)
# into the module-level frame `df`, then print it as plain text.
df = pd.read_csv('weather.csv')
print(df)

    Day   Outlook Temp. Humidity   Wind  Decision
0     1     Sunny   Hot     High    Weak       No
1     2     Sunny   Hot     High  Strong       No
2     3  Overcast   Hot     High    Weak      Yes
3     4      Rain  Mild     High    Weak      Yes
4     5      Rain  Cool   Normal    Weak      Yes
5     6      Rain  Cool   Normal  Strong       No
6     7  Overcast  Cool   Normal  Strong      Yes
7     8     Sunny  Mild     High    Weak       No
8     9     Sunny  Cool   Normal    Weak      Yes
9    10      Rain  Mild   Normal    Weak      Yes
10   11     Sunny  Mild   Normal  Strong      Yes
11   12  Overcast  Mild     High  Strong      Yes
12   13  Overcast   Hot   Normal    Weak      Yes
13   14      Rain  Mild     High  Strong       No


In [None]:
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in a pandas Series.

    Each distinct value contributes ``-p * log2(p)``, where ``p`` is the
    fraction of rows equal to that value. A Series with no rows yields ``0``.
    """
    n_rows = len(target_col)
    bits = 0
    for label in target_col.unique():
        share = (target_col == label).sum() / n_rows
        bits -= share * math.log2(share)
    return bits

def information_gain(data, feature, target):
    """Return how much splitting `data` on `feature` reduces the entropy of `target`.

    The result is the entropy of ``data[target]`` minus the size-weighted
    average entropy of ``target`` within each group of rows sharing one
    value of ``feature``.
    """
    baseline = entropy(data[target])
    n_total = len(data)
    column = data[feature]

    remainder = 0
    for level in column.unique():
        part = data[column == level]
        weight = len(part) / n_total
        remainder += weight * entropy(part[target])

    return baseline - remainder

def build_tree(data, target, features, level=0):
    """Recursively build an ID3-style decision tree as nested dicts.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows that reach this node; must contain `target` and every column
        named in `features`.
    target : str
        Name of the label column.
    features : sequence of str
        Columns that may still be used for splitting. The sequence is only
        read, never modified, so the caller's list and the lists seen by
        sibling branches stay intact.
    level : int, default 0
        Recursion depth; used only to indent the progress line that is printed.

    Returns
    -------
    dict or label
        ``{feature: {value: subtree, ...}}`` for an internal node, or a single
        label for a leaf (the only label left, or the most frequent label when
        no features remain).
    """
    labels = data[target].unique()
    if len(labels) == 1:
        # Pure node: every remaining row carries the same label.
        return labels[0]

    if len(features) == 0:
        # Nothing left to split on: fall back to the most frequent label.
        return data[target].mode()[0]

    best_feature = max(features, key=lambda feature: information_gain(data, feature, target))
    total_entropy = entropy(data[target])

    # The number printed is the entropy of the target at this node (before the
    # split); the name in parentheses is the feature chosen for the split.
    print("  " * level + f"Entropy({best_feature}) = {total_entropy:.4f}")

    # Build a fresh list for the children rather than removing from `features`
    # in place: an in-place removal would leak into sibling branches (which
    # could then no longer split on a feature an earlier sibling used) and
    # into the caller's list.
    remaining = [name for name in features if name != best_feature]

    tree = {best_feature: {}}
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_tree(subset, target, remaining, level + 1)
    return tree

def print_tree(tree, feature_names=None, depth=0):
    """Print a nested-dict decision tree to stdout, indenting two spaces per level.

    An internal node is a one-key dict ``{feature: {value: subtree}}``. It is
    printed as ``feature:`` followed by one ``value ->`` entry per branch, with
    the subtree continuing on the same line. Anything that is not a dict is
    treated as a leaf and printed as is.

    `feature_names` is accepted and forwarded to recursive calls, but it has no
    effect on what is printed.
    """
    if not isinstance(tree, dict):
        # Leaf: finishes the "value ->" line started by the parent call.
        print(f"{tree}")
        return

    feature = next(iter(tree))
    print("  " * depth + f"{feature}:")

    branch_pad = "  " * (depth + 1)
    for value, subtree in tree[feature].items():
        print(branch_pad + f"{value} ->", end=" ")
        print_tree(subtree, feature_names, depth + 2)
def predict_decision(tree, data_point, default=None):
    """Classify one sample by walking a nested-dict decision tree.

    Parameters
    ----------
    tree : dict or label
        An internal node ``{feature: {value: subtree}}`` or a leaf label.
    data_point : mapping
        Maps feature names to this sample's values. It must contain every
        feature tested along the path taken; a missing one raises ``KeyError``.
    default : object, default None
        Returned when the sample's value for a tested feature has no branch
        in the tree.

    Returns
    -------
    The label of the leaf that is reached, or `default` when the walk cannot
    continue. The fallback never reads a label out of `data_point`, so a
    prediction cannot simply echo the sample's own (possibly true) label.
    """
    node = tree
    while isinstance(node, dict):
        feature = next(iter(node))
        branches = node[feature]
        value = data_point[feature]
        if value not in branches:
            # Value not covered by any branch at this node: no prediction.
            return default
        node = branches[value]
    return node

# Candidate split columns. 'Wind ' carries a trailing space because that is
# the column's name in df.
features = ['Outlook', 'Humidity', 'Wind ']

# Hand build_tree its own copy, so `features` is still complete afterwards
# even if the builder consumes the list it is given.
decision_tree = build_tree(df, 'Decision', list(features))
print("\nTree: ")
print_tree(decision_tree, feature_names=features)

# Sample to classify. Keys follow df's column names (note 'Temp.' with the
# dot); the label is unknown, hence 'Decision': None.
predicted_point = {'Outlook': 'Rain', 'Temp.': 'Mild', 'Humidity': 'Normal', 'Wind ': 'Weak', 'Decision': None}
decision = predict_decision(decision_tree, predicted_point)
print(f"\nPredicted Decision: {decision}")

Entropy(Outlook) = 0.9403
  Entropy(Humidity) = 0.9710
  Entropy(Wind ) = 0.9710

Tree: 
Outlook:
  Sunny ->     Humidity:
      High -> No
      Normal -> Yes
  Overcast -> Yes
  Rain ->     Wind :
      Weak -> Yes
      Strong -> No

Predicted Decision: Yes
