In [3]:
import numpy as np
import pandas as pd
from collections import Counter

# Load the weather dataset into a DataFrame. The path is relative, so
# "weather.csv" is looked up in the notebook's current working directory.
# The cells below expect a label column named "Decision" (the default `target`
# of `information_gain` and `id3`).
df = pd.read_csv("weather.csv")

# Function to calculate entropy
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Args:
        target_col: Any array-like accepted by ``np.unique`` (list, ndarray,
            pandas Series, ...) holding one class label per sample.

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        relative frequency of each label. A column with a single distinct
        label yields zero.
    """
    # Only the per-class frequencies matter; the labels themselves are dropped.
    class_counts = np.unique(target_col, return_counts=True)[1]
    class_probs = class_counts / class_counts.sum()
    log_probs = np.log2(class_probs)
    return -(class_probs * log_probs).sum()

# Function to calculate Information Gain
def information_gain(df, feature, target="Decision"):
    """Return the information gain obtained by splitting ``df`` on ``feature``.

    Args:
        df: DataFrame containing both the ``feature`` and ``target`` columns.
        feature: Name of the column to split on.
        target: Name of the class-label column.

    Returns:
        Entropy of the target column minus the size-weighted average entropy
        of the target within each distinct value of ``feature``.
    """
    levels, level_counts = np.unique(df[feature], return_counts=True)
    n_rows = level_counts.sum()

    # Accumulate the weighted entropy of each partition, one feature value at a time.
    remainder = 0
    for level, count in zip(levels, level_counts):
        partition_labels = df.loc[df[feature] == level, target]
        remainder = remainder + (count / n_rows) * entropy(partition_labels)

    return entropy(df[target]) - remainder

# ID3 Algorithm
def id3(df, features, target="Decision"):
    """Build a decision tree with the ID3 algorithm.

    Args:
        df: DataFrame holding the training samples.
        features: Names of the columns still available to split on.
        target: Name of the class-label column.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of the
        form ``{feature: {value: subtree_or_label, ...}}`` with one entry per
        distinct value of the chosen feature in ``df``.
    """
    labels = df[target]
    distinct_labels = np.unique(labels)

    # Pure node: every sample carries the same label.
    if len(distinct_labels) == 1:
        return distinct_labels[0]

    # Nothing left to split on: fall back to the most frequent label.
    if len(features) == 0:
        majority_label, _ = Counter(labels).most_common(1)[0]
        return majority_label

    # Score every candidate; max() keeps the first feature on ties.
    scored = [(information_gain(df, name, target), name) for name in features]
    best_feature = max(scored, key=lambda pair: pair[0])[1]

    # The chosen feature is spent for the whole subtree below this node.
    remaining = [name for name in features if name != best_feature]

    # One branch per observed value, built from the matching rows only.
    branches = {
        value: id3(
            df.loc[df[best_feature] == value].drop(columns=[best_feature]),
            remaining,
            target,
        )
        for value in np.unique(df[best_feature])
    }
    return {best_feature: branches}

# Run ID3 Algorithm
import pprint

# Select the feature columns by name instead of by position: the target is
# identified by name inside id3, so slicing off "the last column" would leak
# the target in as a feature (and drop a real feature) whenever "Decision"
# is not the final column of the CSV.
target_column = "Decision"
features = [column for column in df.columns if column != target_column]
decision_tree = id3(df, features, target=target_column)

# Print the decision tree
pprint.pprint(decision_tree)


{'Outlook': {'Overcast': 'Yes',
             'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
             'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}


In [4]:
pip install graphviz




In [5]:
import graphviz

# Fill colours for the leaf (class) nodes; any other class label falls back
# to a neutral grey.
LEAF_COLORS = {'Yes': 'lightgreen', 'No': 'lightcoral'}


def add_tree_to_graph(graph, subtree, parent=None, node_id='n'):
    """Recursively add ``subtree`` (as returned by ``id3``) to ``graph``.

    Args:
        graph: The ``graphviz.Digraph`` to draw into.
        subtree: Either a nested dict ``{feature: {value: child, ...}}`` or a
            plain class label (leaf).
        parent: Id of the node this subtree hangs from, or ``None`` for the root.
        node_id: Id to use for this subtree's top node. Ids are derived from
            the path through the tree, so the same feature, value or class
            label appearing under different branches gets its own node
            instead of being merged into one.
    """
    if isinstance(subtree, dict):
        # id3 builds single-key dicts: the key is the feature tested here.
        feature, branches = next(iter(subtree.items()))
        graph.node(node_id, str(feature))
        if parent is not None:
            graph.edge(parent, node_id)

        # Each feature value becomes its own node, followed by its subtree.
        for index, (value, child) in enumerate(branches.items()):
            value_id = '{}_{}'.format(node_id, index)
            graph.node(value_id, str(value))
            graph.edge(node_id, value_id)
            add_tree_to_graph(graph, child, parent=value_id, node_id=value_id + '_c')
    else:
        # Leaf: a class label, drawn as a filled box.
        label = str(subtree)
        graph.node(
            node_id,
            label,
            shape='box',
            style='filled',
            fillcolor=LEAF_COLORS.get(label, 'lightgrey'),
        )
        if parent is not None:
            graph.edge(parent, node_id)


# Create a Graphviz Digraph and populate it from the tree computed by id3, so
# the picture always matches the data instead of a hand-copied structure.
dot = graphviz.Digraph(format='png')
add_tree_to_graph(dot, decision_tree)

# Render to 'decision_tree.png' and open it in the default viewer in a single
# pass; view() is itself a render(view=True), so calling render() and then
# view() would run the layout twice.
dot.render('decision_tree', view=True)


'decision_tree.png'