In [18]:
import pandas as pd
import numpy as np

def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts)) for i in range(len(elements))])
    return entropy


def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    weighted_entropy = np.sum([(counts[i] / np.sum(counts)) * entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name]) for i in range(len(vals))])
    information_gain = total_entropy - weighted_entropy
    return information_gain


def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]

    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name]) [np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]

    elif len(features) == 0:
        return parent_node_class

    else:
        parent_node_class = np.unique(data[target_attribute_name])[np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        for value in np.unique(data[best_feature]):
            value = value
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, originaldata, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

# Example usage (replace with your dataset)
data = pd.DataFrame({
    'outlook': ['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy', 'overcast', 'sunny', 'sunny', 'rainy', 'sunny', 'overcast', 'overcast', 'rainy'],
    'temperature': ['hot', 'hot', 'hot', 'mild', 'cool', 'cool', 'cool', 'mild', 'cool', 'mild', 'mild', 'mild', 'hot', 'mild'],
    'humidity': ['high', 'high', 'high', 'high', 'normal', 'normal', 'normal', 'high', 'normal', 'normal', 'normal', 'high', 'normal', 'high'],
    'windy': ['false', 'true', 'false', 'false', 'false', 'true', 'true', 'false', 'false', 'false', 'true', 'true', 'false', 'true'],
    'class': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
})

features = data.columns[:-1]
tree = ID3(data, data, features)
tree

{'outlook': {'overcast': 'yes',
  'rainy': {'windy': {'false': 'yes', 'true': 'no'}},
  'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}

In [19]:
import numpy as np
import pandas as pd
from collections import Counter

# Load the weather dataset from Excel file
file_path = "/content/Weather_1.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

def entropy(y):
    """Calculate entropy of a dataset"""
    counts = np.bincount(pd.factorize(y)[0])
    probabilities = counts / len(y)
    return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

def information_gain(df, feature, target):
    """Calculate information gain for a given feature"""
    total_entropy = entropy(df[target])
    values, counts = np.unique(df[feature], return_counts=True)
    weighted_entropy = sum((counts[i] / sum(counts)) * entropy(df[df[feature] == values[i]][target]) for i in range(len(values)))
    return total_entropy - weighted_entropy

def id3(df, target, features):
    """Build the decision tree using ID3 algorithm"""
    if len(set(df[target])) == 1:
        return df[target].iloc[0]
    if not features:
        return Counter(df[target]).most_common(1)[0][0]

    best_feature = max(features, key=lambda f: information_gain(df, f, target))
    tree = {best_feature: {}}

    for value in df[best_feature].unique():
        subtree = id3(df[df[best_feature] == value], target, [f for f in features if f != best_feature])
        tree[best_feature][value] = subtree

    return tree

def print_tree(tree, indent=""):
    """Pretty print the decision tree"""
    if isinstance(tree, dict):
        for key, value in tree.items():
            print(f"{indent}{key}: {{")
            for subkey, subvalue in value.items():
                print(f"{indent}  {subkey}: {subvalue}")
            print(f"{indent}}}")
    else:
        print(f"{indent}{tree}")

# Build and print the decision tree
decision_tree = id3(df, 'Decision', list(df.columns[:-1]))
print_tree(decision_tree)

Temp: {
  hot : {'OutLook': {'sunny': {'Wind': {'weak': 'No', 'Strong': 'Yes'}}, 'overcast': 'Yes'}}
  mild: {'OutLook': {'Rain': {'Wind': {'weak': 'Yes', 'Strong': 'No'}}, 'sunny': 'Yes', 'overcast': 'Yes'}}
  cold: {'OutLook': {'Rain': {'Wind': {'weak': 'No', 'Strong': 'Yes'}}, 'overcast': 'No', 'sunny': 'Yes'}}
}
