In [None]:
import pandas as pd
import math


def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Args:
        target_col: pandas Series of class labels. Missing values are not
            counted, because ``value_counts`` drops them by default.

    Returns:
        ``sum(-p * log2(p))`` over the relative frequency ``p`` of every
        observed label. The result is ``0.0`` for an empty column and for a
        column that holds a single distinct label.
    """
    counts = target_col.value_counts()
    # A categorical column reports its unobserved categories with a count of
    # zero; log2(0) is undefined and such labels add nothing to the entropy.
    counts = counts[counts > 0]
    total = counts.sum()
    if total == 0:
        return 0.0
    # Negating each term (instead of the finished sum) keeps a pure column at
    # 0.0 rather than -0.0 while leaving every other result unchanged.
    return sum(-(count / total) * math.log2(count / total) for count in counts)


def information_gain(df, attribute, target="Answer"):
    """Return the entropy reduction obtained by splitting ``df`` on ``attribute``.

    Args:
        df: pandas DataFrame containing both ``attribute`` and ``target``.
        attribute: name of the column to split on.
        target: name of the label column.

    Returns:
        Entropy of the whole ``target`` column minus the size-weighted sum of
        the entropies of ``target`` within each distinct ``attribute`` value.
    """
    baseline = entropy(df[target])
    n_rows = len(df)

    def branch_term(value):
        # Contribution of one attribute value: its share of the rows times
        # the entropy of the labels among those rows.
        branch = df[df[attribute] == value]
        return (len(branch) / n_rows) * entropy(branch[target])

    remainder = sum(map(branch_term, df[attribute].unique()))
    return baseline - remainder


def id3(df, attributes, target="Answer"):
    """Build an ID3 decision tree for ``df`` as nested dictionaries.

    Args:
        df: pandas DataFrame with the training examples.
        attributes: list of column names still available for splitting.
        target: name of the label column.

    Returns:
        A label taken from ``target`` when the node is a leaf, otherwise a
        dict of the form ``{attribute: {value: subtree, ...}}`` where each
        subtree is again a label or a nested dict.
    """
    labels = df[target]

    # Pure node: every remaining example carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label.
    if not attributes:
        return labels.mode()[0]

    gains = {attr: information_gain(df, attr, target) for attr in attributes}
    best_attribute = max(gains, key=gains.get)

    # The chosen attribute is excluded for the whole subtree; the list is the
    # same for every branch, so build it once.
    remaining = [attr for attr in attributes if attr != best_attribute]

    branches = {}
    # NaN never compares equal to itself, so a missing value would select an
    # empty subset and produce an empty branch (or a KeyError from mode()
    # once no attributes remain). Branch only on values that are present.
    for value in df[best_attribute].dropna().unique():
        subset = df[df[best_attribute] == value]
        branches[value] = id3(subset, remaining, target)

    return {best_attribute: branches}


# Load the training examples from CSV.
file_path = "/content/id3.csv"
df = pd.read_csv(file_path)

# Every column except the target is a candidate split attribute.
target = "Answer"
attributes = [name for name in df.columns if name != target]

# Build the tree and show it as nested dictionaries.
decision_tree = id3(df, attributes, target)
print("Decision Tree:", decision_tree, sep="\n")


Decision Tree:
{'Outlook': {'sunny': {'Humidity': {'high': 'no', 'normal': 'yes'}}, 'overcast': 'yes', 'rain': {'Wind': {'weak': 'yes', 'strong': 'no'}}}}


In [None]:
import pandas as pd
import math
from anytree import Node, RenderTree

# Function to calculate entropy
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Args:
        target_col: pandas Series of class labels. Missing values are not
            counted, because ``value_counts`` drops them by default.

    Returns:
        ``sum(-p * log2(p))`` over the relative frequency ``p`` of every
        observed label. The result is ``0.0`` for an empty column and for a
        column that holds a single distinct label.
    """
    counts = target_col.value_counts()
    # A categorical column reports its unobserved categories with a count of
    # zero; log2(0) is undefined and such labels add nothing to the entropy.
    counts = counts[counts > 0]
    total = counts.sum()
    if total == 0:
        return 0.0
    # Negating each term (instead of the finished sum) keeps a pure column at
    # 0.0 rather than -0.0 while leaving every other result unchanged.
    return sum(-(count / total) * math.log2(count / total) for count in counts)

# Function to calculate information gain
def information_gain(df, attribute, target="Answer"):
    """Return the entropy reduction obtained by splitting ``df`` on ``attribute``.

    Args:
        df: pandas DataFrame containing both ``attribute`` and ``target``.
        attribute: name of the column to split on.
        target: name of the label column.

    Returns:
        Entropy of the whole ``target`` column minus the size-weighted sum of
        the entropies of ``target`` within each distinct ``attribute`` value.
    """
    baseline = entropy(df[target])
    n_rows = len(df)

    def branch_term(value):
        # Contribution of one attribute value: its share of the rows times
        # the entropy of the labels among those rows.
        branch = df[df[attribute] == value]
        return (len(branch) / n_rows) * entropy(branch[target])

    remainder = sum(map(branch_term, df[attribute].unique()))
    return baseline - remainder

# Recursive function to build the decision tree
def id3(df, attributes, target="Answer", parent_node=None, branch_value=None):
    """Build an ID3 decision tree for ``df`` as a tree of anytree ``Node`` objects.

    Args:
        df: pandas DataFrame with the training examples.
        attributes: list of column names still available for splitting.
        target: name of the label column.
        parent_node: node the new node is attached to; ``None`` for the
            top-level call, which creates the root.
        branch_value: value of the parent's attribute that leads to this
            node; used as the ``"<value> -> "`` prefix of the node name.

    Returns:
        The ``Node`` created for ``df``: a leaf named after a label, or a
        split node named after the chosen attribute with one child per
        observed attribute value.
    """
    def label(text):
        # The root has no incoming branch, so it carries no "<value> -> "
        # prefix (otherwise a root leaf would read "None -> <label>").
        return text if parent_node is None else f"{branch_value} -> {text}"

    labels = df[target]

    # Pure node: every remaining example carries the same label.
    if len(labels.unique()) == 1:
        return Node(label(labels.iloc[0]), parent=parent_node)

    # Nothing left to split on: fall back to the most frequent label.
    if not attributes:
        return Node(label(labels.mode()[0]), parent=parent_node)

    # Choose the attribute with the highest information gain.
    gains = {attr: information_gain(df, attr, target) for attr in attributes}
    best_attribute = max(gains, key=gains.get)

    root = Node(label(best_attribute), parent=parent_node)

    # The chosen attribute is excluded for the whole subtree; the list is the
    # same for every branch, so build it once.
    remaining = [attr for attr in attributes if attr != best_attribute]

    # NaN never compares equal to itself, so a missing value would select an
    # empty subset and produce a childless split node (or a KeyError from
    # mode() once no attributes remain). Branch only on values that are present.
    for value in df[best_attribute].dropna().unique():
        subset = df[df[best_attribute] == value]
        id3(subset, remaining, target, root, value)

    return root

# Load dataset from CSV
file_path = "/content/id3.csv"  # Ensure you have the correct file path
df = pd.read_csv(file_path)

# Every column except the target is a candidate split attribute
target = "Answer"  # Ensure the target column matches your dataset
attributes = [name for name in df.columns if name != target]

# Build the decision tree; the returned node is its root
decision_tree_root = id3(df, attributes, target)

# Print a heading, then one line per node with its tree-drawing prefix
print("\nDecision Tree Structure:")
print(
    *(f"{pre}{node.name}" for pre, _, node in RenderTree(decision_tree_root)),
    sep="\n",
)



Decision Tree Structure:
Outlook
├── sunny -> Humidity
│   ├── high -> no
│   └── normal -> yes
├── overcast -> yes
└── rain -> Wind
    ├── weak -> yes
    └── strong -> no
