**3) Aim: To construct a decision tree from a training data set using supervised learning. Program: Write a program to demonstrate the working of the decision-tree-based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.**

In [1]:
import numpy as np
import pandas as pd
from collections import Counter

# Play-tennis training set; the first row holds the column names and the
# last column ("PlayTennis") is the class label.
data = [
    ["Outlook", "Temperature", "Humidity", "Wind", "PlayTennis"],
    ["Sunny", "Hot", "High", "Weak", "No"],
    ["Sunny", "Hot", "High", "Strong", "No"],
    ["Overcast", "Hot", "High", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny", "Mild", "High", "Weak", "No"],
    ["Sunny", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "Normal", "Weak", "Yes"],
    ["Sunny", "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High", "Strong", "Yes"],
    ["Overcast", "Hot", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Strong", "No"],
]

# Build the DataFrame from the record rows, labelling columns with the header row
df = pd.DataFrame(data=data[1:], columns=data[0])

def entropy(target_col):
    """Return the Shannon entropy (base 2) of the values in ``target_col``.

    Each distinct value contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency. An empty input yields ``0.0``.
    """
    _, counts = np.unique(target_col, return_counts=True)
    total = np.sum(counts)
    terms = [(-count / total) * np.log2(count / total) for count in counts]
    return np.sum(terms)

def information_gain(data, split_attribute, target_attribute):
    """Return the information gain of splitting ``data`` on ``split_attribute``.

    The gain is the entropy of the ``target_attribute`` column minus the
    size-weighted entropy of that column within each subset that shares one
    value of ``split_attribute``.
    """
    baseline = entropy(data[target_attribute])

    split_column = data[split_attribute]
    levels, sizes = np.unique(split_column, return_counts=True)
    n_rows = np.sum(sizes)

    # Entropy remaining after the split: each subset weighted by its share of rows
    remainder = np.sum([
        (size / n_rows) * entropy(data[split_column == level][target_attribute])
        for level, size in zip(levels, sizes)
    ])
    return baseline - remainder

def _majority_class(frame, target_attribute):
    """Return the most frequent value of ``target_attribute`` in ``frame``.

    Ties go to the value that sorts first: ``np.unique`` returns sorted
    values and ``np.argmax`` picks the first maximum.
    """
    labels, counts = np.unique(frame[target_attribute], return_counts=True)
    return labels[np.argmax(counts)]


def ID3(data, original_data, features, target_attribute, parent_node_class=None):
    """Build a decision tree with the ID3 algorithm.

    Args:
        data: DataFrame holding the examples for the current node.
        original_data: DataFrame holding the full training set; its majority
            class labels a node that receives no examples.
        features: Names of the columns still available for splitting.
        target_attribute: Name of the class-label column.
        parent_node_class: Majority class of the parent node. Only used as a
            last resort when both ``data`` and ``original_data`` are empty.

    Returns:
        A class label for a leaf, or a nested dict of the form
        ``{feature: {value: subtree_or_label, ...}}`` for an internal node.
    """
    # The empty check must come first: an empty subset has no label to index.
    if len(data) == 0:
        if len(original_data) == 0:
            return parent_node_class
        return _majority_class(original_data, target_attribute)

    # Pure node: every example carries the same label.
    labels = np.unique(data[target_attribute])
    if len(labels) == 1:
        return labels[0]

    majority = _majority_class(data, target_attribute)

    # No feature left to split on: label the leaf with the majority class of
    # the examples that actually reached this node.
    if len(features) == 0:
        return majority

    gains = [information_gain(data, feature, target_attribute) for feature in features]
    best_feature = features[np.argmax(gains)]
    remaining_features = [name for name in features if name != best_feature]

    # One branch per value of the chosen feature observed at this node.
    branches = {}
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        branches[value] = ID3(
            subset, original_data, remaining_features, target_attribute, majority
        )
    return {best_feature: branches}

def classify(sample, tree):
    """Classify ``sample`` by walking the decision ``tree``.

    Args:
        sample: Mapping of attribute name to attribute value.
        tree: Either a nested dict of the form
            ``{attribute: {value: subtree_or_label}}`` or a bare class label
            (a tree that consists of a single leaf).

    Returns:
        The class label stored at the leaf that is reached, or ``None`` when
        the sample lacks the attribute a node tests or carries a value the
        node has no branch for.
    """
    node = tree
    # Anything that is not a dict is a leaf label, including a bare-label tree.
    while isinstance(node, dict):
        # Pick the first sample attribute that this node tests.
        attribute = next((name for name in sample if name in node), None)
        if attribute is None:
            return None
        try:
            node = node[attribute][sample[attribute]]
        except KeyError:
            # No branch exists for this attribute value.
            return None
    return node

# Every column except the last is a predictor
features = list(df.columns[:-1])

# The last column is the class label
target_attribute = df.columns[-1]

# Learn the tree from the full training set
decision_tree = ID3(df, df, features, target_attribute)

# Unseen example to run through the tree
sample = {"Outlook": "Sunny", "Temperature": "Cool", "Humidity": "High", "Wind": "Strong"}

# Predict the label for the example
classification_result = classify(sample, decision_tree)

print("Constructed Decision Tree:", decision_tree, sep="\n")
print("\nClassification Result for the Sample:", classification_result, sep="\n")


Constructed Decision Tree:
{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}

Classification Result for the Sample:
No
