<a href="https://colab.research.google.com/github/Sai63583/ML-LAB/blob/main/ML_EX_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from collections import Counter
import math

class Node:
    """
    A single node of the decision tree.

    An internal node stores the attribute it splits on and maps each
    attribute value to a child node; a leaf stores a class label instead.
    """

    def __init__(self, attribute=None, label=None):
        # Class label carried by a leaf (None when no label was given)
        self.label = label
        # Attribute this node splits on (None when no attribute was given)
        self.attribute = attribute
        # Maps attribute value -> child Node; a fresh dict per instance
        self.children = dict()

def entropy(data):
    """
    Return the Shannon entropy (in bits) of a collection of labels.

    `data` is a sized collection of hashable values. An empty collection
    yields 0.
    """
    total = len(data)
    if total == 0:
        return 0

    # Accumulate sum(p * log2(p)) over the observed values, then negate.
    weighted_log_sum = 0
    for occurrences in Counter(data).values():
        p = occurrences / total
        if p != 0:
            weighted_log_sum += p * math.log2(p)
    return -weighted_log_sum

def information_gain(data, attribute_values, attribute_index):
    """
    Return the information gain obtained by splitting on one attribute.

    Each example in `data` is indexed by position: `attribute_index` selects
    the attribute being evaluated and the last element is the class label.
    `attribute_values` lists the values to partition on.
    """
    total = len(data)
    conditional_entropy = 0

    for value in attribute_values:
        # Class labels of the examples that take this value
        labels = [row[-1] for row in data if row[attribute_index] == value]
        conditional_entropy += len(labels) / total * entropy(labels)

    # Gain = entropy before the split minus the weighted entropy after it
    base_entropy = entropy([row[-1] for row in data])
    return base_entropy - conditional_entropy

def choose_best_attribute(data, attributes):
    """
    Choose the best attribute to split on based on information gain.

    Parameters:
        data: 2-D NumPy array of examples. Column ``i`` is treated as holding
            the values of ``attributes[i]``; the last column is the class
            label.
        attributes: sequence of attribute names to consider.

    Returns:
        The attribute name with the highest information gain (the earliest
        one wins ties), or None when `attributes` is empty.
    """
    best_gain = -1
    best_attribute = None

    for index, attribute in enumerate(attributes):
        # The candidate values must come from the column being scored.
        # Reusing another column's values leaves this attribute with empty
        # partitions, which makes its conditional entropy 0 and its gain
        # equal to the full entropy -- so it would wrongly beat real splits.
        attribute_values = set(data[:, index])
        gain = information_gain(data, attribute_values, index)
        if gain > best_gain:
            best_gain = gain
            best_attribute = attribute

    return best_attribute

def split_data(data, attribute_index, value):
    """
    Return the rows of `data` whose column `attribute_index` equals `value`.

    `data` is indexed as a 2-D array; the selection uses a boolean row mask.
    """
    row_mask = data[:, attribute_index] == value
    return data[row_mask]

def majority_vote(data):
    """
    Return the class label that occurs most often in the last column of `data`.
    """
    labels = data[:, -1]
    tally = Counter(labels)
    top_entry = tally.most_common(1)  # list holding one (label, count) pair
    return top_entry[0][0]

def ID3(data, attributes):
    """
    Build a decision tree using the ID3 algorithm.

    Parameters:
        data: 2-D NumPy array of examples. Column ``i`` holds the values of
            ``attributes[i]`` and the last column holds the class label.
        attributes: list of attribute names, one per non-label column, in
            column order.

    Returns:
        The root Node of the tree. Leaves carry a class label; internal
        nodes carry the attribute name they split on and one child per
        value of that attribute observed in `data`.
    """
    # If all examples have the same label, return a leaf node
    if len(set(data[:, -1])) == 1:
        return Node(label=data[0][-1])

    # If there are no attributes left, return a leaf node with the majority vote
    if len(attributes) == 0:
        return Node(label=majority_vote(data))

    # Choose the best attribute to split on
    best_attribute = choose_best_attribute(data, attributes)

    # Create a new node with the chosen attribute
    node = Node(attribute=best_attribute)

    attribute_index = attributes.index(best_attribute)

    # Drop the chosen name by position, so the remaining names line up with
    # the columns that remain once the chosen column is removed below.
    remaining_attributes = (
        attributes[:attribute_index] + attributes[attribute_index + 1:]
    )

    # Recursively build the tree for each observed value of the chosen
    # attribute. The values come from the data itself, so every subset has
    # at least one row and no empty-subset fallback is needed.
    for value in set(data[:, attribute_index]):
        subset = split_data(data, attribute_index, value)
        # Remove the consumed column before recursing. Shrinking the name
        # list while keeping every column would make positions in
        # `attributes` point at the wrong columns in deeper calls.
        reduced_subset = np.delete(subset, attribute_index, axis=1)
        node.children[value] = ID3(reduced_subset, remaining_attributes)

    return node

def classify(tree, instance):
    """
    Classify an instance by walking the decision tree from the root.

    `instance` maps attribute names to values. Returns the label of the leaf
    that is reached, or the string 'unknown' when a node has no branch for
    the instance's value.
    """
    current = tree
    # Descend until a node carrying a label is reached
    while current.label is None:
        branch_key = instance[current.attribute]
        if branch_key not in current.children:
            return 'unknown'
        current = current.children[branch_key]
    return current.label

# Demo: train a tree on a small weather table and classify one instance.

# Training examples, one row each; the columns follow `attributes` below,
# with the class label as the final column.
data = np.array([
    ('Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('Sunny', 'Hot', 'High', 'Strong', 'No'),
    ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
    ('Sunny', 'Mild', 'High', 'Weak', 'No'),
    ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
    ('Sunny', 'Mild', 'Normal', 'Strong', 'Yes'),
    ('Overcast', 'Mild', 'High', 'Strong', 'Yes'),
    ('Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'Strong', 'No'),
])

# Names of the feature columns, in column order
attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind']

# Fit the decision tree on the table
tree = ID3(data, attributes)

# Query instance, keyed by attribute name
test_instance = dict(zip(attributes, ('Sunny', 'Cool', 'High', 'Strong')))
prediction = classify(tree, test_instance)
print(f"The prediction for instance {test_instance} is: {prediction}")

output:
The prediction for instance {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'} is: unknown
