<a href="https://colab.research.google.com/github/SHALINI12012006/shalini/blob/main/MLlab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from collections import Counter

# Node class for tree representation
class Node:
    """One vertex of the decision tree.

    A node is treated as a leaf when ``result`` is not None; otherwise it is
    an internal node that splits on the column index stored in ``feature``.
    """

    def __init__(self, feature=None, value=None, result=None):
        # Maps a value of the split feature to the child Node for that value.
        self.children = {}
        # Class label carried by a leaf; left as None on internal nodes.
        self.result = result
        # Feature value associated with this node (stored as given).
        self.value = value
        # Column index of the feature this node splits on.
        self.feature = feature

# Function to compute entropy
def entropy(y):
    """Return the Shannon entropy (base 2) of the labels in ``y``.

    ``y`` may be any sized iterable of hashable labels. An empty ``y``
    yields ``0.0`` because there are no label frequencies to accumulate.
    """
    n_labels = len(y)
    acc = 0.0
    for freq in Counter(y).values():
        p = freq / n_labels
        # Each class contributes -p * log2(p) to the total.
        acc = acc - p * np.log2(p)
    return acc

# Function to calculate information gain for a feature
def information_gain(X, y, feature):
    """Return the entropy reduction obtained by splitting on ``feature``.

    ``X`` is indexed as ``X[:, feature]`` and ``y`` is indexed with an
    integer index array, so both are expected to be NumPy arrays with one
    label in ``y`` per row of ``X``.
    """
    column = X[:, feature]
    n_samples = len(y)

    # Entropy that remains after the split: each partition's entropy
    # weighted by the fraction of samples that fall into it.
    conditional = 0.0
    for level in set(column):
        members = y[np.flatnonzero(column == level)]
        conditional += (len(members) / n_samples) * entropy(members)

    return entropy(y) - conditional

# ID3 algorithm to build the tree
def id3(X, y, features, feature_values=None):
    """Recursively build an ID3 decision tree over categorical features.

    Args:
        X: 2-D NumPy array of samples (indexed as ``X[:, f]`` and ``X[mask]``).
        y: 1-D NumPy array holding one label per row of ``X``.
        features: Column indices that may still be used for splitting.
        feature_values: Optional mapping ``feature index -> iterable of the
            values that feature can take``. It must contain every index in
            ``features``. When omitted, it is derived once from the ``X``
            given to the outermost call and passed unchanged down the
            recursion.

    Returns:
        A ``Node``: a leaf (``result`` set) when the labels are pure or no
        features remain, otherwise an internal node whose ``children`` are
        keyed by the values of the chosen feature.
    """
    # All labels agree: nothing left to decide.
    if len(set(y)) == 1:
        return Node(result=y[0])  # Pure leaf node
    # No feature left to split on: fall back to the most frequent label.
    if len(features) == 0:
        return Node(result=Counter(y).most_common(1)[0][0])

    # Fix the value domain of every feature at the outermost call. Iterating
    # only the values present in the current subset would make the
    # empty-partition branch below unreachable and would leave the tree
    # without a child for values that occur elsewhere in the training data.
    if feature_values is None:
        feature_values = {f: set(X[:, f]) for f in features}

    # Choose the feature with the highest information gain.
    best_feature = max(features, key=lambda f: information_gain(X, y, f))
    root = Node(feature=best_feature)

    # Identical for every child, so build it once outside the loop.
    remaining_features = [f for f in features if f != best_feature]

    column = X[:, best_feature]
    for value in feature_values[best_feature]:
        mask = column == value
        subset_y = y[mask]
        if len(subset_y) == 0:
            # No sample in this partition takes this value: predict the
            # majority label of the samples that reached this node.
            majority_label = Counter(y).most_common(1)[0][0]
            root.children[value] = Node(result=majority_label)
        else:
            root.children[value] = id3(
                X[mask], subset_y, remaining_features, feature_values
            )

    return root

# Function to predict label for a single sample
def predict(node, sample):
    """Return the label the tree rooted at ``node`` assigns to ``sample``.

    Walks down from ``node`` following, at each internal node, the child
    keyed by ``sample[node.feature]``. Returns ``None`` when the sample
    carries a value for which the current node has no child.
    """
    current = node
    # A node whose result is set is a leaf; keep descending until one is hit.
    while current.result is None:
        branch_key = sample[current.feature]
        if branch_key not in current.children:
            return None
        current = current.children[branch_key]
    return current.result

# Example dataset
# Each row is one observation described by four categorical attributes.
X_train = np.array(
    [
        ['Sunny', 'Hot', 'High', 'Weak'],
        ['Sunny', 'Hot', 'High', 'Strong'],
        ['Overcast', 'Hot', 'High', 'Weak'],
        ['Rain', 'Mild', 'High', 'Weak'],
        ['Rain', 'Cool', 'Normal', 'Weak'],
        ['Rain', 'Cool', 'Normal', 'Strong'],
        ['Overcast', 'Cool', 'Normal', 'Strong'],
        ['Sunny', 'Mild', 'High', 'Weak'],
        ['Sunny', 'Cool', 'Normal', 'Weak'],
        ['Rain', 'Mild', 'Normal', 'Weak'],
        ['Sunny', 'Mild', 'Normal', 'Strong'],
        ['Overcast', 'Mild', 'High', 'Strong'],
        ['Overcast', 'Hot', 'Normal', 'Weak'],
        ['Rain', 'Mild', 'High', 'Strong'],
    ]
)

# One class label per row of X_train, in the same order.
y_train = np.array(
    [
        'No', 'No', 'Yes', 'Yes', 'Yes',
        'No', 'Yes', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'Yes', 'No',
    ]
)

# Every column of X_train is a candidate split feature.
features = list(range(X_train.shape[1]))

# Train the tree
tree = id3(X=X_train, y=y_train, features=features)

# Classify one observation; its values are listed in the same column
# order as the rows of X_train.
sample = ['Overcast', 'Cool', 'Normal', 'Strong']
print("Prediction:", predict(node=tree, sample=sample))


Prediction: Yes
