<a href="https://colab.research.google.com/github/Saurabhnage/406-Parul/blob/main/AML_P1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd
from collections import Counter

def entropy(y):
    """Return the Shannon entropy, in bits, of the class labels in ``y``.

    Class frequencies are counted with ``np.unique`` rather than
    ``np.bincount``, so labels are not restricted to non-negative integers:
    strings, negative integers, or any other sortable label type work too.

    Args:
        y: Array-like of class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed classes.
        An empty ``y`` carries no uncertainty and yields ``0.0``.
    """
    y = np.asarray(y)
    if y.size == 0:
        return 0.0
    # np.unique only reports classes that actually occur, so every
    # probability is strictly positive and log2 is always defined.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / y.size
    return -np.sum(probabilities * np.log2(probabilities))

def information_gain(X, y, feature):
    """Return the entropy reduction obtained by splitting on ``feature``.

    Args:
        X: 2-D array of feature values, indexed as ``X[:, feature]``.
        y: Array of class labels, one per row of ``X``.
        feature: Column index of ``X`` to evaluate.

    Returns:
        ``entropy(y)`` minus the size-weighted entropy of the label subsets
        produced by grouping rows on each distinct value of the column.
    """
    baseline = entropy(y)
    column = X[:, feature]
    distinct, occurrences = np.unique(column, return_counts=True)
    total_rows = np.sum(occurrences)

    # One term per distinct value: (share of rows) * (entropy of their labels).
    branch_terms = []
    for level, n_rows in zip(distinct, occurrences):
        branch_terms.append((n_rows / total_rows) * entropy(y[column == level]))

    return baseline - np.sum(branch_terms)

def best_feature_to_split(X, y, features):
    """Return the entry of ``features`` with the highest information gain.

    Args:
        X: 2-D array of feature values.
        y: Array of class labels, one per row of ``X``.
        features: Sequence of candidate column indices into ``X``.

    Returns:
        The candidate whose split yields the largest gain; on ties the
        earliest candidate wins, because ``np.argmax`` returns the first
        maximum.
    """
    gains = []
    for candidate in features:
        gains.append(information_gain(X, y, candidate))
    winner = np.argmax(gains)
    return features[winner]

def id3(X, y, features, target_labels):
    """Recursively build an ID3 decision tree.

    Args:
        X: 2-D array of feature values.
        y: Array of encoded class labels, one per row of ``X``.
        features: Column indices of ``X`` still available for splitting.
        target_labels: Mapping from encoded label to the label stored in
            the leaves.

    Returns:
        A leaf label taken from ``target_labels``, or a nested dict of the
        form ``{feature_index: {feature_value: subtree_or_leaf}}``.
    """
    # Leaf: every remaining row belongs to the same class.
    distinct_labels = np.unique(y)
    if len(distinct_labels) == 1:
        return target_labels[y[0]]

    # Leaf: nothing left to split on, so fall back to the majority class.
    if len(features) == 0:
        majority_label, _ = Counter(y).most_common(1)[0]
        return target_labels[majority_label]

    split_on = best_feature_to_split(X, y, features)
    remaining = [f for f in features if f != split_on]
    column = X[:, split_on]

    # Grow one branch per distinct value observed in the chosen column.
    branches = {}
    for level in np.unique(column):
        rows = column == level
        branches[level] = id3(X[rows], y[rows], remaining, target_labels)
    return {split_on: branches}

def predict(tree, sample, feature_names=None):
    """Classify ``sample`` by walking the decision ``tree`` from the root.

    Args:
        tree: Either a leaf label, or a nested dict of the form
            ``{feature: {value: subtree_or_leaf}}`` as produced by ``id3``.
        sample: Mapping or sequence holding the sample's feature values.
        feature_names: Optional sequence translating the feature keys used
            in ``tree`` (column indices) into the keys used by ``sample``.
            When omitted, the columns of a module-level ``df`` are used if
            such a global exists (the original behaviour); otherwise the
            tree's feature key is used to index ``sample`` directly.

    Returns:
        The predicted label, or ``None`` when the sample's value for a
        split feature has no branch in the tree (or a node is an empty
        dict).
    """
    if feature_names is None:
        # Backward compatibility: earlier versions always resolved names
        # through the global DataFrame ``df``. Looking it up via globals()
        # avoids a NameError when this module is imported instead of run.
        legacy_frame = globals().get("df")
        if legacy_frame is not None:
            feature_names = legacy_frame.columns

    node = tree
    while isinstance(node, dict):
        if not node:
            return None
        # Each internal node holds exactly one key: the feature it splits on.
        feature = next(iter(node))
        key = feature if feature_names is None else feature_names[feature]
        value = sample[key]
        branches = node[feature]
        if value not in branches:
            # Value never seen for this feature during training.
            return None
        node = branches[value]
    return node

if __name__ == "__main__":
    # Toy weather data set: four categorical features plus the
    # 'PlayTennis' target column.
    data = {
        'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
        'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
        'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
        'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
        'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
    }

    # `df` is kept as a module-level name: predict() resolves the sample's
    # column names through it when no explicit names are supplied.
    df = pd.DataFrame(data)
    X = df.iloc[:, :-1].values

    # pd.factorize numbers the classes in order of first appearance, so the
    # code -> label mapping is built from the uniques it returns instead of
    # being hard-coded (a hard-coded map is only right when the first row
    # happens to be 'No').
    y, classes = pd.factorize(df['PlayTennis'])
    target_labels = dict(enumerate(classes))
    features = list(range(X.shape[1]))

    tree = id3(X, y, features, target_labels)

    print("Decision Tree:")
    print(tree)

    sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}

    prediction = predict(tree, sample)

    print(f"Prediction for sample {sample}: {prediction}")


Decision Tree:
{0: {'Overcast': 'Yes', 'Rain': {3: {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {2: {'High': 'No', 'Normal': 'Yes'}}}}
Prediction for sample {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}: No
