<a href="https://colab.research.google.com/github/HemaP-0303/ML_LAB/blob/main/1BM22CS111_4_ID3_DECISION_TREE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from collections import Counter

# Function to calculate entropy
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Args:
        y: Sequence or array of class labels. Labels may be of any type
            that ``np.unique`` can handle (strings, ints, ...).

    Returns:
        ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of
        each distinct label, or ``0.0`` when ``y`` is empty.
    """
    total = len(y)
    if total == 0:
        # Nothing to measure; returning early also avoids dividing by zero.
        return 0.0

    # np.unique tallies each distinct label directly, so no intermediate
    # label -> integer mapping is required.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / total

    # Every count is at least 1, so every probability is > 0 and log2 is
    # always defined here.
    return -np.sum(probabilities * np.log2(probabilities))

# Function to calculate information gain
def information_gain(X, y, feature_index):
    """Return the entropy reduction obtained by splitting on one column.

    Args:
        X: 2-D array of feature values, indexed as ``X[:, feature_index]``.
        y: Array of labels, indexable by a boolean mask built from ``X``.
        feature_index: Position of the column of ``X`` to split on.

    Returns:
        The entropy of ``y`` minus the count-weighted entropy of the label
        subsets produced by grouping rows on each distinct column value.
    """
    column = X[:, feature_index]
    distinct_values, occurrences = np.unique(column, return_counts=True)
    n_rows = sum(occurrences)

    baseline = entropy(y)
    # Weight each partition's entropy by the share of rows it contains.
    conditional = sum(
        (count / n_rows) * entropy(y[column == value])
        for value, count in zip(distinct_values, occurrences)
    )
    return baseline - conditional

# Function to build decision tree
def id3(X, y, features):
    """Recursively build an ID3 decision tree.

    Args:
        X: 2-D array of feature values whose column ``i`` corresponds to
            ``features[i]``.
        y: Array of class labels, one per row of ``X``.
        features: List of feature names, aligned with the columns of ``X``.

    Returns:
        A class label when the node is a leaf (all labels identical, or no
        features left, in which case the most common label is used).
        Otherwise a nested dict of the form
        ``{feature_name: {feature_value: subtree_or_label, ...}}``.
    """
    # Pure node: every remaining sample has the same label.
    if len(set(y)) == 1:
        return y[0]
    # No features left to split on: fall back to the majority label.
    if len(features) == 0:
        return Counter(y).most_common(1)[0][0]

    gains = [information_gain(X, y, i) for i in range(len(features))]
    best_feature = int(np.argmax(gains))
    best_name = features[best_feature]
    best_column = X[:, best_feature]
    remaining_features = features[:best_feature] + features[best_feature + 1:]

    branches = {}
    for value in np.unique(best_column):
        mask = best_column == value
        # Remove the consumed column together with its name. Without this,
        # the recursive call would index X with positions that no longer
        # match `remaining_features`, attributing splits to the wrong
        # feature names.
        sub_X = np.delete(X[mask], best_feature, axis=1)
        branches[value] = id3(sub_X, y[mask], remaining_features)

    return {best_name: branches}

# Function to print decision tree
def print_tree(tree, indent=""):
    """Print a nested-dict decision tree as indented text.

    Args:
        tree: Either a dict shaped ``{node: {branch: subtree, ...}}`` or any
            non-dict value, which is printed as a leaf.
        indent: Prefix written before every line emitted at this level.
    """
    # Leaves are anything that is not a dict.
    if not isinstance(tree, dict):
        print(f"{indent}  └── {tree}")
        return

    deeper = indent + "    "
    for node, branches in tree.items():
        print(f"{indent}{node}")
        for branch, child in branches.items():
            print(f"{indent}  ├── {branch}:")
            print_tree(child, deeper)

# Load dataset and run ID3
def main(csv_path='/content/id3.csv'):
    """Load a CSV dataset, report root information gains, and print the tree.

    Args:
        csv_path: Path of the CSV file to read. All columns except the last
            are used as features and the last column as the class label.
            Defaults to the Colab location the script was written for.
    """
    df = pd.read_csv(csv_path)
    features = list(df.columns[:-1])
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values

    # Compute and print information gain for the root node
    gains = [information_gain(X, y, i) for i in range(len(features))]
    print("Information Gain of Root Node:")
    for feature, gain in zip(features, gains):
        print(f"{feature}: {gain:.4f}")

    # Build and print decision tree
    tree = id3(X, y, features)
    print("\nDecision Tree:")
    print_tree(tree)
# Script entry point: run the demo only when this file is executed directly.
if __name__ == "__main__":
    main()

Information Gain of Root Node:
Outlook: 0.2467
Temperature: 0.0292
Humidity: 0.1518
Wind: 0.0481

Decision Tree:
Outlook
  ├── overcast:
      └── yes
  ├── rain:
    Humidity
      ├── cool:
        Temperature
          ├── rain:
            Wind
              ├── rain:
                  └── yes
      ├── mild:
        Temperature
          ├── rain:
            Wind
              ├── rain:
                  └── yes
  ├── sunny:
    Wind
      ├── high:
          └── no
      ├── normal:
          └── yes
