<a href="https://colab.research.google.com/github/Sriyansh-36-AI-NITJ/Machine-Learning-Lab/blob/main/ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from math import log2

# Load the PlayTennis dataset directly from the course repository on GitHub
# (requires network access). The tree-building code in this notebook reads the
# class label from the last column of this frame.
url = "https://raw.githubusercontent.com/Sriyansh-36-AI-NITJ/Machine-Learning-Lab/refs/heads/main/PlayTennis.csv"
df = pd.read_csv(url)

In [4]:
# Preview the first rows to sanity-check the column names and category values.
df.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [5]:
# Entropy function
def entropy(data):
    """Return the Shannon entropy (in bits) of the class labels in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the class label.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each label.
        The result is 0 for an empty table or a table with a single class.
    """
    labels = data.iloc[:, -1]
    probs = labels.value_counts(normalize=True)
    # A categorical label column reports unobserved categories with p == 0.
    # log2(0) is undefined (math domain error) and, by convention, such classes
    # contribute nothing to the entropy, so they are skipped.
    return -sum(p * log2(p) for p in probs if p > 0)


# Information gain
def info_gain(data, feature):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the class label.
    feature : str
        Name of the column to split on.

    Returns
    -------
    float
        Entropy of ``data`` minus the size-weighted entropy of the partitions
        produced by each distinct value of ``feature``.
    """
    n_rows = len(data)
    column = data[feature]

    # Entropy remaining after the split: each partition is weighted by the
    # share of rows it contains.
    remainder = sum(
        (len(part) / n_rows) * entropy(part)
        for part in (data[column == level] for level in column.unique())
    )

    return entropy(data) - remainder


# Build tree
def id3(data, features, depth=0):
    """Recursively grow an ID3 decision tree.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows; the last column is taken as the class label.
    features : list
        Column names still available for splitting.
    depth : int, optional
        Accepted for interface compatibility; not read by the algorithm.

    Returns
    -------
    dict or label
        A bare class label for a leaf, otherwise a nested dict of the form
        ``{feature: {value: subtree_or_label}}``.
    """
    target = data.iloc[:, -1]

    # Leaf: only one class remains in this partition.
    if len(target.unique()) == 1:
        return target.iloc[0]
    # Leaf: nothing left to split on, so fall back to the most common class.
    if len(features) == 0:
        return target.mode()[0]

    # Split on the feature with the highest information gain (first wins ties).
    split_on = max(features, key=lambda name: info_gain(data, name))
    remaining = [name for name in features if name != split_on]

    branches = {}
    for level in data[split_on].unique():
        partition = data[data[split_on] == level]
        # An empty partition is labelled with the majority class of this node.
        branches[level] = (
            target.mode()[0] if partition.empty else id3(partition, remaining)
        )

    return {split_on: branches}



In [6]:
class DecisionTreeID3:
    """Small object wrapper around the tree produced by ``id3``.

    The fitted tree is kept in ``self.tree``. It is either a nested dict of the
    form ``{feature: {value: subtree_or_label}}`` or, when the training data
    contained a single class, a bare label.
    """

    def __init__(self):
        # Set by fit(); None means the model has not been trained yet.
        self.tree = None

    def fit(self, df):
        """Build the tree from ``df``.

        Every column except the last is offered as a split feature; the last
        column is the class label.
        """
        features = list(df.columns[:-1])
        self.tree = id3(df, features)

    def predict(self, sample, tree=None):
        """Classify one sample by walking the fitted tree.

        Parameters
        ----------
        sample : mapping
            Feature name -> value for the row to classify. Only the features
            tested along the path taken need to be present.
        tree : dict or label, optional
            Subtree to start from; used by the recursion. Defaults to the
            fitted tree.

        Returns
        -------
        label or None
            The predicted class, or ``None`` when the sample carries a value
            that has no branch at the node being tested.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        KeyError
            If ``sample`` lacks a feature that the tree needs to test.
        """
        if tree is None:
            if self.tree is None:
                raise RuntimeError(
                    "DecisionTreeID3 is not fitted yet; call fit() before predict()."
                )
            tree = self.tree

        # A single-class training set yields a bare label instead of a dict;
        # there is nothing to test, so the label itself is the prediction.
        if not isinstance(tree, dict):
            return tree

        # Each internal node is a one-key dict: {feature: {value: branch}}.
        feature = next(iter(tree))
        value = sample[feature]

        branch = tree[feature].get(value)
        if isinstance(branch, dict):
            return self.predict(sample, branch)
        return branch

In [7]:
# Every column except the last is a candidate split feature; the last column
# is the class label that id3 reads.
features = list(df.columns[:-1])
tree = id3(df, features)

# The tree is a nested dict: {feature: {value: subtree-or-label}}.
print(tree)

{'Outlook': {'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}, 'Overcast': 'Yes', 'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}


In [8]:
# Pretty-print the nested tree dict as indented JSON so each level is readable.
import json
print(json.dumps(tree, indent=2))

{
  "Outlook": {
    "Sunny": {
      "Humidity": {
        "High": "No",
        "Normal": "Yes"
      }
    },
    "Overcast": "Yes",
    "Rain": {
      "Wind": {
        "Weak": "Yes",
        "Strong": "No"
      }
    }
  }
}


In [9]:
model = DecisionTreeID3()
model.fit(df)

# One unseen day to classify. Values must match the training categories
# exactly (e.g. "High"/"Normal" for Humidity); a misspelled value has no
# branch in the tree and would make predict() return None.
sample = {
    "Outlook": "Rain",
    "Temperature": "Mild",
    "Humidity": "High",
    "Wind": "Strong",
}

print("Prediction:", model.predict(sample))

Prediction: No
