# **Decision Tree Using the ID3 Algorithm**

In [1]:
import math
from collections import Counter, defaultdict

# Entropy Calculation
def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Args:
        labels: Sized collection of hashable class labels.

    Returns:
        ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of each
        distinct label. An empty collection yields ``0``.
    """
    n_labels = len(labels)
    shares = (frequency / n_labels for frequency in Counter(labels).values())

    entropy_bits = 0
    for share in shares:
        # Terms are subtracted one at a time so that a pure label set
        # (single term of 1 * log2(1)) ends at +0.0 rather than -0.0.
        entropy_bits = entropy_bits - share * math.log2(share)
    return entropy_bits

# Information Gain
def information_gain(data, feature, target):
    """Measure how much splitting ``data`` on ``feature`` reduces label entropy.

    Args:
        data: Sized collection of rows; every row is indexed by ``feature``
            and by ``target`` (for example a dict per row).
        feature: Key of the attribute to split on.
        target: Key of the class label.

    Returns:
        The entropy of all target labels minus the size-weighted entropy of
        the label groups obtained by partitioning the rows on ``feature``.
    """
    baseline = entropy([record[target] for record in data])

    # Group the target labels by the value each row has for `feature`.
    labels_by_value = {}
    for record in data:
        labels_by_value.setdefault(record[feature], []).append(record[target])

    n_rows = len(data)
    remainder = 0
    for branch_labels in labels_by_value.values():
        weight = len(branch_labels) / n_rows
        remainder += weight * entropy(branch_labels)

    return baseline - remainder

# ID3 Algorithm
def id3(data, features, target):
    """Recursively build an ID3 decision tree from categorical training rows.

    Args:
        data: Sized collection of rows; every row is indexed by feature name
            and by ``target`` (for example a dict per row).
        features: Sequence (list, tuple, ...) of feature names still available
            for splitting. It is never mutated.
        target: Key of the class label in each row.

    Returns:
        ``None`` when ``data`` is empty (there is nothing to learn from);
        a bare class label for a leaf; otherwise a nested dict of the form
        ``{feature: {value: subtree_or_label, ...}}``. Branches are stored in
        the order in which each value first occurs in ``data``, so the
        resulting tree (and its printed form) is the same on every run.
    """
    # No examples: there is neither a label to return nor anything to split.
    if len(data) == 0:
        return None

    labels = [row[target] for row in data]

    # Case 1: all examples share one label -> leaf.
    if len(set(labels)) == 1:
        return labels[0]

    # Case 2: no features left -> leaf carrying the majority label.
    if not features:
        return Counter(labels).most_common(1)[0][0]

    # Choose the feature with the highest information gain; on a tie, max()
    # keeps the feature that comes first in `features`.
    gains = {feature: information_gain(data, feature, target) for feature in features}
    best_feature = max(gains, key=gains.get)

    # Identical for every branch, so it is computed once; building a new list
    # leaves the caller's `features` untouched and also accepts tuples.
    remaining_features = [feature for feature in features if feature != best_feature]

    # Partition the rows in a single pass. A dict keeps first-seen order,
    # which makes branch order deterministic (iterating a set does not).
    partitions = {}
    for row in data:
        partitions.setdefault(row[best_feature], []).append(row)

    # Every partition holds at least one row, so no empty-subset fallback
    # is needed here.
    return {
        best_feature: {
            value: id3(subset, remaining_features, target)
            for value, subset in partitions.items()
        }
    }

# Prediction
def predict(tree, sample):
    """Classify ``sample`` by walking ``tree`` from the root down to a leaf.

    Args:
        tree: Either a leaf value or a nested dict of the form
            ``{feature: {value: subtree_or_leaf, ...}}``.
        sample: Mapping from feature name to value; read with ``.get``, so a
            missing feature is treated as the value ``None``.

    Returns:
        The value stored at the leaf that is reached, or ``None`` when the
        sample's value for a tested feature has no matching branch.
    """
    node = tree
    while isinstance(node, dict):
        # The first key of an internal node names the feature it tests.
        split_feature = next(iter(node))
        observed = sample.get(split_feature)
        branches = node[split_feature]

        if observed not in branches:
            return None
        node = branches[observed]

    # Anything that is not a dict is a leaf and is returned as-is.
    return node

# Example dataset: the feature columns plus the class label column
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']
target = 'Play'

# Each record lists its values in the order features + [target]
data = [
    dict(zip(features + [target], record))
    for record in (
        ('Sunny', 'Hot', 'High', 'Weak', 'No'),
        ('Sunny', 'Hot', 'High', 'Strong', 'No'),
        ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
    )
]

# Fit the decision tree and show its nested-dict form
tree = id3(data, features, target)
print("Decision Tree:", tree, sep="\n")

# Classify one unseen combination of feature values
sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
print("\nPrediction:", predict(tree, sample))


Decision Tree:
{'Outlook': {'Sunny': 'No', 'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}, 'Overcast': 'Yes'}}

Prediction: No


# **Random Forest**

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Toy dataset: one row per person, columns are [Age, Salary]
X = np.array([
    [22, 25000], [25, 32000], [47, 78000], [52, 110000],
    [46, 90000], [56, 150000], [55, 135000], [60, 160000],
])

# Class label for each row: 0 = No, 1 = Yes
y = np.array([0, 0, 1, 1, 1, 1, 1, 1])

# Hold out a quarter of the rows for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Configure the forest and fit it on the training rows (fit returns the estimator)
model = RandomForestClassifier(
    n_estimators=100,   # number of trees
    criterion="gini",   # or "entropy"
    max_depth=None,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows
y_pred = model.predict(X_test)

# Report accuracy and per-class metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Classify a single new [Age, Salary] row
sample = np.array([[45, 85000]])
print("Prediction:", model.predict(sample))

Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Prediction: [1]
