<a href="https://colab.research.google.com/github/Drishti-2028/ML-CODES-IMPLEMENTATION/blob/main/decisiontree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -----------------------------------------------------------
# ID3 DECISION TREE ALGORITHM
# Full Algorithm + Code Combined
# -----------------------------------------------------------

import math
from collections import Counter


# -----------------------------------------------------------
# Function to calculate Entropy
# -----------------------------------------------------------

def entropy(data):
    """
    Step 1:
    Return the Shannon entropy (in bits) of the class labels in `data`.

    The class label of every row is taken from its last element:
        Entropy(S) = - sum(p_i * log2(p_i))

    An empty dataset has no labels to iterate over, so the result is 0.
    """
    class_freq = Counter(row[-1] for row in data)
    n_rows = len(data)

    result = 0
    for freq in class_freq.values():
        prob = freq / n_rows
        # Every counted label occurs at least once, so prob > 0 and
        # log2 is always defined here.
        result -= prob * math.log2(prob)

    return result


# -----------------------------------------------------------
# Function to split dataset based on attribute
# -----------------------------------------------------------

def split_data(data, attribute_index, value):
    """
    Step 2:
    Return the rows whose column `attribute_index` equals `value`,
    with that column removed from each returned row.

    Row order is preserved; rows that do not match are dropped.
    """
    after = attribute_index + 1
    return [
        row[:attribute_index] + row[after:]
        for row in data
        if row[attribute_index] == value
    ]


# -----------------------------------------------------------
# Function to compute Information Gain
# -----------------------------------------------------------

def information_gain(data, attribute_index):
    """
    Step 3:
    Return the information gain of splitting `data` on one attribute.

        Gain(S, A) = Entropy(S) - sum(|S_v| / |S| * Entropy(S_v))

    where S_v is the subset of rows whose column `attribute_index`
    equals v, taken over every distinct value v found in that column.
    """
    baseline = entropy(data)
    n_rows = len(data)
    distinct_values = set(row[attribute_index] for row in data)

    # Entropy that remains after the split, weighted by partition size.
    remainder = 0
    for candidate in distinct_values:
        partition = split_data(data, attribute_index, candidate)
        remainder += (len(partition) / n_rows) * entropy(partition)

    return baseline - remainder


# -----------------------------------------------------------
# ID3 Recursive Algorithm
# -----------------------------------------------------------

def id3(data, attributes):
    """
    Recursively build an ID3 decision tree.

    Args:
        data: list of rows. Each row is a list whose last element is the
            class label; column i of a row holds the value of
            attributes[i].
        attributes: list of attribute names for the non-label columns
            still present in `data`.

    Returns:
        - None when `data` is empty (there is nothing to learn from).
        - A class label when the node is a leaf.
        - Otherwise a nested dict of the form
          {attribute_name: {attribute_value: subtree_or_label, ...}}.
          Branches appear in the order their values first occur in `data`,
          so the result prints identically on every run.
    """
    # Guard: without examples there is no label to return and no split
    # to evaluate.
    if len(data) == 0:
        return None

    labels = [row[-1] for row in data]

    # Step 4: If all examples have same class → return class
    if labels.count(labels[0]) == len(labels):
        return labels[0]

    # Step 5: If no attributes left → return majority class
    if len(attributes) == 0:
        return Counter(labels).most_common(1)[0][0]

    # Step 6: Select attribute with highest information gain
    # (ties are resolved in favour of the left-most attribute).
    gains = [information_gain(data, i) for i in range(len(attributes))]
    best_attr_index = gains.index(max(gains))
    best_attr = attributes[best_attr_index]

    # The chosen column is removed from every subset by split_data, so
    # its name is removed once here to keep names and columns aligned.
    remaining_attrs = (
        attributes[:best_attr_index] + attributes[best_attr_index + 1:]
    )

    # Step 7: For each value of best attribute
    # dict.fromkeys de-duplicates while keeping first-seen order; a set
    # would make the branch order depend on string hashing.
    values = dict.fromkeys(row[best_attr_index] for row in data)

    branches = {}
    for value in values:
        # Every value comes from `data` itself, so the subset always
        # contains at least one row and the recursion is well defined.
        subset = split_data(data, best_attr_index, value)
        branches[value] = id3(subset, remaining_attrs)

    return {best_attr: branches}


# -----------------------------------------------------------
# Example Usage
# -----------------------------------------------------------

if __name__ == "__main__":

    # Names of the non-label columns, in column order.
    attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind']

    # Each row: Outlook, Temperature, Humidity, Wind, Play (class label)
    data = [
        ['Sunny', 'Hot', 'High', 'Weak', 'No'],
        ['Sunny', 'Hot', 'High', 'Strong', 'No'],
        ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
        ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
        ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
        ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ]

    tree = id3(data, attributes)

    # Heading and tree on separate lines.
    print("Decision Tree:", tree, sep="\n")

In [None]:
# -----------------------------------------------------------
# NAIVE BAYES CLASSIFIER (Categorical Data)
# Full Algorithm + Code Combined
# -----------------------------------------------------------

import math
from collections import Counter, defaultdict


class NaiveBayes:
    """
    Naive Bayes classifier for categorical features.

    Attributes set by fit():
        classes:      set of distinct class labels seen in training.
        class_priors: dict mapping class -> P(class).
        likelihoods:  plain nested dict
                      class -> feature index -> value -> P(value | class).
                      Only (feature, value) pairs observed with that class
                      are stored.
    """

    def __init__(self, unseen_prob=1e-6):
        """
        Args:
            unseen_prob: probability used for a feature value that was
                never observed together with a class during training.
                Must satisfy 0 < unseen_prob <= 1.

        Raises:
            ValueError: if unseen_prob is outside (0, 1].
        """
        if not 0 < unseen_prob <= 1:
            raise ValueError("unseen_prob must be in the interval (0, 1]")
        self.unseen_prob = unseen_prob
        self.class_priors = {}
        self.likelihoods = {}
        self.classes = None

    # -------------------------------------------------------
    # Train the model
    # -------------------------------------------------------
    def fit(self, X, Y):
        """
        Estimate priors and per-feature likelihoods from training data.

        Any state left over from an earlier fit() call is replaced.

        Args:
            X: iterable of samples; each sample is an iterable of hashable
               feature values.
            Y: iterable of class labels, one per sample in X.

        Raises:
            ValueError: if X and Y contain a different number of items.
        """
        samples = list(X)
        labels = list(Y)
        if len(samples) != len(labels):
            # zip() would silently drop the surplus and leave priors and
            # likelihoods computed from different sets of rows.
            raise ValueError(
                "X and Y must have the same length "
                f"(got {len(samples)} and {len(labels)})"
            )

        n = len(labels)
        class_counts = Counter(labels)
        self.classes = set(labels)

        # Step 1: Compute Prior Probabilities P(C)
        # Counter keeps first-seen order, which later gives predict() a
        # reproducible tie-break.
        self.class_priors = {
            c: count / n for c, count in class_counts.items()
        }

        # Step 2: Count occurrences of every (feature index, value) pair
        # per class.
        value_counts = {c: {} for c in class_counts}
        for features, label in zip(samples, labels):
            per_feature = value_counts[label]
            for i, value in enumerate(features):
                per_feature.setdefault(i, Counter())[value] += 1

        # Convert counts to probabilities P(x_i | C). Plain dicts keep the
        # fitted model picklable and stop lookups from inserting entries.
        self.likelihoods = {
            c: {
                i: {
                    value: count / class_counts[c]
                    for value, count in counts.items()
                }
                for i, counts in per_feature.items()
            }
            for c, per_feature in value_counts.items()
        }

    # -------------------------------------------------------
    # Predict
    # -------------------------------------------------------
    def predict(self, X):
        """
        Predict the most probable class for every sample in X.

        Scores are accumulated as sums of log-probabilities rather than
        products of probabilities, so samples with many features do not
        underflow to 0.0 for every class.

        Args:
            X: iterable of samples, each an iterable of feature values.

        Returns:
            List with one predicted class label per sample. When several
            classes tie, the one seen first during training is returned.

        Raises:
            ValueError: if the model holds no classes (fit() not called,
                or called with no training data).
        """
        if not self.class_priors:
            raise ValueError(
                "NaiveBayes has no classes; call fit() with training data "
                "before predict()"
            )

        log_unseen = math.log(self.unseen_prob)
        predictions = []

        for sample in X:
            class_scores = {}

            for c, prior in self.class_priors.items():
                feature_tables = self.likelihoods[c]

                # Start with the log prior, then add log likelihoods.
                score = math.log(prior)
                for i, value in enumerate(sample):
                    prob = feature_tables.get(i, {}).get(value)
                    if prob is None:
                        # Value never seen with this class in training.
                        score += log_unseen
                    else:
                        score += math.log(prob)

                class_scores[c] = score

            # Select class with highest (log) probability.
            predictions.append(max(class_scores, key=class_scores.get))

        return predictions


# -----------------------------------------------------------
# Example Usage
# -----------------------------------------------------------

if __name__ == "__main__":

    model = NaiveBayes()

    # Categorical training samples: Outlook, Temperature, Humidity
    X_train = [
        ['Sunny', 'Hot', 'High'],
        ['Sunny', 'Hot', 'High'],
        ['Overcast', 'Hot', 'High'],
        ['Rain', 'Mild', 'High'],
        ['Rain', 'Cool', 'Normal'],
        ['Rain', 'Cool', 'Normal'],
        ['Overcast', 'Cool', 'Normal'],
    ]

    # One class label per training sample, in the same order.
    Y_train = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes']

    model.fit(X_train, Y_train)

    # predict() takes a list of samples, so the single sample is wrapped.
    test_sample = [['Sunny', 'Cool', 'High']]
    prediction = model.predict(test_sample)

    print("Predicted Class:", prediction[0])