<a href="https://colab.research.google.com/github/Sparkashok/Machine-Learning/blob/main/ML_DecisionTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from graphviz import Digraph

class Node:
    """A single vertex of a binary decision tree.

    An internal node stores the split rule (``feature`` index and
    ``threshold``) together with its two children; a leaf stores only the
    predicted ``value``. Every field defaults to ``None``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule: which column is tested and the cut-off it is compared to.
        self.feature, self.threshold = feature, threshold
        # Child subtrees reached from this node.
        self.left, self.right = left, right
        # Prediction stored at a leaf; left as None on internal nodes.
        self.value = value

def entropy(y):
    """Return the Shannon entropy (in bits) of the labels in ``y``.

    Args:
        y: Array-like of class labels; only the frequency of each distinct
            label matters.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed label
        frequencies. A pure (single-class) or empty input yields ``0.0``.
    """
    # Only the per-label counts are needed; the distinct labels are discarded.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / counts.sum()
    # Negating a zero sum gives -0.0; adding +0.0 turns that into a plain 0.0
    # and leaves every non-zero result bit-for-bit unchanged.
    return -np.sum(probabilities * np.log2(probabilities)) + 0.0

def best_split(X, y):
    """Find the single-feature threshold split with the highest information gain.

    Every distinct value of every column of ``X`` is tried as a threshold;
    samples with ``value <= threshold`` go left and the rest go right.

    Args:
        X: 2-D NumPy array, one row per sample and one column per feature.
        y: 1-D NumPy array of labels aligned with the rows of ``X``.

    Returns:
        A ``(feature_index, threshold)`` tuple for the best split found, or
        ``(None, None)`` when no threshold separates the samples into two
        non-empty groups.
    """
    # Loop invariants: the parent entropy and sample count never change.
    n_samples = len(y)
    parent_entropy = entropy(y)

    # Start below any attainable gain so that zero-gain splits are accepted.
    best_gain = -1
    best_feature = None
    best_threshold = None

    for feature in range(X.shape[1]):
        column = X[:, feature]
        for threshold in np.unique(column):
            left_indices = column <= threshold
            right_indices = column > threshold
            n_left = np.sum(left_indices)
            n_right = np.sum(right_indices)
            # A split that leaves one side empty separates nothing.
            if n_left == 0 or n_right == 0:
                continue
            left_entropy = entropy(y[left_indices])
            right_entropy = entropy(y[right_indices])
            # Information gain: parent entropy minus the size-weighted
            # average entropy of the two children.
            gain = parent_entropy - (
                n_left / n_samples * left_entropy
                + n_right / n_samples * right_entropy
            )
            # Strict comparison keeps the first split found among ties.
            if gain > best_gain:
                best_gain = gain
                best_feature = feature
                best_threshold = threshold
    return best_feature, best_threshold

def build_tree(X, y):
    """Recursively grow an unpruned decision tree for ``X`` / ``y``.

    Splitting continues until a node is pure (one distinct label) or no
    valid split exists, in which case the node becomes a majority-vote leaf.

    Args:
        X: 2-D NumPy array, one row per sample and one column per feature.
        y: 1-D NumPy array of labels aligned with the rows of ``X``. Any
            label type ``np.unique`` can sort is accepted (not only
            non-negative integers).

    Returns:
        The root ``Node`` of the fitted (sub)tree.

    Raises:
        ValueError: If ``y`` is empty.
    """
    labels, counts = np.unique(y, return_counts=True)
    if len(labels) == 0:
        raise ValueError("cannot build a decision tree from an empty dataset")

    # Pure node: every sample shares the same label, so stop here.
    if len(labels) == 1:
        return Node(value=labels[0])

    feature, threshold = best_split(X, y)
    if feature is None:
        # No threshold separates the samples (e.g. identical rows with
        # different labels): fall back to the most frequent label. On ties
        # the smallest label wins, as with np.bincount(...).argmax().
        return Node(value=labels[counts.argmax()])

    goes_left = X[:, feature] <= threshold
    goes_right = X[:, feature] > threshold
    left_subtree = build_tree(X[goes_left], y[goes_left])
    right_subtree = build_tree(X[goes_right], y[goes_right])
    return Node(feature, threshold, left_subtree, right_subtree)

def predict(node, x):
    """Classify one sample by walking the tree from ``node`` down to a leaf.

    At each internal node the sample goes left when
    ``x[feature] <= threshold`` and right otherwise; the ``value`` of the
    leaf that is reached is returned.
    """
    current = node
    # A node with value None is an internal node; keep descending.
    while current.value is None:
        goes_left = x[current.feature] <= current.threshold
        current = current.left if goes_left else current.right
    return current.value

def predict_all(tree, X):
    """Classify every sample in ``X`` with ``tree``.

    ``X`` is iterated sample by sample; the per-sample predictions are
    returned as a NumPy array in the same order.
    """
    labels = [predict(tree, sample) for sample in X]
    return np.array(labels)

def visualize_tree(node, dot=None, parent=None, edge_label=None):
    """Add the subtree rooted at ``node`` to a graph and return the graph.

    A new ``Digraph`` is created when ``dot`` is not supplied. Leaves are
    labelled with their class, internal nodes with their split rule, and
    each child is linked from its parent by a "Yes" (left) or "No" (right)
    edge. ``parent`` and ``edge_label`` describe the edge leading into
    ``node`` itself, if any.
    """
    graph = Digraph() if dot is None else dot

    # Explicit stack of (node, its parent, label of the incoming edge).
    pending = [(node, parent, edge_label)]
    while pending:
        current, origin, label = pending.pop()
        current_id = str(id(current))

        if current.value is None:
            caption = f"Feature {current.feature}\n<= {current.threshold}"
        else:
            caption = f"Class: {current.value}"
        graph.node(current_id, caption)

        if origin is not None:
            graph.edge(str(id(origin)), current_id, label=label)

        # Push right before left so the left subtree is emitted first,
        # giving a pre-order, left-to-right walk.
        if current.right:
            pending.append((current.right, current, "No"))
        if current.left:
            pending.append((current.left, current, "Yes"))
    return graph

# Load the Iris dataset and hold out 20% of the samples for testing.
# random_state is fixed so the train/test partition is reproducible.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the tree on the training split only, then render it with graphviz.
# The render call targets the name "decision_tree" in PNG format;
# cleanup=False presumably keeps the intermediate graph source file next to
# the image (confirm against the graphviz docs).
tree = build_tree(X_train, y_train)
dot = visualize_tree(tree)
dot.render("decision_tree", format="png", cleanup=False)

# Score the fitted tree on the held-out test split and report the accuracy
# to two decimal places.
y_pred = predict_all(tree, X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 1.00
