In [2]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

In [6]:
class Node:
    """One vertex of a binary decision tree.

    A node created with a ``value`` acts as a leaf carrying that prediction;
    otherwise it is a decision node that routes a sample to ``left`` or
    ``right`` according to ``feature`` and ``threshold``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split description (meaningful for decision nodes).
        self.feature, self.threshold = feature, threshold
        # Child subtrees.
        self.left, self.right = left, right
        # Prediction payload (meaningful for leaves).
        self.value = value

    def is_leaf(self):
        """Return True when this node stores a prediction value.

        The check is against ``None`` specifically, so falsy payloads such
        as the label ``0`` still count as leaves.
        """
        if self.value is None:
            return False
        return True

In [15]:
class DecisionTreeClassifier:
    """Binary classification tree grown greedily on weighted Gini impurity.

    Every decision node tests ``x[feature] <= threshold``; samples that pass
    go to the left child, the rest go to the right child. Candidate
    thresholds are the distinct values observed for each feature.

    Parameters
    ----------
    max_depth : int or None, default 5
        Maximum number of splits on any root-to-leaf path. ``None`` removes
        the limit, so growth stops only when a node is pure or cannot be
        split any further.

    Attributes
    ----------
    root : Node or None
        Root of the fitted tree; ``None`` until :meth:`fit` has been called.
    """

    def __init__(self, max_depth=5):
        # Store the caller's value rather than a hard-coded constant so the
        # constructor argument actually controls tree depth.
        self.max_depth = max_depth
        self.root = None

    def fit(self, X, y):
        """Grow the tree from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, ``y`` is not 1-D, their lengths differ, or
            there are no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-dimensional, got {y.ndim} dimension(s)")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y disagree on sample count: {X.shape[0]} vs {y.shape[0]}"
            )
        if X.shape[0] == 0:
            raise ValueError("cannot fit a tree on zero samples")
        self.root = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        A leaf holding the majority label is produced when the depth limit
        is reached, when all labels agree, or when no threshold separates
        the samples into two non-empty groups.
        """
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or len(np.unique(y)) == 1:
            return Node(value=self._most_common_label(y))

        best_feat, best_thres = self._best_split(X, y)
        if best_feat is None:
            # Happens when every remaining row has identical features but
            # the labels differ; indexing with None would otherwise fail.
            return Node(value=self._most_common_label(y))

        column = X[:, best_feat]
        left_idx = column <= best_thres
        right_idx = column > best_thres
        left = self._build_tree(X[left_idx], y[left_idx], depth + 1)
        right = self._build_tree(X[right_idx], y[right_idx], depth + 1)
        return Node(feature=best_feat, threshold=best_thres, left=left, right=right)

    def _gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``."""
        # One pass for all class counts instead of rescanning y per class.
        _, counts = np.unique(y, return_counts=True)
        total = len(y)
        gini = 1.0
        for count in counts:
            p = count / total
            gini -= p ** 2
        return gini

    def _gini_split(self, y_left, y_right):
        """Return the size-weighted average Gini impurity of two partitions."""
        n = len(y_left) + len(y_right)
        gini_left = self._gini(y_left)
        gini_right = self._gini(y_right)
        return (len(y_left) / n) * gini_left + (len(y_right) / n) * gini_right

    def _best_split(self, X, y):
        """Search every feature and observed value for the lowest-impurity split.

        Returns
        -------
        (feature_index, threshold)
            The first split achieving the minimum weighted Gini impurity, or
            ``(None, None)`` when no candidate yields two non-empty sides.
        """
        best_gini = float('inf')
        best_feature = None
        best_threshold = None
        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]  # hoisted: invariant across thresholds
            for threshold in np.unique(column):
                y_left = y[column <= threshold]
                y_right = y[column > threshold]
                # A one-sided partition is not a split at all.
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                gini = self._gini_split(y_left, y_right)
                # Strict comparison keeps the earliest candidate on ties.
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature_index
                    best_threshold = threshold
        return best_feature, best_threshold

    def _most_common_label(self, y):
        """Return the label that occurs most often in ``y``."""
        return Counter(y).most_common(1)[0][0]

    def predict(self, X):
        """Return an array with the predicted label for each row of ``X``."""
        return np.array([self._traverse_tree(x, self.root) for x in np.asarray(X)])

    def _traverse_tree(self, x, node):
        """Follow the split tests from ``node`` down to a leaf; return its value."""
        while not node.is_leaf():
            node = node.left if x[node.feature] <= node.threshold else node.right
        return node.value

    def accuracy(self, y_true, y_pred):
        """Return the fraction of positions where ``y_true`` equals ``y_pred``."""
        # Coerce to arrays so two plain lists are compared element-wise
        # rather than as whole objects.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        return np.sum(y_true == y_pred) / len(y_true)

In [21]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter

In [22]:
# Fetch the Iris dataset and pull out its feature matrix and target labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
# Create a tree, requesting a maximum depth of 3, and fit it on the training split
model = DecisionTreeClassifier(max_depth=3)
model.fit(X_train, y_train)

# Predict a label for every held-out sample
y_pred = model.predict(X_test)

# accuracy() returns a fraction; it is scaled to a percentage only for display
acc = model.accuracy(y_test, y_pred)
print(f"Accuracy on Iris Dataset: {acc * 100:.2f}%")

Accuracy on Iris Dataset: 100.00%


In [26]:
# Show the predicted label for each test sample, in test-set order
print("Predictions: ", y_pred)

Predictions:  [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [27]:
# Walk the test labels and report each position where the prediction disagrees;
# nothing is printed when every prediction matches.
for i, actual in enumerate(y_test):
    if actual == y_pred[i]:
        continue
    print(f"Wrong prediction → True: {y_test[i]}, Predicted: {y_pred[i]}")
