In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris


In [3]:
# Fetch the iris data as plain NumPy arrays: X holds the feature matrix,
# y holds the integer class label of each sample.
X, y = load_iris(return_X_y=True, as_frame=False)

In [4]:
# Inspect the label vector: integer classes 0, 1 and 2, stored in class order.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
from sklearn.tree import DecisionTreeClassifier

# Baseline model: scikit-learn's reference decision tree.
# scikit-learn randomly permutes the features at every split, so candidate
# splits that tie can be resolved differently between runs. Pinning
# random_state makes the fitted tree (and the accuracy reported below)
# reproducible under Restart & Run All.
decision_ = DecisionTreeClassifier(random_state=42)

decision_.fit(X_train, y_train)

# Predictions on the held-out samples, scored in the next cell.
y_pred = decision_.predict(X_test)

In [7]:
from sklearn.metrics import accuracy_score

# Share of held-out samples whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 100.00%


In [8]:
class CustomDecisionTreeClassifier():
    """Minimal decision tree classifier that splits on weighted Gini impurity.

    The fitted tree is stored in ``self.tree`` as nested dictionaries:

    * leaf:     ``{"leaf": True, "class": label}``
    * internal: ``{"leaf": False, "feature": column_index, "value": threshold,
      "left": subtree, "right": subtree}``

    At an internal node a sample goes left when
    ``x[feature] <= value`` and right otherwise.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Parameters
        ----------
        max_depth : int or None, default=None
            Maximum number of splits on any root-to-leaf path. ``None`` grows
            the tree until every node is pure or cannot be split any further.
        """
        self.max_depth = max_depth
        self.tree = None

    def gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``.

        An empty label set is treated as pure and yields ``0.0``.
        """
        if len(y) == 0:
            return 0.0

        _, counts = np.unique(y, return_counts=True)
        p = counts / counts.sum()

        return 1 - np.sum(p ** 2)

    def best_split(self, X_train, y_train):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Every distinct value of every column is tried as a threshold. Splits
        that would leave one side empty are skipped, because they do not
        partition the node and would make tree construction recurse on the
        same data.

        Returns
        -------
        tuple
            ``(feature_index, threshold)``, or ``(None, None)`` when no
            threshold separates the samples (e.g. all rows are identical).
        """
        best_gini = float("inf")
        best_features = None
        best_value = None

        n_samples, n_features = X_train.shape

        for feature in range(n_features):

            column = X_train[:, feature]

            for value in np.unique(column):
                left_mask = column <= value
                # Complement of the left mask, so every sample lands on exactly
                # one side (mirrors the routing rule used in predict_one).
                right_mask = ~left_mask

                y_left = y_train[left_mask]
                y_right = y_train[right_mask]

                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                weighted_gini = (
                    len(y_left) * self.gini(y_left) + len(y_right) * self.gini(y_right)
                ) / n_samples

                # Strict '<' keeps the first candidate found among ties.
                if weighted_gini < best_gini:
                    best_gini = weighted_gini
                    best_features = feature
                    best_value = value

        return best_features, best_value

    def build_tree(self, X_train, y_train, depth=0):
        """Recursively build the subtree for the given samples.

        Parameters
        ----------
        X_train : numpy.ndarray of shape (n_samples, n_features)
        y_train : numpy.ndarray of shape (n_samples,)
        depth : int, default=0
            Depth of the node being built (the root is depth 0).

        Returns
        -------
        dict
            A leaf or internal node in the format described on the class.

        Raises
        ------
        ValueError
            If called with zero samples.
        """
        if len(y_train) == 0:
            raise ValueError("Cannot build a decision tree from zero samples.")

        # np.unique handles any comparable label type, and (unlike the length
        # of np.bincount) its length is the number of classes actually present.
        classes, counts = np.unique(y_train, return_counts=True)
        majorities_classes = classes[np.argmax(counts)]

        is_pure = len(classes) == 1
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth

        if is_pure or depth_exhausted:
            return {"leaf": True, "class": majorities_classes}

        best_feature, best_value = self.best_split(X_train, y_train)

        # No threshold separates the samples: fall back to the majority class.
        if best_feature is None:
            return {"leaf": True, "class": majorities_classes}

        left_mask = X_train[:, best_feature] <= best_value
        right_mask = ~left_mask

        left_tree = self.build_tree(X_train[left_mask], y_train[left_mask], depth + 1)
        right_tree = self.build_tree(X_train[right_mask], y_train[right_mask], depth + 1)

        return {
            "leaf": False,
            "feature": best_feature,
            "value": best_value,
            "left": left_tree,
            "right": right_tree
        }

    def fit(self, X_train, y_train):
        """Fit the tree on ``X_train`` / ``y_train`` and store it in ``self.tree``.

        Inputs are converted with ``np.asarray``, so nested lists and pandas
        objects are accepted as well as NumPy arrays.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, if the number of rows and labels
            differ, or if there are no samples.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (n_samples, n_features).")
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must contain the same number of samples.")

        self.tree = self.build_tree(X_train, y_train)

    def predict_one(self, X_test):
        """Return the predicted class for a single sample (1-D feature vector).

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.tree is None:
            raise RuntimeError("This CustomDecisionTreeClassifier is not fitted yet; call fit() first.")

        node = self.tree
        # Walk down from the root until a leaf is reached.
        while not node["leaf"]:
            if X_test[node["feature"]] <= node["value"]:
                node = node["left"]
            else:
                node = node["right"]

        return node["class"]

    def predict(self, X_test):
        """Return a NumPy array with the predicted class for every row of ``X_test``."""
        return np.array([self.predict_one(x) for x in np.asarray(X_test)])

In [9]:
# Train the hand-written tree on the same training split, capped at depth 3.
decision_custom = CustomDecisionTreeClassifier(max_depth=3)
decision_custom.fit(X_train=X_train, y_train=y_train)

In [10]:
# Classify the held-out samples with the hand-written tree.
y_pred_custom = decision_custom.predict(X_test)

In [11]:
# Score the custom tree's predictions against the true test labels.
accuracy_custom = accuracy_score(y_true=y_test, y_pred=y_pred_custom)

In [12]:
# Report the custom tree's test accuracy as a percentage with two decimals.
print(f"Custom Decision Tree Accuracy: {accuracy_custom * 100:.2f}%")

Custom Decision Tree Accuracy: 100.00%
