In [None]:
import numpy as np

class CustomDecisionTree:
    """Binary decision-tree classifier that splits on entropy-based information gain.

    The fitted tree is stored in ``self.tree`` as nested dicts. A leaf is
    ``{'class': label}``; an internal node is
    ``{'feature_idx': int, 'threshold': value, 'left': node, 'right': node}``,
    where samples with ``x[feature_idx] <= threshold`` follow ``'left'``.

    Labels must be non-negative integers because entropy is computed with
    ``np.bincount``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of splits on any root-to-leaf path, or
                ``None`` to grow until every leaf is pure or cannot be split.
        """
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.tree``.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples non-negative integer labels.

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` have different
                lengths, or if there are no samples.
        """
        # Ensure inputs are NumPy arrays
        X = np.array(X)
        y = np.array(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got {X.ndim}-D")
        if len(X) != len(y):
            raise ValueError(f"X and y have different lengths: {len(X)} != {len(y)}")
        if len(y) == 0:
            raise ValueError("Cannot fit a tree on an empty training set")
        self.tree = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """Recursively grow a subtree for the samples ``(X, y)``.

        Args:
            X: 2-D array of the samples that reached this node.
            y: 1-D array of their labels.
            depth: Number of splits already made above this node.

        Returns:
            A leaf dict or an internal-node dict (see the class docstring).
        """
        num_features = X.shape[1]
        unique_classes, class_counts = np.unique(y, return_counts=True)
        # Most frequent label at this node; argmax picks the smallest label on ties.
        majority_class = unique_classes[class_counts.argmax()]

        # Stopping criteria: pure node, or depth limit reached. A depth-limited
        # node may still be impure, so it must be labelled with the majority
        # class rather than simply the smallest label present.
        if len(unique_classes) == 1 or (self.max_depth is not None and depth >= self.max_depth):
            return {'class': majority_class}

        best_info_gain, best_split = -1, None

        for feature_idx in range(num_features):
            column = X[:, feature_idx]
            for threshold in np.unique(column):
                left_mask = column <= threshold
                # A split that sends every sample to one side is useless
                # (e.g. threshold equal to the column maximum).
                if left_mask.all() or not left_mask.any():
                    continue
                info_gain = self._information_gain(y, y[left_mask], y[~left_mask])
                # Strict '>' keeps the first split found among equal gains.
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_split = (feature_idx, threshold)

        # No feature can separate the samples (all rows identical per feature).
        if best_split is None:
            return {'class': majority_class}

        feature_idx, threshold = best_split
        left_mask = X[:, feature_idx] <= threshold
        right_mask = ~left_mask

        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return {'feature_idx': feature_idx,
                'threshold': threshold,
                'left': left_tree,
                'right': right_tree}

    def _information_gain(self, parent, left, right):
        """Return the entropy reduction from splitting ``parent`` into ``left`` and ``right``.

        Each child's entropy is weighted by its share of the parent's samples.
        """
        prob_left = len(left) / len(parent)
        prob_right = len(right) / len(parent)
        return self._entropy(parent) - (prob_left * self._entropy(left) + prob_right * self._entropy(right))

    def _entropy(self, y):
        """Return the Shannon entropy (in bits) of the integer label array ``y``."""
        hist = np.bincount(y)
        ps = hist / len(y)
        # Zero-probability classes are skipped because log2(0) is undefined.
        return -np.sum([p * np.log2(p) for p in ps if p > 0])

    def predict(self, X):
        """Predict a label for each row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("CustomDecisionTree.predict called before fit")
        return [self._traverse_tree(x, self.tree) for x in np.array(X)]

    def _traverse_tree(self, x, tree):
        """Walk from node ``tree`` down to a leaf for sample ``x`` and return its label."""
        node = tree
        # Iterative descent: go left when the feature value is <= threshold.
        while 'class' not in node:
            feature_value = x[node['feature_idx']]
            node = node['left'] if feature_value <= node['threshold'] else node['right']
        return node['class']

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Path to the Iris CSV (looks like a Colab Google Drive mount; adjust for other environments).
IRIS_CSV_PATH = "/content/drive/MyDrive/Concepts and Technology of AI/IRIS.csv"

# Read the dataset into a DataFrame.
data = pd.read_csv(IRIS_CSV_PATH)

# The last column is assumed to be the target; every column before it is a feature.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Convert the target labels to numeric codes.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 3: train and evaluate the custom decision tree.
# The custom tree is given plain NumPy arrays rather than DataFrames.
X_train_np, X_test_np = X_train.to_numpy(), X_test.to_numpy()
# The label arrays are reused unchanged (already numeric).
y_train_np, y_test_np = y_train, y_test

# Fit a custom tree limited to depth 3 on the training split.
custom_tree = CustomDecisionTree(max_depth=3)
custom_tree.fit(X_train_np, y_train_np)

# Predict labels for the held-out rows.
y_pred_custom = custom_tree.predict(X_test_np)

# Score the predictions against the true test labels.
accuracy_custom = accuracy_score(y_test_np, y_pred_custom)
print(f"Custom Decision Tree Accuracy: {accuracy_custom:.4f}")


Custom Decision Tree Accuracy: 0.8444


In [None]:
# Step 4: train and evaluate a scikit-learn decision tree.

from sklearn.tree import DecisionTreeClassifier

# Depth 3 with a fixed seed; fit() returns the fitted estimator itself.
sklearn_tree = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred_sklearn = sklearn_tree.predict(X_test)

# Score the predictions against the true test labels.
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Scikit-learn Decision Tree Accuracy: {accuracy_sklearn:.4f}")

Scikit-learn Decision Tree Accuracy: 1.0000


In [None]:
# Print both accuracies under one heading, one line each.
print(
    "Accuracy Comparison:",
    f"Custom Decision Tree Accuracy: {accuracy_custom:.4f}",
    f"Scikit-learn Decision Tree Accuracy: {accuracy_sklearn:.4f}",
    sep="\n",
)

Accuracy Comparison:
Custom Decision Tree Accuracy: 0.8444
Scikit-learn Decision Tree Accuracy: 1.0000
