In [None]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

class Node:
    """A single node of a binary decision tree.

    An internal node stores the split (``feature`` index and ``threshold``)
    together with its ``left`` and ``right`` children. A leaf stores only
    ``value``; every argument defaults to ``None``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split description (used by internal nodes).
        self.feature, self.threshold = feature, threshold
        # Child subtrees.
        self.left, self.right = left, right
        # Stored label (used by leaves).
        self.value = value

class DecisionTree:
    """Binary classification tree grown greedily by impurity reduction.

    Each internal node tests ``x[feature] <= threshold``; samples for which
    the test holds go left, all others go right.

    Parameters
    ----------
    criterion : {'entropy', 'gini'}
        Impurity measure used to score candidate splits.
    max_depth : int or None
        Maximum depth of the tree; ``None`` means no depth limit.
    min_samples_leaf : int
        Minimum number of training samples each child of a split must
        receive. Splits that would produce a smaller child are rejected.

    Attributes
    ----------
    tree : Node or None
        Root of the fitted tree, ``None`` until ``fit`` has been called.
    """

    def __init__(self, criterion='entropy', max_depth=None, min_samples_leaf=1):
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.tree = None

    @staticmethod
    def _class_probabilities(y):
        """Return the relative frequency of each distinct label in ``y``."""
        # np.unique works for any sortable label type (not only the
        # non-negative integers np.bincount requires) and never yields
        # zero counts, so every probability is strictly positive.
        _, counts = np.unique(y, return_counts=True)
        return counts / len(y)

    def _entropy(self, y):
        """Calculate entropy (in bits) of a target vector."""
        probabilities = self._class_probabilities(y)
        return -np.sum(probabilities * np.log2(probabilities))

    def _gini(self, y):
        """Calculate Gini impurity of a target vector."""
        probabilities = self._class_probabilities(y)
        return 1 - np.sum(probabilities ** 2)

    def _impurity_function(self, criterion):
        """Return the impurity method for ``criterion``.

        Raises
        ------
        ValueError
            If ``criterion`` is neither ``'entropy'`` nor ``'gini'``.
        """
        if criterion == 'entropy':
            return self._entropy
        if criterion == 'gini':
            return self._gini
        raise ValueError(
            f"Unknown criterion {criterion!r}; expected 'entropy' or 'gini'."
        )

    def _information_gain(self, y, y_left, y_right, criterion, parent_impurity=None):
        """Calculate information gain for a split.

        Parameters
        ----------
        y : array
            Labels at the parent node.
        y_left, y_right : array
            Labels routed to the left and right child.
        criterion : {'entropy', 'gini'}
            Impurity measure to use.
        parent_impurity : float, optional
            Pre-computed impurity of ``y``; computed here when omitted.

        Returns
        -------
        float
            Parent impurity minus the size-weighted impurity of the children.

        Raises
        ------
        ValueError
            If ``criterion`` is not recognised.
        """
        impurity = self._impurity_function(criterion)
        if parent_impurity is None:
            parent_impurity = impurity(y)
        total = len(y)
        child_impurity = (
            (len(y_left) / total) * impurity(y_left)
            + (len(y_right) / total) * impurity(y_right)
        )
        return parent_impurity - child_impurity

    def _best_split(self, X, y):
        """Find the best feature and threshold to split on.

        Only splits that give each child at least ``min_samples_leaf``
        samples (and at least one) are considered.

        Returns
        -------
        tuple
            ``(feature, threshold)`` of the highest-gain admissible split, or
            ``(None, None)`` when no admissible split exists.
        """
        best_gain = -1
        best_feature, best_threshold = None, None

        # A child must never be empty, even if min_samples_leaf is set below 1.
        min_leaf = max(1, self.min_samples_leaf)
        num_samples = len(y)
        # The parent impurity is the same for every candidate; compute it once.
        parent_impurity = self._impurity_function(self.criterion)(y)

        for feature in range(X.shape[1]):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_mask = column <= threshold
                num_left = np.count_nonzero(left_mask)
                num_right = num_samples - num_left
                if num_left < min_leaf or num_right < min_leaf:
                    continue

                # The right child is the complement of the left one, which
                # is the same routing rule prediction applies.
                gain = self._information_gain(
                    y, y[left_mask], y[~left_mask], self.criterion, parent_impurity
                )

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _build_tree(self, X, y, depth=0):
        """Recursively build the decision tree and return its root node."""
        num_samples = X.shape[0]

        depth_reached = self.max_depth is not None and depth >= self.max_depth
        is_pure = len(np.unique(y)) == 1
        # Two children of min_samples_leaf each cannot fit in a smaller node.
        too_small = num_samples < 2 * max(1, self.min_samples_leaf)
        if depth_reached or is_pure or too_small:
            return Node(value=self._most_common_label(y))

        feature, threshold = self._best_split(X, y)
        if feature is None:
            return Node(value=self._most_common_label(y))

        left_mask = X[:, feature] <= threshold
        right_mask = ~left_mask

        left = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return Node(feature, threshold, left, right)

    def _most_common_label(self, y):
        """Return the most common label in a target vector."""
        return Counter(y).most_common(1)[0][0]

    def fit(self, X, y):
        """Fit the decision tree to the data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Training labels.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, ``X`` and ``y`` differ in length, the data
            is empty, or ``criterion`` is not recognised.
        """
        # Accept lists and other array-likes; the builder relies on NumPy
        # slicing and boolean masking.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples.")
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset.")
        # Fail early on a bad criterion instead of deep inside the recursion.
        self._impurity_function(self.criterion)
        self.tree = self._build_tree(X, y)

    def _predict_sample(self, x, node):
        """Predict a single sample by walking from ``node`` down to a leaf."""
        # Leaves are the nodes that carry a value; descend until one is hit.
        while node.value is None:
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X):
        """Predict labels for a dataset.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            One predicted label per row of ``X``.
        """
        X = np.asarray(X)
        return np.array([self._predict_sample(x, self.tree) for x in X])

# Load the wine dataset and hold out 20% of the samples for testing; the
# fixed random_state makes the split reproducible.
wine = load_wine()
X, y = wine.data, wine.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Custom tree, entropy criterion (max_depth=3, min_samples_leaf=2).
custom_tree_entropy = DecisionTree(criterion='entropy', max_depth=3, min_samples_leaf=2)
custom_tree_entropy.fit(X_train, y_train)
y_pred_custom_entropy = custom_tree_entropy.predict(X_test)
print(f"Custom Decision Tree (Entropy) Accuracy: {accuracy_score(y_test, y_pred_custom_entropy):.4f}")

# Custom tree, Gini criterion, same hyperparameters.
custom_tree_gini = DecisionTree(criterion='gini', max_depth=3, min_samples_leaf=2)
custom_tree_gini.fit(X_train, y_train)
y_pred_custom_gini = custom_tree_gini.predict(X_test)
print(f"Custom Decision Tree (Gini) Accuracy: {accuracy_score(y_test, y_pred_custom_gini):.4f}")

# scikit-learn reference tree, entropy criterion, same hyperparameters.
sklearn_tree_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=2, random_state=42)
sklearn_tree_entropy.fit(X_train, y_train)
y_pred_sklearn_entropy = sklearn_tree_entropy.predict(X_test)
print(f"Sklearn Decision Tree (Entropy) Accuracy: {accuracy_score(y_test, y_pred_sklearn_entropy):.4f}")

# scikit-learn reference tree, Gini criterion, same hyperparameters.
sklearn_tree_gini = DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=2, random_state=42)
sklearn_tree_gini.fit(X_train, y_train)
y_pred_sklearn_gini = sklearn_tree_gini.predict(X_test)
print(f"Sklearn Decision Tree (Gini) Accuracy: {accuracy_score(y_test, y_pred_sklearn_gini):.4f}")

Custom Decision Tree (Entropy) Accuracy: 0.9167
Custom Decision Tree (Gini) Accuracy: 0.9444
Sklearn Decision Tree (Entropy) Accuracy: 0.9167
Sklearn Decision Tree (Gini) Accuracy: 0.9444
