# In [None]:
import numpy as np
import pandas as pd
from math import log2
from collections import Counter

class J48Node:
    """A single node of the J48 tree: an internal split or a leaf.

    A node counts as a leaf as soon as ``value`` is set (anything other than
    ``None``); otherwise it is treated as a decision node.
    """

    def __init__(self, feature=None, threshold=None, children=None, value=None, split_value=None):
        """Store the node's fields.

        Args:
            feature: Feature the node splits on.
            threshold: Threshold used when the split is numerical.
            children: Mapping of branch key to child node. Any falsy argument
                (``None`` or an empty mapping) is replaced by a fresh dict.
            value: Class label carried by a leaf; ``None`` for decision nodes.
            split_value: Extra value kept for missing-value handling; it is
                only stored here, never read by this class.
        """
        self.feature = feature
        self.threshold = threshold
        # Never keep a falsy ``children`` argument: each node gets its own dict.
        self.children = children if children else {}
        self.value = value
        self.split_value = split_value

    def is_leaf(self):
        """Return True when the node holds a value, False otherwise."""
        if self.value is None:
            return False
        return True


class J48Classifier:
    """J48 Decision Tree Classifier (similar to C4.5)"""

    def __init__(self, max_depth=10, min_samples_split=2, confidence_factor=0.25, num_folds=3):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.confidence_factor = confidence_factor  # For pruning
        self.num_folds = num_folds                # For reduced-error pruning
        self.root = None

    def fit(self, X, y):
        """Build the J48 decision tree with pruning"""
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        self.n_classes = len(np.unique(y))
        self.n_features = X.shape[1]

        # First grow the tree
        self.root = self._grow_tree(X, y)

        # Then prune it
        self.root = self._prune_tree(X, y, self.root)

    def _grow_tree(self, X, y, depth=0):
        """Recursively grow the decision tree for the samples ``X`` / ``y``.

        Args:
            X: 2-D array of samples reaching this node.
            y: Array of labels, aligned with the rows of ``X``.
            depth: Depth of the node being built (0 for the root).

        Returns:
            J48Node: A leaf carrying ``_most_common_label(y)`` when a stopping
            criterion is met, ``_best_split`` reports no feature, or the chosen
            split does not actually separate the samples; otherwise a decision
            node whose children are keyed ``"left"``/``"right"`` (numerical
            split) or by the keys of the split mapping (categorical split).
        """
        n_samples = X.shape[0]
        n_labels = len(np.unique(y))

        # Stopping criteria
        if (depth >= self.max_depth
                or n_labels == 1
                or n_samples < self.min_samples_split):
            return J48Node(value=self._most_common_label(y))

        # Find best split
        best_feature, best_threshold, best_splits = self._best_split(X, y)
        if best_feature is None:  # no usable split was reported
            return J48Node(value=self._most_common_label(y))

        # Describe every branch as key -> row selector.
        if best_threshold is not None:  # Numerical feature
            column = X[:, best_feature]
            # NOTE: a NaN entry satisfies neither comparison, so such a row is
            # not routed to either child (same as before this change).
            partitions = {
                "left": column <= best_threshold,
                "right": column > best_threshold,
            }
        else:  # Categorical feature
            partitions = best_splits

        subsets = {key: (X[idxs], y[idxs]) for key, idxs in partitions.items()}

        # A split that leaves fewer than two branches with data separates
        # nothing; recursing would only repeat the same node, so stop here.
        n_populated = sum(1 for _, y_part in subsets.values() if len(y_part) > 0)
        if n_populated < 2:
            return J48Node(value=self._most_common_label(y))

        children = {}
        for key, (X_part, y_part) in subsets.items():
            if len(y_part) == 0:
                # Empty branch: fall back to this node's majority class rather
                # than asking for the most common label of zero samples.
                children[key] = J48Node(value=self._most_common_label(y))
            else:
                children[key] = self._grow_tree(X_part, y_part, depth + 1)

        return J48Node(feature=best_feature, threshold=best_threshold, children=children)