In [61]:
import numpy as np

In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Dataset
data = pd.DataFrame({
    'Age': [25, 30, 35, 40, 45, 50, 55, 60],
    'Income': ['High', 'High', 'Medium', 'Low', 'Low', 'Low', 'Medium', 'High'],
    'Student': ['No', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No'],
    'Credit': ['Fair', 'Excellent', 'Fair', 'Fair', 'Fair', 'Excellent', 'Excellent', 'Fair'],
    'Buy Computer': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No']
})

# Encode categorical features as numerical values
data_encoded = data.copy()
data_encoded['Income'] = data_encoded['Income'].map({'High': 0, 'Medium': 1, 'Low': 2})
data_encoded['Student'] = data_encoded['Student'].map({'No': 0, 'Yes': 1})
data_encoded['Credit'] = data_encoded['Credit'].map({'Fair': 0, 'Excellent': 1})
data_encoded['Buy Computer'] = data_encoded['Buy Computer'].map({'No': 0, 'Yes': 1})

# Features and target variable
X = data_encoded[['Age', 'Income', 'Student', 'Credit']].values
y = data_encoded['Buy Computer'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train Decision Tree
tree = DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=1, random_state=42)
tree.fit(X_train, y_train)

# Predict for a new person (Age=42, Income=Low, Student=No, Credit=Excellent)
new_person = [[42, 2, 0, 1]]  # Low Income (2), No Student (0), Excellent Credit (1)
prediction = tree.predict(new_person)
print(f'Prediction for new person (Age=42, Income=Low, Student=No, Credit=Excellent): {"Buy" if prediction[0] == 1 else "No Buy"}')


Prediction for new person (Age=42, Income=Low, Student=No, Credit=Excellent): Buy


In [33]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

# Bagging with 10 trees
bagging = BaggingClassifier(estimator=DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=1),
                           n_estimators=10, random_state=42, oob_score=True)
bagging.fit(X_train, y_train)

# Predict on test data
y_pred = bagging.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Compute OOB error
oob_error = 1 - bagging.oob_score_

print(f'Accuracy of Bagging Model: {accuracy * 100:.2f}%')
print(f'OOB Error of Bagging Model: {oob_error * 100:.2f}%')
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging.predict(new_sample)
print(f'Prediction for new person: {"Buy" if pred[0] == 1 else "No Buy"}')


Accuracy of Bagging Model: 50.00%
OOB Error of Bagging Model: 16.67%
Prediction for new person: Buy


In [60]:
# Bagging with 10 trees, using only two random predictors
bagging_random_features = BaggingClassifier(estimator=DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=1),
                                            n_estimators=10, max_features=2, random_state=42, oob_score=True)
bagging_random_features.fit(X_train, y_train)

# Predict on test data
y_pred_random_features = bagging_random_features.predict(X_test)
accuracy_random_features = accuracy_score(y_test, y_pred_random_features)

# Compute OOB error
oob_error_random_features = 1 - bagging_random_features.oob_score_

print(f'Accuracy of Bagging with Random Features: {accuracy_random_features * 100:.2f}%')
print(f'OOB Error of Bagging with Random Features: {oob_error_random_features * 100:.2f}%')
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging.predict(new_sample)
print(f'Prediction for new person: {"Buy" if pred[0] == 1 else "No Buy"}')


Accuracy of Bagging with Random Features: 50.00%
OOB Error of Bagging with Random Features: 16.67%
Prediction for new person: Buy


Without using sklearn

Without scklearn 
Q3

Q3

In [11]:
import numpy as np

# Dataset
data = np.array([
    [25, 'High', 'No', 'Fair', 'No'],
    [30, 'High', 'No', 'Excellent', 'No'],
    [35, 'Medium', 'No', 'Fair', 'Yes'],
    [40, 'Low', 'No', 'Fair', 'Yes'],
    [45, 'Low', 'Yes', 'Fair', 'Yes'],
    [50, 'Low', 'Yes', 'Excellent', 'No'],
    [55, 'Medium', 'Yes', 'Excellent', 'Yes'],
    [60, 'High', 'No', 'Fair', 'No']
])

# Mapping for the categorical columns only
feature_map = {
    'High': 0, 'Medium': 1, 'Low': 2, 'No': 0, 'Yes': 1,
    'Fair': 0, 'Excellent': 1
}

# Manually encode categorical columns and leave numerical columns (age) as they are
def encode_data(data):
    encoded_data = []

    for row in data:
        encoded_row = []
        # Encode categorical features using the feature_map
        for i, value in enumerate(row[1:]):  # Skip the age column
            if value in feature_map:
                encoded_row.append(feature_map[value])
            else:
                encoded_row.append(value)  # For age (numeric), leave it as it is
        encoded_data.append([int(row[0])] + encoded_row)  # Add the age column back as it is

    return np.array(encoded_data)

# Encode the data
data_encoded = encode_data(data)

# Check the encoded data
print("Encoded Data:\n", data_encoded)

# Extract features and target
X = data_encoded[:, :-1]  # Features (age, income, student, credit)
y = data_encoded[:, -1]   # Target (buy computer)

# Check if X and y are encoded correctly
print("Encoded Features:\n", X)
print("Encoded Target:\n", y)

# Ensure X and y are numpy arrays and their types are correct
X = np.array(X, dtype=int)
y = np.array(y, dtype=int)

# Decision Tree Implementation
class DecisionTree:
    def __init__(self, max_depth=None, min_samples_leaf=1):
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.tree = None

    def gini_impurity(self, y):
        class_labels = np.unique(y)
        impurity = 1
        for label in class_labels:
            prob = np.sum(y == label) / len(y)
            impurity -= prob ** 2
        return impurity

    def split_dataset(self, X, y, feature_index, threshold):
        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask
        return X[left_mask], X[right_mask], y[left_mask], y[right_mask]

    def best_split(self, X, y):
        best_gini = float('inf')
        best_split = None
        num_features = X.shape[1]

        for feature_index in range(num_features):
            thresholds = np.unique(X[:, feature_index])

            for threshold in thresholds:
                X_left, X_right, y_left, y_right = self.split_dataset(X, y, feature_index, threshold)


                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                gini_left = self.gini_impurity(y_left)
                gini_right = self.gini_impurity(y_right)
                gini = (len(y_left) / len(y)) * gini_left + (len(y_right) / len(y)) * gini_right

                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature_index, threshold)

        return best_split

    def build_tree(self, X, y, depth=0):
        if len(np.unique(y)) == 1:  # If only one class remains, stop splitting
            return {'label': y[0]}

        if len(y) <= self.min_samples_leaf or (self.max_depth and depth >= self.max_depth):
            return {'label': np.bincount(y).argmax()}

        feature_index, threshold = self.best_split(X, y)
        if feature_index is None:
            return {'label': np.bincount(y).argmax()}

        X_left, X_right, y_left, y_right = self.split_dataset(X, y, feature_index, threshold)

        left_tree = self.build_tree(X_left, y_left, depth + 1)
        right_tree = self.build_tree(X_right, y_right, depth + 1)

        return {'feature_index': feature_index, 'threshold': threshold, 'left': left_tree, 'right': right_tree}

    def fit(self, X, y):
        self.tree = self.build_tree(X, y)

    def predict_one(self, node, x):
        if 'label' in node:
            return node['label']

        if x[node['feature_index']] <= node['threshold']:
            return self.predict_one(node['left'], x)
        else:
            return self.predict_one(node['right'], x)

    def predict(self, X):
        return np.array([self.predict_one(self.tree, x) for x in X])

# Train a Decision Tree
tree = DecisionTree(max_depth=3, min_samples_leaf=2)
tree.fit(X, y)

# Predict for a new person
new_person = np.array([[42, 2, 0, 1]])  # (Age=42, Low Income, No Student, Excellent Credit)
prediction = tree.predict(new_person)
print(f'Prediction for new person: {"Buy" if prediction[0] == 1 else "No Buy"}')


Encoded Data:
 [[25  0  0  0  0]
 [30  0  0  1  0]
 [35  1  0  0  1]
 [40  2  0  0  1]
 [45  2  1  0  1]
 [50  2  1  1  0]
 [55  1  1  1  1]
 [60  0  0  0  0]]
Encoded Features:
 [[25  0  0  0]
 [30  0  0  1]
 [35  1  0  0]
 [40  2  0  0]
 [45  2  1  0]
 [50  2  1  1]
 [55  1  1  1]
 [60  0  0  0]]
Encoded Target:
 [0 0 1 1 1 0 1 0]
Prediction for new person: Buy


Q4

Q4 a Improve the performance by bagging 10 different trees. Compute the OOB
error.

a) Improve the performance by bagging 10 different trees. Compute the OOB
error.

In [27]:
import numpy as np

class Bagging:
    def __init__(self, base_estimator, n_estimators=10, max_depth=None, min_samples_leaf=1):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.trees = []
        self.oob_indices = []  # Store OOB indices for each tree

    def bootstrap_sample(self, X, y):
        n_samples = X.shape[0]
        indices = np.random.choice(range(n_samples), size=n_samples, replace=True)
        oob_indices = [i for i in range(n_samples) if i not in indices]
        return X[indices], y[indices], oob_indices

    def fit(self, X, y):
        self.trees = []
        self.oob_indices = []

        for _ in range(self.n_estimators):
            X_bootstrap, y_bootstrap, oob_idx = self.bootstrap_sample(X, y)
            tree = self.base_estimator(max_depth=self.max_depth,
                                     min_samples_leaf=self.min_samples_leaf)
            tree.fit(X_bootstrap, y_bootstrap)
            self.trees.append(tree)
            self.oob_indices.append(oob_idx)

    def predict(self, X):
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return np.array([np.bincount(tree_preds[:, i]).argmax()
                        for i in range(X.shape[0])])

    def oob_error(self, X, y):
        n_samples = X.shape[0]
        oob_preds = np.zeros(n_samples)
        oob_counts = np.zeros(n_samples)

        for i in range(n_samples):
            preds = []
            for j, tree in enumerate(self.trees):
                if i in self.oob_indices[j]:  # If sample was OOB for this tree
                    preds.append(tree.predict(X[i:i+1])[0])

            if len(preds) > 0:
                oob_preds[i] = np.bincount(preds).argmax()
                oob_counts[i] = 1

        return np.sum(oob_counts * (oob_preds != y)) / np.sum(oob_counts)

# Initialize and train
bagging = Bagging(base_estimator=DecisionTree, n_estimators=10,
                 max_depth=3, min_samples_leaf=1)
bagging.fit(X, y)

# OOB Error calculation
oob_err = bagging.oob_error(X, y)
print(f"OOB Error: {oob_err:.4f}")

# Prediction
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging.predict(new_sample)
print(f"Prediction: {'Buy' if pred[0] == 1 else 'No Buy'}")

OOB Error: 0.7500
Prediction: No Buy


In [28]:
import numpy as np

class Bagging:
    def __init__(self, base_estimator, n_estimators=10, max_depth=None, min_samples_leaf=1):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.trees = []
        self.oob_indices = []  # Store OOB indices for each tree

    def bootstrap_sample(self, X, y):
        n_samples = X.shape[0]
        indices = np.random.choice(range(n_samples), size=n_samples, replace=True)
        oob_indices = [i for i in range(n_samples) if i not in indices]
        return X[indices], y[indices], oob_indices

    def fit(self, X, y):
        self.trees = []
        self.oob_indices = []

        for _ in range(self.n_estimators):
            X_bootstrap, y_bootstrap, oob_idx = self.bootstrap_sample(X, y)
            tree = self.base_estimator(max_depth=self.max_depth,
                                     min_samples_leaf=self.min_samples_leaf)
            tree.fit(X_bootstrap, y_bootstrap)
            self.trees.append(tree)
            self.oob_indices.append(oob_idx)

    def predict(self, X):
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return np.array([np.bincount(tree_preds[:, i]).argmax()
                        for i in range(X.shape[0])])

    def oob_error(self, X, y):
        n_samples = X.shape[0]
        oob_preds = np.zeros(n_samples)
        oob_counts = np.zeros(n_samples)

        for i in range(n_samples):
            preds = []
            for j, tree in enumerate(self.trees):
                if i in self.oob_indices[j]:  # If sample was OOB for this tree
                    preds.append(tree.predict(X[i:i+1])[0])

            if len(preds) > 0:
                oob_preds[i] = np.bincount(preds).argmax()
                oob_counts[i] = 1

        return np.sum(oob_counts * (oob_preds != y)) / np.sum(oob_counts)

# Initialize and train
bagging = Bagging(base_estimator=DecisionTree, n_estimators=10,
                 max_depth=3, min_samples_leaf=1)
bagging.fit(X, y)

# OOB Error calculation
oob_err = bagging.oob_error(X, y)
print(f"OOB Error: {oob_err:.4f}")

# Prediction
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging.predict(new_sample)
print(f"Prediction: {'Buy' if pred[0] == 1 else 'No Buy'}")

OOB Error: 0.3750
Prediction: Buy


b) â€¢ Improve the performance by bagging 10 different trees but using only two
random predictors while building the trees. Compute the OOB error.

b Improve the performance by bagging 10 different trees but using only two
random predictors while building the trees. Compute the OOB error.

In [50]:
import numpy as np
class RandomDecisionTree:
    def __init__(self, max_depth=None, min_samples_leaf=1, max_features=None):
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.tree = None

    def gini_impurity(self, y):
        """Calculate Gini impurity for classification."""
        m = len(y)
        if m == 0:
            return 0
        return 1 - sum((np.sum(y == c) / m) ** 2 for c in np.unique(y))

    def best_split(self, X, y):
        """Find the best feature and threshold to split on."""
        m, n = X.shape
        best_gini = float('inf')
        best_split = None

        # If max_features not specified, use all features
        feature_indices = np.random.choice(
            n,
            size=self.max_features if self.max_features else n,
            replace=False
        )

        for feature in feature_indices:
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_mask = X[:, feature] <= threshold
                right_mask = ~left_mask
                y_left, y_right = y[left_mask], y[right_mask]

                # Skip invalid splits
                if len(y_left) < self.min_samples_leaf or len(y_right) < self.min_samples_leaf:
                    continue

                # Weighted Gini impurity
                gini = (len(y_left) * self.gini_impurity(y_left) + len(y_right) * self.gini_impurity(y_right)) / m

                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature, threshold)

        return best_split

    def build_tree(self, X, y, depth=0):
        """Recursively build the decision tree."""
        # Stopping conditions
        if len(np.unique(y)) == 1:  # All same class
            return y[0]
        if len(y) <= self.min_samples_leaf:
            return np.argmax(np.bincount(y))  # Majority class
        if self.max_depth is not None and depth >= self.max_depth:
            return np.argmax(np.bincount(y))  # Majority class

        split = self.best_split(X, y)
        if split is None:  # No valid split found
            return np.argmax(np.bincount(y))  # Majority class

        feature, threshold = split
        left_mask = X[:, feature] <= threshold
        right_mask = ~left_mask

        left_tree = self.build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self.build_tree(X[right_mask], y[right_mask], depth + 1)

        return (feature, threshold, left_tree, right_tree)

    def fit(self, X, y):
        """Train the tree."""
        self.tree = self.build_tree(X, y)

    def predict_one(self, x):
        """Predict a single sample."""
        node = self.tree
        while isinstance(node, tuple):  # While not a leaf node
            feature, threshold, left_tree, right_tree = node
            if x[feature] <= threshold:
                node = left_tree
            else:
                node = right_tree
        return node

    def predict(self, X):
        """Predict multiple samples."""
        return np.array([self.predict_one(x) for x in X])

class BaggingWithRandomFeatures:
    def __init__(self, base_estimator, n_estimators=10, max_depth=None, min_samples_leaf=1, max_features=2):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.trees = []
        self.oob_indices = []  # Track OOB samples for each tree

    def bootstrap_sample(self, X, y):
        """Generate bootstrap sample and track OOB indices."""
        n_samples = X.shape[0]
        indices = np.random.choice(n_samples, size=n_samples, replace=True)
        oob_indices = [i for i in range(n_samples) if i not in indices]
        return X[indices], y[indices], oob_indices

    def fit(self, X, y):
        """Train the ensemble."""
        self.trees = []
        self.oob_indices = []

        for _ in range(self.n_estimators):
            X_boot, y_boot, oob_idx = self.bootstrap_sample(X, y)
            tree = self.base_estimator(
                max_depth=self.max_depth,
                min_samples_leaf=self.min_samples_leaf,
                max_features=self.max_features
            )
            tree.fit(X_boot, y_boot)
            self.trees.append(tree)
            self.oob_indices.append(oob_idx)

    def predict(self, X):
        """Predict using majority voting."""
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return np.array([np.bincount(tree_preds[:, i]).argmax() for i in range(X.shape[0])])

    def oob_error(self, X, y):
        """Compute OOB error using stored OOB indices."""
        n_samples = X.shape[0]
        oob_preds = np.zeros(n_samples)
        oob_counts = np.zeros(n_samples)

        for i in range(n_samples):
            preds = []
            for j, tree in enumerate(self.trees):
                if i in self.oob_indices[j]:  # If sample was OOB for this tree
                    preds.append(tree.predict(X[i:i+1])[0])

            if len(preds) > 0:
                oob_preds[i] = np.bincount(preds).argmax()
                oob_counts[i] = 1

        return np.sum(oob_counts * (oob_preds != y)) / np.sum(oob_counts)



# Train the model
bagging_model = BaggingWithRandomFeatures(
    base_estimator=RandomDecisionTree,
    n_estimators=10,
    max_depth=3,
    min_samples_leaf=1,
    max_features=2
)
bagging_model.fit(X, y)

# Compute OOB error
oob_error = bagging_model.oob_error(X, y)
print(f"OOB Error (with 2 random features per tree): {oob_error:.4f}")

# Predict for a new sample
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging_model.predict(new_sample)
print(f"Prediction: {'Buy' if pred[0] == 1 else 'No Buy'}")

OOB Error (with 2 random features per tree): 0.1250
Prediction: Buy


In [49]:
import numpy as np
class RandomDecisionTree:
    def __init__(self, max_depth=None, min_samples_leaf=1, max_features=None):
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.tree = None

    def gini_impurity(self, y):
        """Calculate Gini impurity for classification."""
        m = len(y)
        if m == 0:
            return 0
        return 1 - sum((np.sum(y == c) / m) ** 2 for c in np.unique(y))

    def best_split(self, X, y):
        """Find the best feature and threshold to split on."""
        m, n = X.shape
        best_gini = float('inf')
        best_split = None

        # If max_features not specified, use all features
        feature_indices = np.random.choice(
            n,
            size=self.max_features if self.max_features else n,
            replace=False
        )

        for feature in feature_indices:
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_mask = X[:, feature] <= threshold
                right_mask = ~left_mask
                y_left, y_right = y[left_mask], y[right_mask]

                # Skip invalid splits
                if len(y_left) < self.min_samples_leaf or len(y_right) < self.min_samples_leaf:
                    continue

                # Weighted Gini impurity
                gini = (len(y_left) * self.gini_impurity(y_left) + len(y_right) * self.gini_impurity(y_right)) / m

                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature, threshold)

        return best_split

    def build_tree(self, X, y, depth=0):
        """Recursively build the decision tree."""
        # Stopping conditions
        if len(np.unique(y)) == 1:  # All same class
            return y[0]
        if len(y) <= self.min_samples_leaf:
            return np.argmax(np.bincount(y))  # Majority class
        if self.max_depth is not None and depth >= self.max_depth:
            return np.argmax(np.bincount(y))  # Majority class

        split = self.best_split(X, y)
        if split is None:  # No valid split found
            return np.argmax(np.bincount(y))  # Majority class

        feature, threshold = split
        left_mask = X[:, feature] <= threshold
        right_mask = ~left_mask

        left_tree = self.build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self.build_tree(X[right_mask], y[right_mask], depth + 1)

        return (feature, threshold, left_tree, right_tree)

    def fit(self, X, y):
        """Train the tree."""
        self.tree = self.build_tree(X, y)

    def predict_one(self, x):
        """Predict a single sample."""
        node = self.tree
        while isinstance(node, tuple):  # While not a leaf node
            feature, threshold, left_tree, right_tree = node
            if x[feature] <= threshold:
                node = left_tree
            else:
                node = right_tree
        return node

    def predict(self, X):
        """Predict multiple samples."""
        return np.array([self.predict_one(x) for x in X])

class BaggingWithRandomFeatures:
    def __init__(self, base_estimator, n_estimators=10, max_depth=None, min_samples_leaf=1, max_features=2):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.trees = []
        self.oob_indices = []  # Track OOB samples for each tree

    def bootstrap_sample(self, X, y):
        """Generate bootstrap sample and track OOB indices."""
        n_samples = X.shape[0]
        indices = np.random.choice(n_samples, size=n_samples, replace=True)
        oob_indices = [i for i in range(n_samples) if i not in indices]
        return X[indices], y[indices], oob_indices

    def fit(self, X, y):
        """Train the ensemble."""
        self.trees = []
        self.oob_indices = []

        for _ in range(self.n_estimators):
            X_boot, y_boot, oob_idx = self.bootstrap_sample(X, y)
            tree = self.base_estimator(
                max_depth=self.max_depth,
                min_samples_leaf=self.min_samples_leaf,
                max_features=self.max_features
            )
            tree.fit(X_boot, y_boot)
            self.trees.append(tree)
            self.oob_indices.append(oob_idx)

    def predict(self, X):
        """Predict using majority voting."""
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return np.array([np.bincount(tree_preds[:, i]).argmax() for i in range(X.shape[0])])

    def oob_error(self, X, y):
        """Compute OOB error using stored OOB indices."""
        n_samples = X.shape[0]
        oob_preds = np.zeros(n_samples)
        oob_counts = np.zeros(n_samples)

        for i in range(n_samples):
            preds = []
            for j, tree in enumerate(self.trees):
                if i in self.oob_indices[j]:  # If sample was OOB for this tree
                    preds.append(tree.predict(X[i:i+1])[0])

            if len(preds) > 0:
                oob_preds[i] = np.bincount(preds).argmax()
                oob_counts[i] = 1

        return np.sum(oob_counts * (oob_preds != y)) / np.sum(oob_counts)



# Train the model
bagging_model = BaggingWithRandomFeatures(
    base_estimator=RandomDecisionTree,
    n_estimators=10,
    max_depth=3,
    min_samples_leaf=1,
    max_features=2
)
bagging_model.fit(X, y)

# Compute OOB error
oob_error = bagging_model.oob_error(X, y)
print(f"OOB Error (with 2 random features per tree): {oob_error:.4f}")

# Predict for a new sample
new_sample = np.array([[42, 2, 0, 1]])  # Age=42, Income=Low, Student=No, Credit=Excellent
pred = bagging_model.predict(new_sample)
print(f"Prediction: {'Buy' if pred[0] == 1 else 'No Buy'}")

OOB Error (with 2 random features per tree): 0.3750
Prediction: No Buy
