# Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset

In [2]:
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Helper function: Bootstrap Sampling

In [4]:
def bootstrap_sample(X, y):
    """
    Generate a bootstrap sample from the dataset.
    """
    n_samples = X.shape[0]
    indices = np.random.choice(n_samples, n_samples, replace=True)
    return X[indices], y[indices]

# Helper function: Calculate Gini Impurity

In [5]:
def gini_impurity(y):
    """
    Calculate the Gini Impurity for a given set of labels.
    """
    classes, counts = np.unique(y, return_counts=True)
    probabilities = counts / counts.sum()
    return 1 - np.sum(probabilities**2)

# Helper function: Split the Dataset

In [6]:
def split_dataset(X, y, feature_index, threshold):
    """
    Split the dataset based on the feature index and threshold.
    """
    left_indices = X[:, feature_index] <= threshold
    right_indices = X[:, feature_index] > threshold
    return X[left_indices], X[right_indices], y[left_indices], y[right_indices]

# Helper function: Find the Best Split

In [7]:
def find_best_split(X, y):
    """
    Find the best split for a decision tree node.
    """
    n_samples, n_features = X.shape
    best_gini = float("inf")
    best_split = None

    for feature_index in range(n_features):
        thresholds = np.unique(X[:, feature_index])
        for threshold in thresholds:
            _, _, y_left, y_right = split_dataset(X, y, feature_index, threshold)
            if len(y_left) == 0 or len(y_right) == 0:
                continue

            gini_left = gini_impurity(y_left)
            gini_right = gini_impurity(y_right)
            weighted_gini = (len(y_left) * gini_left + len(y_right) * gini_right) / n_samples

            if weighted_gini < best_gini:
                best_gini = weighted_gini
                best_split = {
                    "feature_index": feature_index,
                    "threshold": threshold,
                    "gini": best_gini,
                }
    return best_split

# Decision Tree Classifier

In [8]:
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y, depth=0):
        """
        Train the decision tree recursively.
        """
        n_samples, n_features = X.shape
        num_classes = len(np.unique(y))

        # Stopping conditions
        if depth == self.max_depth or n_samples <= 1 or num_classes == 1:
            self.tree = np.argmax(np.bincount(y))
            return

        # Find the best split
        split = find_best_split(X, y)
        if not split:
            self.tree = np.argmax(np.bincount(y))
            return

        # Split the dataset
        X_left, X_right, y_left, y_right = split_dataset(X, y, split["feature_index"], split["threshold"])
        if len(y_left) == 0 or len(y_right) == 0:
            self.tree = np.argmax(np.bincount(y))
            return

        # Create child nodes
        self.tree = {
            "feature_index": split["feature_index"],
            "threshold": split["threshold"],
            "left": DecisionTree(max_depth=self.max_depth),
            "right": DecisionTree(max_depth=self.max_depth),
        }
        self.tree["left"].fit(X_left, y_left, depth + 1)
        self.tree["right"].fit(X_right, y_right, depth + 1)

    def predict(self, X):
        """
        Predict the class for a single sample.
        """
        if isinstance(self.tree, dict):
            feature_index = self.tree["feature_index"]
            threshold = self.tree["threshold"]
            if X[feature_index] <= threshold:
                return self.tree["left"].predict(X)
            else:
                return self.tree["right"].predict(X)
        else:
            return self.tree

# Random Forest Classifier

In [9]:
class RandomForestClassifier:
    def __init__(self, n_estimators=10, max_depth=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []

    def fit(self, X, y):
        """
        Train the random forest by fitting multiple decision trees.
        """
        self.trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            X_sample, y_sample = bootstrap_sample(X, y)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        """
        Predict the class for each sample by aggregating predictions from all trees.
        """
        tree_predictions = np.array([tree.predict(x) for x in X for tree in self.trees]).reshape(len(self.trees), -1)
        majority_vote = [np.bincount(tree_predictions[:, i]).argmax() for i in range(tree_predictions.shape[1])]
        return np.array(majority_vote)


# Train the Random Forest Classifier

In [10]:
rf_classifier = RandomForestClassifier(n_estimators=10, max_depth=5)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier

In [11]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.43333333333333335