# In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter

class RandomForestClassifierCustom:
    """
    Random forest built from scikit-learn decision trees.

    Each tree is trained on an (optionally bootstrapped) copy of the training
    data restricted to a random subset of feature columns; predictions are
    combined by majority vote across trees.
    """

    def __init__(self, n_trees=10, max_features='sqrt', bootstrap=True, random_state=None):
        """
        Initialize the Random Forest Classifier.
        :param n_trees: Number of trees in the forest (must be >= 1 when fitting).
        :param max_features: Number of features each tree sees: 'sqrt', 'log2',
            None (all features) or an integer in [1, n_features].
        :param bootstrap: Whether to use bootstrap sampling for training data.
        :param random_state: None to draw from NumPy's global RNG (``np.random``),
            or a seed / ``np.random.RandomState`` for reproducible forests.
        """
        self.n_trees = n_trees
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.random_state = random_state
        self.trees = []  # Fitted decision trees, one per ensemble member
        self.feature_subsets = []  # Column indices used by the tree at the same position
        self._rng = self._make_rng()

    def _make_rng(self):
        """
        Build the private random generator described by ``random_state``.
        :return: None when the global ``np.random`` should be used, otherwise
            a ``np.random.RandomState``.
        """
        if self.random_state is None:
            return None
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        return np.random.RandomState(self.random_state)

    def _sampler(self):
        """
        Return the object whose ``choice`` method supplies random draws.
        """
        # The module itself is never stored on the instance, so instances stay
        # picklable; with no seed the global np.random stream is used.
        return np.random if self._rng is None else self._rng

    def _bootstrap_sample(self, X, y):
        """
        Perform bootstrap sampling.
        :param X: 2-D array of samples.
        :param y: 1-D array of labels aligned with the rows of X.
        :return: Tuple (X_sample, y_sample) of rows drawn with replacement,
            the same number of rows as X.
        """
        n_samples = X.shape[0]
        indices = self._sampler().choice(n_samples, size=n_samples, replace=True)
        return X[indices], y[indices]

    def _resolve_n_features(self, n_total):
        """
        Translate ``max_features`` into a concrete column count.
        :param n_total: Number of columns available in the training data.
        :return: Integer in [1, n_total].
        :raises ValueError: If ``max_features`` is not a supported value.
        """
        if n_total < 1:
            raise ValueError("X must have at least one feature column.")

        limit = self.max_features
        if limit is None:
            return n_total
        if isinstance(limit, str):
            if limit == 'sqrt':
                return max(1, int(np.sqrt(n_total)))
            if limit == 'log2':
                return max(1, int(np.log2(n_total)))
            raise ValueError(
                f"Unsupported max_features string {limit!r}; use 'sqrt' or 'log2'."
            )
        if isinstance(limit, (int, np.integer)):
            if not 1 <= limit <= n_total:
                raise ValueError(
                    f"max_features={limit} must be between 1 and the number "
                    f"of features ({n_total})."
                )
            return int(limit)
        raise ValueError(
            "max_features must be 'sqrt', 'log2', None or an integer, "
            f"got {limit!r}."
        )

    def _random_feature_subset(self, X):
        """
        Select a random subset of features.
        :param X: 2-D array whose second dimension is the feature axis.
        :return: Array of distinct column indices into X.
        :raises ValueError: If ``max_features`` is invalid for X.
        """
        n_total = X.shape[1]
        n_features = self._resolve_n_features(n_total)
        return self._sampler().choice(n_total, size=n_features, replace=False)

    def _train_decision_tree(self, X, y, feature_indices):
        """
        Train a decision tree on the given data and subset of features.
        :param X: 2-D training array (all columns).
        :param y: Labels aligned with the rows of X.
        :param feature_indices: Columns of X the tree is allowed to see.
        :return: The fitted ``DecisionTreeClassifier``.
        """
        # Kept as a function-scope import, as in the original implementation.
        from sklearn.tree import DecisionTreeClassifier

        if self._rng is None:
            tree = DecisionTreeClassifier()
        else:
            # Derive a per-tree seed so a seeded forest is reproducible end to end.
            seed = int(self._rng.randint(np.iinfo(np.int32).max))
            tree = DecisionTreeClassifier(random_state=seed)
        tree.fit(X[:, feature_indices], y)
        return tree

    def fit(self, X, y):
        """
        Fit the random forest model.

        Any trees from a previous call are discarded, so calling ``fit`` again
        retrains the forest from scratch instead of growing it.
        :param X: 2-D array-like of shape (n_samples, n_features).
        :param y: 1-D array-like of labels, one per row of X.
        :return: self, to allow call chaining.
        :raises ValueError: On invalid shapes, ``n_trees`` or ``max_features``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s).")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}."
            )
        if self.n_trees < 1:
            raise ValueError(f"n_trees must be >= 1, got {self.n_trees}.")

        # Restart the seeded stream so refitting with the same seed repeats itself.
        self._rng = self._make_rng()

        # Build into locals and publish at the end: a failure part-way through
        # must not leave a half-trained forest behind.
        trees = []
        feature_subsets = []
        for _ in range(self.n_trees):
            # Bootstrap sampling
            if self.bootstrap:
                X_sample, y_sample = self._bootstrap_sample(X, y)
            else:
                X_sample, y_sample = X, y

            # Random feature subset
            feature_indices = self._random_feature_subset(X)
            feature_subsets.append(feature_indices)

            # Train a decision tree
            trees.append(self._train_decision_tree(X_sample, y_sample, feature_indices))

        self.trees = trees
        self.feature_subsets = feature_subsets
        return self

    def predict(self, X):
        """
        Predict using the trained random forest.
        :param X: 2-D array-like with the same column layout used in ``fit``.
        :return: 1-D array holding the majority-vote label for each row of X.
        :raises RuntimeError: If the forest has not been fitted.
        """
        if not self.trees:
            raise RuntimeError(
                "This RandomForestClassifierCustom is not fitted yet; "
                "call fit() before predict()."
            )
        X = np.asarray(X)

        per_tree = [
            tree.predict(X[:, feature_indices])
            for tree, feature_indices in zip(self.trees, self.feature_subsets)
        ]

        # Transpose so each row holds every tree's vote for one sample.
        votes = np.array(per_tree).T
        majority_votes = [Counter(row).most_common(1)[0][0] for row in votes]
        return np.array(majority_votes)


# Test the Random Forest Implementation on Iris Dataset
if __name__ == "__main__":
    # The train/test split below is seeded, but the forest's bootstrap and
    # feature-subset draws come from NumPy's global RNG. Seed it as well so
    # the reported accuracy is the same on every run.
    np.random.seed(42)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Split dataset: 30% of the samples are held out for evaluation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Initialize and train the custom Random Forest Classifier
    rf = RandomForestClassifierCustom(n_trees=10, max_features='sqrt', bootstrap=True)
    rf.fit(X_train, y_train)

    # Predict on test set
    y_pred = rf.predict(X_test)

    # Evaluate the model: fraction of held-out samples labelled correctly
    accuracy = accuracy_score(y_test, y_pred)
    print("Custom Random Forest Classifier")
    print(f"Accuracy: {accuracy:.2f}")


# Recorded output of the original notebook run:
# Custom Random Forest Classifier
# Accuracy: 1.00
