In [1]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the iris classification dataset bundled with scikit-learn and build a
# DataFrame view of it: one column per feature plus the integer class label.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

In [16]:
class RandomForestClassifier:
    def __init__(self, n_estimators: int = 10, max_depth: int = 5, min_samples_leaf: int = 3, max_features: int | None = None):
        self.n_estimators = n_estimators # Number of decision trees
        self.max_depth = max_depth # Max depth of each decision tree
        self.min_samples_leaf = min_samples_leaf # Min samples for a node to be declared as a leaf
        self.max_features = max_features # Optional input which restricts the max number of features a tree can be trained on
        self.trees: list = [] # Stores the trees and corresponding features used


    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
         self.trees = [] # Initializing an empty list
         n_samples, n_features = X.shape
         self.max_features = self.max_features or int(np.sqrt(n_features)) # If no value is given, reverts to default (sqrt)
         indices = np.random.choice(n_samples, size=n_samples, replace=True)
         X_sample = X[indices]
         y_sample = y[indices]

            # Randomly select a subset of features
         feature_indices = np.random.choice(n_features, size=self.max_features, replace=False)
         X_sample_subfeatures = X_sample[:, feature_indices]

            # Train the decision tree
         tree = DecisionTreeClassifier(max_depth=self.max_depth, min_samples_leaf=self.min_samples_leaf)
         tree.fit(X_sample_subfeatures, y_sample)

            # Store the tree and the selected feature indices
         self.trees.append((tree, feature_indices))

    def predict(self, X: np.ndarray) -> np.ndarray:     
         predictions = []
         for tree, feature_indices in self.trees:
            X_subset = X[:, feature_indices]
            predictions.append(tree.predict(X_subset))
            predictions = np.array(predictions)
            majority_vote = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)
         return majority_vote

In [17]:
# Forest of 10 shallow trees, each trained on 2 of the feature columns
model = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    min_samples_leaf=3,
    max_features=2,
)

# Feature matrix and integer class labels from the iris dataset
X, y = iris.data, iris.target

In [29]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model.fit(X_train, y_train)

# Accuracy = fraction of held-out samples whose predicted label matches the true label
y_pred = model.predict(X_test)
accuracy = (y_pred == y_test).mean()
print(f"Your model has an accuracy of {100 * accuracy}%.")

Your model has an accuracy of 96.66666666666667%.
