In [30]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier 
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### <font color = 'black'> Toy example: Iris data and a Random Forest Classifier (RFC) </font>

<font color = 'grey'> Load the data and split it into training and test sets </font>

In [7]:
# Load the Iris dataset through scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [10]:
# Pull the `data` and `target` arrays out of the loaded dataset in one step.
X, Y = iris.data, iris.target

In [14]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

##### <font color = 'grey'> Build a Random Forest Classifier (RFC) from sklearn decision trees </font>

In [27]:


class RandomForest:
    """Minimal random-forest classifier built from scikit-learn decision trees.

    Every tree is fitted on a bootstrap sample (rows drawn with replacement)
    of the training data, and predictions are combined by majority vote.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees to fit.
    max_depth : int or None, default=None
        Forwarded unchanged to each ``DecisionTreeClassifier``.
    max_features : default='sqrt'
        Forwarded unchanged to each ``DecisionTreeClassifier``.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or ready-made generator) driving the bootstrap sampling and the
        trees. ``None`` gives a non-reproducible forest.

    Attributes
    ----------
    estimators : list
        The fitted trees; empty until ``fit`` has been called.
    """

    def __init__(self, n_estimators=100, max_depth=None, max_features='sqrt', random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.random_state = random_state
        self.estimators = []

    @staticmethod
    def _as_indexable(data):
        """Return ``data`` in a form that supports ``data[row_indices]``."""
        if hasattr(data, "to_numpy"):
            # pandas objects: plain ``obj[indices]`` would select columns/labels.
            return data.to_numpy()
        if hasattr(data, "shape"):
            # Already array-like (ndarray, sparse matrix): leave untouched.
            return data
        return np.asarray(data)

    def _make_rng(self):
        """Build a private generator so the global NumPy RNG is never reseeded."""
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        return np.random.RandomState(self.random_state)

    def fit(self, X, y):
        """Fit ``n_estimators`` trees, each on its own bootstrap sample.

        Parameters
        ----------
        X : array-like with one row per sample
        y : array-like with one label per sample

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = self._as_indexable(X)
        y = self._as_indexable(y)
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y must contain the same number of samples "
                "(got %d and %d)." % (n_samples, y.shape[0])
            )

        # A private generator replaces np.random.seed(...), which would
        # overwrite the process-wide RNG state for all later code.
        rng = self._make_rng()

        self.estimators = []
        for _ in range(self.n_estimators):
            # Bootstrap sample: draw n_samples row indices with replacement.
            indices = rng.choice(n_samples, size=n_samples, replace=True)

            # The generator is handed to the tree so its internal randomness
            # is tied to this forest's seed rather than to global state.
            tree = DecisionTreeClassifier(
                max_depth=self.max_depth,
                max_features=self.max_features,
                random_state=rng,
            )
            tree.fit(X[indices], y[indices])
            self.estimators.append(tree)

        return self

    def predict(self, X):
        """Predict a label per row of ``X`` by majority vote over the trees.

        Ties go to the smallest label in sorted order. Labels are returned as
        produced by the trees, so negative, float and string labels all work.

        Raises
        ------
        ValueError
            If the forest holds no fitted trees.
        """
        if not self.estimators:
            raise ValueError(
                "This RandomForest has no fitted trees; call fit() before predict()."
            )

        # votes[i, j] is the label tree j assigns to sample i.
        votes = np.column_stack(
            [np.asarray(tree.predict(X)) for tree in self.estimators]
        )
        n_samples = votes.shape[0]

        # Encode labels as 0..n_classes-1 so they can be tallied with bincount
        # whatever their dtype; the reshape keeps this independent of the
        # NumPy version's return_inverse layout.
        classes, codes = np.unique(votes, return_inverse=True)
        codes = codes.reshape(votes.shape)
        n_classes = classes.shape[0]

        # Offsetting each row by i * n_classes lets one bincount call produce
        # the full (n_samples, n_classes) tally table.
        row_offsets = np.arange(n_samples)[:, np.newaxis] * n_classes
        tallies = np.bincount(
            (codes + row_offsets).ravel(), minlength=n_samples * n_classes
        ).reshape(n_samples, n_classes)

        return classes[tallies.argmax(axis=1)]






  
                               

In [33]:
# Configure a 100-tree forest with depth-limited trees and a fixed seed.
rf = RandomForest(
    n_estimators=100,
    max_depth=5,
    max_features='sqrt',
    random_state=42,
)

# Train on the training split.
rf.fit(x_train, y_train)

# Predict labels for the held-out test split.
y_pred = rf.predict(x_test)

In [34]:
# Score the test-split predictions against the true test labels.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

1.0