In [1]:


import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# Generate a synthetic 500-row, 5-feature classification data set, then hold
# out 20% of the rows for testing. Both steps are seeded with 42.
X, y = make_classification(
    random_state=42,
    n_features=5,
    n_samples=500,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)



class RandomForest:
    """Bagged ensemble of decision trees combined by majority vote.

    Each tree is trained on a bootstrap sample (rows drawn with replacement)
    of the training data; predictions are the most common label across trees.

    Parameters
    ----------
    n_trees : int
        Number of trees to train.
    max_depth : int or None
        Maximum depth passed to every tree.
    sample_size : float
        Size of each bootstrap sample as a fraction of the training rows.
    max_features : int, float, str or None
        Passed straight to each tree's ``max_features``. The default ``None``
        keeps the previous behaviour; a value such as ``"sqrt"`` makes each
        split look at a random feature subset.
    random_state : int or None
        Seed for the bootstrap draws and the per-tree seeds. ``None`` keeps
        the previous behaviour of drawing from NumPy's global random state.
    """

    def __init__(self, n_trees=10, max_depth=5, sample_size=0.8,
                 max_features=None, random_state=None):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.sample_size = sample_size
        self.max_features = max_features
        self.random_state = random_state
        self.trees = []
        self._rng = self._new_rng()

    def _new_rng(self):
        """Return the random source used for bootstrap draws and tree seeds."""
        if self.random_state is None:
            # The module-level functions share NumPy's global state, so code
            # that relies on ``np.random.seed`` keeps working unchanged.
            return np.random
        return np.random.RandomState(self.random_state)

    def _bootstrap_sample(self, X, y):
        """Draw ``int(len(y) * sample_size)`` rows of (X, y) with replacement."""
        n_draw = int(len(y) * self.sample_size)
        indices = self._rng.choice(len(y), size=n_draw, replace=True)
        return X[indices], y[indices]

    def _make_tree(self, seed):
        """Build one unfitted tree configured from this forest's settings."""
        return DecisionTreeClassifier(
            max_depth=self.max_depth,
            max_features=self.max_features,
            random_state=seed,
        )

    def fit(self, X, y):
        """Train ``n_trees`` trees on bootstrap samples of (X, y).

        Any trees from an earlier call are discarded, so refitting replaces
        the ensemble instead of growing it. Returns ``self``.
        """
        # Restart the stream so a seeded forest gives the same fit every time.
        self._rng = self._new_rng()
        self.trees = []
        for _ in range(self.n_trees):
            if self.random_state is None:
                seed = None
            else:
                seed = int(self._rng.randint(np.iinfo(np.int32).max))
            X_sample, y_sample = self._bootstrap_sample(X, y)
            tree = self._make_tree(seed)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Return the majority-vote label for each row of X.

        Works for any label values the trees emit (negative, float, string),
        not only non-negative integers. Ties go to the smallest label. The
        result is passed through ``np.squeeze``, so a single input row yields
        a 0-d array.

        Raises
        ------
        ValueError
            If no trees have been trained yet.
        """
        if not self.trees:
            raise ValueError(
                "RandomForest is not fitted yet; call fit() before predict()."
            )
        # Shape (n_trees, n_samples): one row of predictions per tree.
        predictions = np.array([tree.predict(X) for tree in self.trees])
        classes = np.unique(predictions)
        # votes[k, j] = number of trees that predicted classes[k] for sample j.
        votes = np.stack(
            [(predictions == label).sum(axis=0) for label in classes]
        )
        # argmax returns the first maximum; classes is sorted, so ties resolve
        # to the smallest label.
        return np.squeeze(classes[votes.argmax(axis=0)])



from sklearn.tree import DecisionTreeClassifier

# Train a 50-tree, depth-5 forest on the training split and score it on the
# held-out split.
forest_kwargs = {"n_trees": 50, "max_depth": 5}
rf = RandomForest(**forest_kwargs)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Show predicted labels first, then the true labels, for a visual comparison.
for labels in (y_pred, y_test):
    print(labels)

print("Accuracy:", accuracy_score(y_test, y_pred))

[0 1 1 1 0 1 1 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 1 1 0
 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1
 0 0 0 0 1 1 1 0 1 1 0 1 0 0 0 1 1 1 0 1 0 1 0 0 1 0]
[0 1 0 1 0 1 1 0 0 1 1 1 0 1 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 1 1 0
 1 0 1 0 1 1 0 1 0 1 1 1 1 0 0 1 0 1 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 1 0 0 1
 0 0 0 0 1 1 1 0 1 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0]
Accuracy: 0.94
