In [23]:
import numpy as np
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
class RandomForest:
    """Ensemble of decision trees combined by majority vote.

    Every tree is trained on a bootstrap sample (rows drawn with replacement)
    of the training data, and the forest predicts the label that most trees
    agree on.

    Parameters
    ----------
    n_trees : int
        Number of trees to grow.
    n_leafes : int or None, default=None
        Maximum depth of each tree. Despite its name, this value is forwarded
        to ``DecisionTreeClassifier`` as ``max_depth``; the name is kept for
        backward compatibility.
    min_samples_split : int, default=2
        Forwarded unchanged to ``DecisionTreeClassifier``.
    max_features : int, float, str or None, default="sqrt"
        Forwarded to ``DecisionTreeClassifier``. Restricting the features
        examined at each split is what decorrelates the trees; pass ``None``
        to let every split see all features (plain bagging).
    random_state : int or None, default=None
        Seed for the bootstrap draws and the per-tree seeds. With ``None`` the
        draws come from NumPy's global random state, so ``np.random.seed``
        still applies.
    """

    def __init__(self, n_trees, n_leafes=None, min_samples_split=2,
                 max_features="sqrt", random_state=None):
        self.n_trees = n_trees
        self.n_leafes = n_leafes
        self.min_samples_split = min_samples_split
        self.max_features = max_features
        self.random_state = random_state
        self.trees = []
        self._rng = self._make_rng()

    def _make_rng(self):
        """Return the random source: a seeded RandomState, or the global ``np.random``."""
        if self.random_state is None:
            return np.random
        return np.random.RandomState(self.random_state)

    def fit(self, X, y):
        """Train ``n_trees`` trees, each on its own bootstrap sample.

        Any previously fitted trees are discarded, so calling ``fit`` again
        replaces the forest instead of growing it.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.

        Returns
        -------
        RandomForest
            ``self``, to allow call chaining.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of samples, got "
                f"{X.shape[0]} and {y.shape[0]}."
            )

        # Restart the random stream so refitting with the same seed rebuilds
        # the same forest.
        self._rng = self._make_rng()
        seeded = self.random_state is not None
        max_seed = np.iinfo(np.int32).max

        trees = []
        for _ in range(self.n_trees):
            X_sample, y_sample = self._bootstrap_sample(X, y)
            tree = DecisionTreeClassifier(
                max_depth=self.n_leafes,
                min_samples_split=self.min_samples_split,
                max_features=self.max_features,
                # Distinct per-tree seed so seeded forests are reproducible
                # without every tree making identical random choices.
                random_state=self._rng.randint(max_seed) if seeded else None,
            )
            tree.fit(X_sample, y_sample)
            trees.append(tree)

        # Assign only once every tree has trained, so a failure midway leaves
        # the previous forest intact.
        self.trees = trees
        return self

    def predict(self, X):
        """Return the majority-vote label for every row of ``X``.

        Raises
        ------
        RuntimeError
            If the forest holds no fitted trees.
        """
        if not self.trees:
            raise RuntimeError("RandomForest has no fitted trees; call fit() first.")
        X = np.asarray(X)
        # Shape (n_trees, n_samples); each row of the transpose holds all
        # votes cast for one sample.
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return np.array([self._majority_vote(votes) for votes in tree_preds.T])

    def _majority_vote(self, predictions):
        """Return the most frequent label in ``predictions``.

        Ties go to the label encountered first, which is how
        ``Counter.most_common`` orders equal counts.
        """
        count = Counter(predictions)
        return count.most_common(1)[0][0]

    def _bootstrap_sample(self, X, y):
        """Draw ``len(X)`` rows with replacement and return the matching ``(X, y)`` pair."""
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        indices = self._rng.choice(n_samples, size=n_samples, replace=True)
        return X[indices], y[indices]

In [24]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Load the iris dataset and hold out 33% of the samples for testing; the
# fixed random_state keeps the split identical from run to run.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.33,
    random_state=122,
)
X_train.shape

(100, 4)

In [25]:
# 100 trees; n_leafes is the value each tree receives as its max_depth.
rf = RandomForest(n_trees=100, n_leafes=16)
rf.fit(X=X_train, y=y_train)

In [26]:
# Majority-vote class predictions for the held-out samples.
y_pred_rf = rf.predict(X=X_test)
print(y_pred_rf)

[0 2 1 1 2 0 2 0 2 1 2 2 0 2 0 2 2 1 1 1 2 1 1 0 2 1 0 0 1 2 0 1 1 1 2 0 2
 1 2 0 2 0 2 0 1 2 0 1 2 0]


In [27]:
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred_rf)

array([[15,  0,  0],
       [ 0, 16,  2],
       [ 0,  0, 17]])

In [28]:
# Fraction of held-out samples whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred_rf)

0.96