In [4]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score

In [3]:
def _as_row_indexable(data):
    """Return ``data`` in a form that supports positional indexing with an index array."""
    if hasattr(data, "to_numpy"):
        # pandas objects index by *label* with ``obj[indices]``; use the raw values instead.
        return data.to_numpy()
    if hasattr(data, "shape"):
        # Already array-like (e.g. an ndarray); leave it untouched.
        return data
    # Lists / tuples have neither ``shape`` nor fancy indexing.
    return np.asarray(data)


def bag(X, y, rng=None):
    """Draw a bootstrap sample of ``X`` and ``y`` (same size, with replacement).

    Parameters
    ----------
    X : array-like
        Samples, one per entry along the first axis.
    y : array-like
        Labels; must have the same length as ``X``.
    rng : object with a ``choice`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)`` or
        ``np.random.RandomState(seed)``. When omitted, the global ``np.random``
        state is used, so ``np.random.seed`` still controls the draw.

    Returns
    -------
    tuple
        ``(X_sample, y_sample)``, both selected with the same row indices so
        every sampled row stays paired with its label.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of entries.
    """
    X = _as_row_indexable(X)
    y = _as_row_indexable(y)
    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {y.shape[0]}"
        )
    sampler = np.random if rng is None else rng
    indices = sampler.choice(n_samples, size=n_samples, replace=True)
    return X[indices], y[indices]

def most_common_label(y):
    """Return the label that occurs most often in ``y``.

    ``y`` is any iterable of hashable labels. When several labels share the
    highest count, the one encountered first in ``y`` is returned. An empty
    ``y`` raises ``IndexError``.
    """
    tally = Counter(y)
    top_label, _votes = tally.most_common(1)[0]
    return top_label

class RandomForest:
    """Bagged ensemble of scikit-learn decision trees with majority-vote prediction.

    Every tree is fitted on a bootstrap sample produced by ``bag``; ``predict``
    returns, for each sample, the label predicted by the largest number of trees
    (decided by ``most_common_label``).

    Parameters
    ----------
    n_trees : int, default=10
        Number of trees to fit.
    min_samples_split : default=2
        Forwarded unchanged to ``DecisionTreeClassifier``.
    max_depth : default=100
        Forwarded unchanged to ``DecisionTreeClassifier``.
    max_features : default=None
        Forwarded unchanged to ``DecisionTreeClassifier``.

    Attributes
    ----------
    trees : list
        The fitted trees; empty until ``fit`` has been called.
    """

    def __init__(self, n_trees=10, min_samples_split=2, max_depth=100, max_features=None):
        self.n_trees = n_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.max_features = max_features
        self.trees = []

    @staticmethod
    def _to_indexable(data):
        """Return ``data`` in a form that supports positional indexing with an index array."""
        if hasattr(data, "to_numpy"):
            # pandas objects index by label with ``obj[indices]``; use the raw values instead.
            return data.to_numpy()
        if hasattr(data, "shape"):
            # Already array-like; leave it untouched.
            return data
        return np.asarray(data)

    def fit(self, X, y):
        """Fit ``n_trees`` decision trees, each on its own bootstrap sample of ``(X, y)``.

        Any previously fitted trees are discarded first. Returns ``self`` so that
        calls can be chained (``RandomForest().fit(X, y).predict(X)``).
        """
        # Normalise once so the bootstrap helper indexes rows by position.
        X = self._to_indexable(X)
        y = self._to_indexable(y)

        self.trees = []
        for _ in range(self.n_trees):
            tree = DecisionTreeClassifier(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                max_features=self.max_features,
            )
            X_sample, y_sample = bag(X, y)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Return the majority-vote label for every sample in ``X`` as a numpy array.

        Raises
        ------
        ValueError
            If the forest holds no fitted trees (``fit`` was not called, or it was
            called with ``n_trees`` of 0).
        """
        if not self.trees:
            raise ValueError(
                "This RandomForest has no fitted trees; call fit(X, y) before predict(X)."
            )
        # Hand the trees the same kind of container they were fitted on.
        X = self._to_indexable(X)
        # Stack to (n_trees, n_samples), then swap so each row holds one sample's votes.
        votes_per_tree = np.array([tree.predict(X) for tree in self.trees])
        votes_per_sample = np.swapaxes(votes_per_tree, 0, 1)
        return np.array([most_common_label(votes) for votes in votes_per_sample])

In [10]:
# Fixed seed so the train/test split and the forest's random draws -- and therefore
# the reported accuracy -- are the same on every Restart & Run All.
SEED = 42
# The bootstrap helper samples through the global numpy RNG, so seed it here.
# NOTE(review): the trees are built without an explicit random_state; per the
# scikit-learn docs they then draw from this same global state -- confirm for the
# installed version.
np.random.seed(SEED)

X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

rf = RandomForest(n_trees=15, max_depth=15, max_features='sqrt')
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(acc)

0.9790209790209791
