In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
class RandomForest():
    def __init__(self, n_estimators=50, max_depth=5, max_samples=5, max_features=0.8):
        self.trees = []
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_samples = max_samples
        self.max_features = max_features

    def fit(self, X, y):
        self.trees = []
        for i in range(self.n_estimators):
            samples = np.random.choice(X.shape[0], size=self.max_samples, replace=True)
            tree = DecisionTreeClassifier(max_depth=self.max_depth, max_features=self.max_features)
            tree.fit(X[samples], y[samples])
            self.trees.append(tree)

    def predict(self, X):
        predictions = np.zeros((X.shape[0], self.n_estimators))
        for i, tree in enumerate(self.trees):
            predictions[:, i] = tree.predict(X)
        return np.round(np.mean(predictions, axis=1))

In [3]:
def CrossValidation(model, X, y, n_folds=5):
    scores = []
    idxs = np.arange(X.shape[0])
    np.random.shuffle(idxs)
    split_size = idxs.shape[0] // n_folds
    for i in range(n_folds):
        test_idxs = idxs[i * split_size: (i + 1) * split_size]
        train_idxs = [i for i in idxs if i not in test_idxs]
        model.fit(X[train_idxs], y[train_idxs])
        predictions = model.predict(X[test_idxs])
        scores.append(np.mean(predictions == y[test_idxs]))
    return np.mean(scores)

In [4]:
data = load_breast_cancer()

X = data.data
y = data.target

In [5]:
CrossValidation(RandomForest(n_estimators=20, max_features=0.6, max_samples=5, max_depth=8), X, y)

0.9061946902654867

In [6]:
CrossValidation(RandomForestClassifier(n_estimators=20, max_features=0.6, max_samples=5, max_depth=8), X, y)

0.9185840707964601