In [47]:
from collections import Counter
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

# Load the iris dataset once and unpack it into the feature matrix X and the
# label vector Y (return_X_y=True yields the (data, target) pair directly,
# instead of building the dataset bundle twice).
X, Y = load_iris(return_X_y=True)

In [95]:
class RandomForestClassifier:
    """Minimal random forest built on top of ``DecisionTreeClassifier``.

    Every tree is fitted on a bootstrap sample of the rows and on a random
    subset of the columns. Predictions are the majority vote over all trees.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees in the forest.
    max_depth : int or None, default=None
        Forwarded to every ``DecisionTreeClassifier``.
    min_samples_split : int, default=2
        Forwarded to every ``DecisionTreeClassifier``.
    max_features : 'auto' or int, default='auto'
        Number of columns drawn for each tree. ``'auto'`` means
        ``int(sqrt(n_features))``.
    """

    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2, max_features='auto'):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        # Store the caller's value (it used to be overwritten with a literal 2).
        self.min_samples_split = min_samples_split
        self.max_features = max_features
        # Column indices used by each fitted tree, aligned with ``self.trees``.
        self.trees_feat_idx = []

    def _get_trees(self):
        """Return ``n_estimators`` fresh, unfitted decision trees."""
        return [
            DecisionTreeClassifier(max_depth=self.max_depth,
                                   min_samples_split=self.min_samples_split)
            for _ in range(self.n_estimators)
        ]

    def _get_max_features(self, X):
        """Resolve ``self.max_features`` into a concrete column count for ``X``.

        Raises
        ------
        ValueError
            If ``max_features`` is neither ``'auto'`` nor an integer in
            ``[1, n_features]``.
        """
        n_features = X.shape[1]
        if isinstance(self.max_features, str):
            if self.max_features == 'auto':
                return int(np.sqrt(n_features))
            raise ValueError('Bad max features')

        is_integer = (isinstance(self.max_features, (int, np.integer))
                      and not isinstance(self.max_features, bool))
        if not is_integer:
            raise ValueError('Bad max features')
        if not 1 <= self.max_features <= n_features:
            raise ValueError(
                'max_features must be between 1 and the number of features '
                '({}), got {}'.format(n_features, self.max_features))
        return int(self.max_features)

    def get_subset(self, X, Y):
        """Draw a bootstrap sample: as many rows as ``X`` has, with replacement.

        Returns the sampled rows of ``X`` and the matching entries of ``Y``.
        """
        m = X.shape[0]
        idx = np.random.choice(m, size=m, replace=True)
        return X[idx], Y[idx]

    def fit(self, X, Y):
        """Fit every tree on its own bootstrap sample and feature subset.

        Calling ``fit`` again discards the previous trees and their feature
        indices, so the forest always reflects only the latest call.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        self.trees = self._get_trees()
        # Reset so a refit does not leave stale indices ahead of the new ones.
        self.trees_feat_idx = []
        max_feat = self._get_max_features(X)

        for tree in self.trees:
            sub_X, sub_Y = self.get_subset(X, Y)
            # Sample without replacement so no column is handed to a tree twice.
            idx_features = np.random.choice(X.shape[1], max_feat, replace=False)
            self.trees_feat_idx.append(idx_features)
            tree.fit(sub_X[:, idx_features], sub_Y)

    def predict(self, X):
        """Return the majority-vote label for every row of ``X`` as a list.

        Ties are resolved in favour of the label that appears first among the
        trees' votes for that row.
        """
        X = np.asarray(X)
        # One column per tree; stacking keeps the label dtype the trees return
        # rather than forcing the votes into a float matrix.
        votes = np.column_stack([
            tree.predict(X[:, idx_features])
            for tree, idx_features in zip(self.trees, self.trees_feat_idx)
        ])
        return [Counter(row.tolist()).most_common(1)[0][0] for row in votes]

In [71]:
# Pull the first two columns of X out as nested lists of one-element rows
# (selecting with a list index keeps the column axis, so no reshape is needed).
one = X[:, [0]].tolist()
two = X[:, [1]].tolist()

In [96]:
# Build a forest whose trees are capped at depth 5 (all other settings default).
model = RandomForestClassifier(max_depth=5)
# Fit on the full dataset loaded above.
model.fit(X, Y)
# Predict on the same rows used for fitting, so Y_pred reflects training-set
# predictions rather than held-out performance.
Y_pred = model.predict(X)