# ランダムフォレスト作成

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from mh_tree_algorithm import DecisionTreeMH

In [None]:
class RandomForest(object):
    """Random-forest classifier built from ``DecisionTreeMH`` trees.

    Each tree is trained on a bootstrap resample of the training rows in
    which all but ``ceil(sqrt(num_features))`` randomly chosen feature
    columns are overwritten with zeros.  Prediction is a majority vote
    over the trees.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees to train.
    random_state : int or None, default=None
        Seed for the resampling random stream; it is also forwarded
        unchanged to every ``DecisionTreeMH``.  When ``None``, NumPy's
        global random state is used.

    Attributes
    ----------
    estimators_ : list
        Fitted trees, set by ``fit``.
    feature_importances_ : numpy.ndarray
        Mean of the trees' ``feature_importances_``, set by ``fit``.
    """

    def __init__(self, n_estimators=100, random_state=None):
        self.n_estimators = n_estimators
        self.random_state = random_state

    def _new_rng(self):
        """Return a fresh random source matching ``random_state``."""
        if self.random_state is None:
            # Use NumPy's global generator so that an external
            # ``np.random.seed`` call keeps influencing the draws.
            return np.random
        return np.random.RandomState(self.random_state)

    def reselect_samples(self, samples, targets):
        """Draw one bootstrap sample with a random feature subset.

        Parameters
        ----------
        samples : array-like of shape (num_samples, num_features)
        targets : array-like of shape (num_samples,)

        Returns
        -------
        rs_samples : numpy.ndarray
            Rows drawn with replacement from ``samples`` (a copy); the
            columns that were not selected are set to ``0.0``.
        rs_targets : numpy.ndarray
            The targets of the drawn rows.
        """
        samples = np.asarray(samples)
        targets = np.asarray(targets)
        num_samples, num_features = samples.shape

        # Seed once and keep advancing the same stream.  Re-seeding on
        # every call would hand every tree the identical resample and
        # defeat the purpose of the ensemble.
        rng = getattr(self, "_rng", None)
        if rng is None:
            rng = self._rng = self._new_rng()

        row_index = rng.randint(0, num_samples, size=num_samples)
        num_kept = int(np.ceil(np.sqrt(num_features)))
        kept_features = rng.permutation(num_features)[:num_kept]

        dropped = np.ones(num_features, dtype=bool)
        dropped[kept_features] = False

        # Fancy indexing returns a copy, so the caller's data is untouched.
        rs_samples = samples[row_index, :]
        rs_samples[:, dropped] = 0.0
        rs_targets = targets[row_index]
        return rs_samples, rs_targets

    def fit(self, X_train, y_train):
        """Train ``n_estimators`` trees and aggregate their importances.

        Raises
        ------
        ValueError
            If ``n_estimators`` is smaller than 1.
        """
        if self.n_estimators < 1:
            raise ValueError(
                "n_estimators must be at least 1, got %r" % (self.n_estimators,)
            )

        # Restart the stream so repeated fits with the same seed agree.
        self._rng = self._new_rng()

        self.estimators_ = []
        for _ in range(self.n_estimators):
            tree = DecisionTreeMH(random_state=self.random_state)
            rs_X_train, rs_y_train = self.reselect_samples(X_train, y_train)
            tree.fit(rs_X_train, rs_y_train)
            self.estimators_.append(tree)

        self.calc_feature_importances()

    def calc_feature_importances(self):
        """Set ``feature_importances_`` to the mean over all fitted trees."""
        self.feature_importances_ = np.mean(
            [tree.feature_importances_ for tree in self.estimators_], axis=0
        )

    def predict(self, X_test):
        """Return the majority-vote label for every row of ``X_test``.

        Ties are resolved in favour of the smallest label, because
        ``np.unique`` returns labels sorted and ``argmax`` picks the
        first maximum.
        """
        # Shape: (number of trees, number of test rows).
        votes = np.array([tree.predict(X_test) for tree in self.estimators_])

        labels = []
        for column in votes.T:
            classes, counts = np.unique(column, return_counts=True)
            labels.append(classes[np.argmax(counts)])
        return np.array(labels)

    def score(self, sample, target):
        """Return the fraction of rows in ``sample`` predicted as ``target``."""
        return np.sum(self.predict(sample) == target) / float(len(target))