**极端随机森林（ERF）**     
基于图像特征构建一组决策树，并通过训练这个森林实现正确的决策

In [1]:
import pickle
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import preprocessing

class ERFTrainer(object):
    """Extremely Randomized Forest classifier paired with a label encoder.

    The constructor fits both the label encoder and the forest, so an
    instance is ready to classify as soon as it has been created.
    """

    def __init__(self, X, label_words):
        """Fit the encoder on ``label_words`` and the forest on ``X``.

        Args:
            X: Training feature vectors; converted with ``np.asarray``
                before being handed to the classifier.
            label_words: Class labels, one per entry of ``X``.
        """
        # The label encoder translates class labels to numeric codes and back.
        self.le = preprocessing.LabelEncoder()
        forest_options = dict(n_estimators=100, max_depth=16, random_state=0)
        self.clf = ExtraTreesClassifier(**forest_options)

        targets = self.encode_labels(label_words)
        samples = np.asarray(X)
        self.clf.fit(samples, targets)

    def encode_labels(self, label_words):
        """Fit the encoder on ``label_words`` and return their numeric codes.

        Returns:
            A NumPy array of dtype ``float32`` holding one code per label.
        """
        encoder = self.le
        encoder.fit(label_words)
        codes = encoder.transform(label_words)
        return np.array(codes, dtype=np.float32)

    def classify(self, X):
        """Predict the class labels of the unknown data points in ``X``.

        Returns:
            Whatever the encoder's ``inverse_transform`` yields for the
            predicted codes, i.e. the labels in their original form.
        """
        predicted = self.clf.predict(np.asarray(X))
        # Predictions are converted to plain ints before decoding because
        # the forest was trained on float32-encoded targets.
        codes = list(map(int, predicted))
        return self.le.inverse_transform(codes)
    
    
if __name__ == '__main__':
    # Script entry point: load a pickled feature map, train an ERF on it
    # and pickle the trained model.
    feature_map_file = "feature_map.pkl"
    model_file = "erf.pkl"

    # Load the feature map generated in the previous section.
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # load feature-map files that come from a trusted source.
    with open(feature_map_file, 'rb') as f:
        feature_map = pickle.load(f)

    # Fail early with a clear message; otherwise feature_map[0] below
    # would raise an opaque IndexError when there is nothing to train on.
    if len(feature_map) == 0:
        raise ValueError(
            "No training samples found in %r" % feature_map_file)

    # Extract the labels and the feature vectors.
    label_words = [x['object_class'] for x in feature_map]
    # The vector length is read from shape[1] of the first entry, and every
    # vector is flattened to that length so the samples form a 2-D matrix.
    dim_size = feature_map[0]['feature_vector'].shape[1]
    X = [np.reshape(x['feature_vector'], (dim_size,)) for x in feature_map]

    # Train the ERF on the training data, then save the trained model.
    erf = ERFTrainer(X, label_words)
    if model_file:
        with open(model_file, 'wb') as f:
            pickle.dump(erf, f)