In [48]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

# Location of the training CSV.
# Written as a plain absolute path: wrapping it in os.path.join('data', ...)
# has no effect on Windows, because an absolute second component makes
# os.path.join discard everything before it.
# NOTE(review): this is a machine-specific absolute path — consider a
# configurable data directory instead.
path = 'D:/Codes/training.csv'
data = pd.read_csv(path)

# Shuffle the rows. A fixed random_state makes the resulting row order
# reproducible after a kernel restart.
from sklearn import utils
data = utils.shuffle(data, random_state=0)

# Separate the label column ("Fire Alarm") from the feature matrix.
# NOTE(review): every remaining column is used as a feature, including the
# UTC timestamp (it shows up as the first column of the printed X) — confirm
# that this does not leak the label.
Y = data["Fire Alarm"].values
X = data.drop("Fire Alarm", axis=1).values
print(f"X=\n{X}")
print(f"\nY=\n{Y}")

# Split into training and test sets (70% / 30%) BEFORE standardizing, so the
# scaler's mean/variance are estimated from the training rows only and no
# test-set statistics leak into training.
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.3, random_state=0)

# Standardize features: fit on the training split, then apply the same
# transformation to both splits. X itself is left unscaled.
scaler = StandardScaler()               # instantiate
scaler.fit(train_X)                     # learn statistics from training data only
train_X = scaler.transform(train_X)     # transform the training split
test_X = scaler.transform(test_X)       # transform the test split with the same statistics

# Instantiate the classifier: a random forest of only 2 trees (not the
# scikit-learn default of 100), entropy split criterion, at least 3 samples
# per leaf, and a maximum depth of 15.
# random_state is fixed so the fitted trees — and therefore the reported
# accuracy — are reproducible across runs.
clf = RandomForestClassifier(
    n_estimators=2,
    criterion="entropy",
    min_samples_leaf=3,
    max_depth=15,
    random_state=0,
)

# Train on the training split.
clf.fit(train_X, train_Y)

# Predict on the held-out split and report accuracy.
predict_Y = clf.predict(test_X)

print(f"准确率acc = {accuracy_score(y_pred=predict_Y, y_true=test_Y)}")

import pydotplus

# Add the directory to PATH so pydotplus can locate external executables
# (presumably the Graphviz binaries live here — confirm). The membership
# check keeps PATH from growing every time this cell is re-run.
graphviz_dir = "D:/Codes"
if graphviz_dir not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + graphviz_dir

# Feature names are the same for every tree, so compute them once.
feature_names = list(data.drop("Fire Alarm", axis=1).columns)

# Render every tree of the forest to its own PNG in the current directory.
# export_graphviz works on a single tree, so each member of
# clf.estimators_ is exported separately.
for i_tree, tree in enumerate(clf.estimators_):
    png_path = f"{i_tree}_out.png"

    # Existing images are never overwritten. Delete the old PNGs to force a
    # re-render after retraining, otherwise they may show a previous model.
    if os.path.exists(png_path):
        continue

    dot_data = export_graphviz(tree, out_file=None, feature_names=feature_names)
    graph = pydotplus.graph_from_dot_data(dot_data)

    # Highlight the node at index 7 when the graph has that many nodes;
    # a small tree would otherwise raise IndexError here.
    # NOTE(review): Graphviz only honours fillcolor on nodes whose style
    # includes "filled" — confirm the highlight is actually visible.
    nodes = graph.get_nodes()
    if len(nodes) > 7:
        nodes[7].set_fillcolor("#FFF2DD")

    graph.write_png(png_path)

X=
[[1.65476144e+09 1.10800000e+01 5.51600000e+01 ... 1.70000000e-01
  3.32000000e-01 1.89000000e-01]
 [1.65475044e+09 1.46260000e+01 4.95700000e+01 ... 1.53600000e+01
  2.39500000e+00 5.40000000e-02]
 [1.65473668e+09 7.29500000e+00 6.14300000e+01 ... 1.39100000e+01
  2.16900000e+00 4.90000000e-02]
 ...
 [1.65477857e+09 2.65700000e+01 4.96500000e+01 ... 1.03200000e+01
  1.61000000e+00 3.60000000e-02]
 [1.65476220e+09 1.27100000e+01 5.69500000e+01 ... 2.07000000e+00
  3.23000000e-01 7.00000000e-03]
 [1.65474975e+09 1.26770000e+01 5.24000000e+01 ... 1.28000000e+01
  1.99600000e+00 4.50000000e-02]]

Y=
[0 1 1 ... 1 0 1]
准确率acc = 0.9992963197523046
D:\Anaconda\envs\Pytorch;D:\Anaconda\envs\Pytorch\Library\mingw-w64\bin;D:\Anaconda\envs\Pytorch\Library\usr\bin;D:\Anaconda\envs\Pytorch\Library\bin;D:\Anaconda\envs\Pytorch\Scripts;D:\Anaconda\envs\Pytorch\bin;D:\Anaconda\condabin;D:\Anaconda;D:\Anaconda\Library\mingw-w64\bin;D:\Anaconda\Library\usr\bin;D:\Anaconda\Library\bin;D:\Anaconda\Scri

In [43]:
# Show the DataFrame as this cell's output (bare last expression).
data

Unnamed: 0,UTC,Temperature[C],Humidity[%],TVOC[ppb],eCO2[ppm],Raw H2,Raw Ethanol,Pressure[hPa],PM1.0,PM2.5,NC0.5,NC1.0,NC2.5,Fire Alarm
35979,1655127708,13.934,42.04,102,400,12911,20615,937.436,1.86,1.93,12.77,1.991,0.045,0
15284,1654739509,-8.105,55.60,164,400,13174,20100,939.661,0.88,0.91,6.04,0.942,0.021,1
33220,1654771792,23.320,55.44,938,687,12784,19489,939.127,2.17,2.25,14.93,2.329,0.053,1
26232,1654780840,24.470,53.84,1226,411,12917,19417,938.682,1.75,1.82,12.07,1.881,0.042,1
45537,1654735773,18.186,47.27,9,400,13221,20150,939.632,1.13,1.17,7.75,1.208,0.027,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22681,1654754020,7.951,52.18,1017,400,13013,19469,938.715,2.62,2.72,18.02,2.811,0.063,1
5064,1654773726,26.980,47.02,1067,568,12826,19456,938.964,1.94,2.01,13.33,2.078,0.047,1
6170,1654776921,24.990,51.44,1172,436,12870,19439,938.804,2.02,2.10,13.92,2.171,0.049,1
42865,1654738602,-9.088,54.59,13,400,13256,20215,939.690,2.60,2.71,17.92,2.795,0.063,1
