In [1]:
#  Import lib
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.datasets import load_iris
import pydotplus
import pandas as pd
import numpy as np
import random
#  Load the iris dataset bundled with scikit-learn
iris = load_iris()
X, y = iris.data, iris.target

#  Wrap the feature matrix in a pandas DataFrame, one column per feature name
df = pd.DataFrame(X, columns=iris.feature_names)

#  Peek at the first rows (a bare expression is only rendered by the notebook
#  when it is the last statement of the cell)
df.head()

#  Print the DataFrame summary (column names, non-null counts, dtypes)
df.info()

#  Train the model: entropy-based decision tree with a fixed seed;
#  fit() returns the estimator itself, so the call can be chained
tree = DecisionTreeClassifier(
    criterion='entropy',
    splitter='best',
    random_state=150,
).fit(X, y)

#  Build the tree diagram as Graphviz DOT source (out_file=None returns a string)
dot_data = export_graphviz(
    tree,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)

#  Render the diagram to a PNG file
graph.write_png('sample_tree.png')

#  Collect the feature importances into a dict, ordered from most to least important
_importances = tree.feature_importances_
_feat_labels = iris.feature_names

#  Indices that sort the importances in descending order
indices = np.argsort(_importances)[::-1]

#  Dicts keep insertion order, so iterating the sorted indices yields a ranked mapping
feature_importances_ = {_feat_labels[i]: _importances[i] for i in indices}

#  Inspect the feature importances
feature_importances_

#  Write the data to Excel so the calculation can be reproduced once by hand.
#  ExcelWriter is used as a context manager: the workbook is saved and closed
#  when the block exits, even on error. (ExcelWriter.save() was removed in
#  pandas 2.0, and the sheet name must be passed by keyword in recent pandas.)
with pd.ExcelWriter('output.xlsx') as writer:
    df.to_excel(writer, sheet_name='Sheet1')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
