In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Seed NumPy's global random number generator so any NumPy-based randomness
# in later cells is repeatable across Restart & Run All.
np.random.seed(0)

## 加载数据集

In [None]:
from sklearn.datasets import load_iris 

# Load the Iris dataset object; it is reused later for feature and class names.
iris = load_iris()

# Keep only the first two feature columns so the data can be drawn in 2-D;
# y holds the target values for every sample.
X, y = iris.data[:, :2], iris.target

## 绘制数据集

In [None]:
# Map each sample's target index to its class name once; the same labels drive
# both the colour (hue) and the marker shape (style) of the points.
species = iris.target_names[y]

ax = sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=species,
    style=species,
    palette=['r', 'c', 'b'],
    markers=('s', 'o', '^'),
    edgecolor='k',
)

# Label the axes with the names of the two plotted features.
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
ax.legend()

In [None]:
# Hold out a test split. test_size is not given, so train_test_split's default
# proportion applies; random_state=42 fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split with no depth limit specified
# (library defaults apply). random_state is fixed so reruns build the same tree.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Report accuracy on the training and test splits, to four decimal places.
# The format spec must be written as ".4f": whitespace between the precision
# and the type character (". 4 f"-style) raises ValueError at runtime.
print(f'训练准确率：{clf.score(X_train, y_train):.4f} ')
print(f'测试准确率：{clf.score(X_test, y_test):.4f} ')

In [None]:
from matplotlib.colors import ListedColormap

def plot_decision_boundaries(clf, X, y, feature_names, class_names, 
                             colors=['r', 'c', 'b'], markers=('s', 'o', '^')):
    """Draw a classifier's decision regions with the data points on top.

    The classifier is evaluated on a dense grid spanning the range of the
    first two columns of ``X`` (padded by 1 on every side); the predicted
    labels are drawn as filled contours and the samples are overlaid as a
    scatter plot on the current matplotlib figure.

    Parameters
    ----------
    clf : object
        Fitted estimator whose ``predict`` accepts an array with two columns.
    X : array
        Sample matrix; only columns 0 and 1 are read.
    y : array of int
        Per-sample indices into ``class_names``.
    feature_names : sequence of str
        Its first two entries become the x- and y-axis labels.
    class_names : array of str
        Class labels; must support indexing by the array ``y``.
    colors : list
        Colours used for both the region colormap and the point palette.
    markers : sequence
        Marker styles handed to seaborn for the points.

    Returns
    -------
    None
    """
    cmap = ListedColormap(colors)

    # Build an n x n grid over [x1_min, x1_max] x [x2_min, x2_max].
    n = 1000
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x1, x2 = np.meshgrid(np.linspace(x1_min, x1_max, n),
                         np.linspace(x2_min, x2_max, n))

    # Predict a label for every grid point, then restore the grid's 2-D shape.
    y_pred = clf.predict(np.c_[x1.ravel(), x2.ravel()])
    y_pred = y_pred.reshape(x1.shape)

    # Shade the predicted regions.
    plt.contourf(x1, x2, y_pred, cmap=cmap, alpha=0.4)

    # Overlay the data points, coloured and shaped by class.
    sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=class_names[y], style=class_names[y],
                    palette=colors, markers=markers, edgecolor='k')

    # Axis labels come from the feature_names parameter (the previous code
    # referenced an undefined identifier here and raised NameError).
    plt.xlabel(feature_names[0])
    plt.ylabel(feature_names[1])
    plt.legend()

In [None]:
# Draw the decision regions of the currently fitted tree over the full dataset.
# Arguments must be separated by ASCII commas; fullwidth commas are a SyntaxError.
plot_decision_boundaries(clf, X, y, iris.feature_names, iris.target_names)

## 决策树剪枝

In [None]:
# Refit with the tree capped at depth 3. This rebinds `clf`, so every later
# cell that reads `clf` sees the depth-limited model.
clf = DecisionTreeClassifier(max_depth=3, random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Report accuracy of the current `clf` on the training and test splits, to four
# decimal places. The format spec must be written as ".4f": whitespace between
# the precision and the type character raises ValueError at runtime.
print(f'训练准确率：{clf.score(X_train, y_train):.4f} ')
print(f'测试准确率：{clf.score(X_test, y_test):.4f} ')

In [None]:
# Draw the decision regions of the current `clf` over the full dataset.
# Arguments must be separated by ASCII commas; fullwidth commas are a SyntaxError.
plot_decision_boundaries(clf, X, y, iris.feature_names, iris.target_names)

## 可视化决策树

In [None]:
from sklearn import tree

# X was built from the first two Iris feature columns only, so pass just those
# two names to match the model's inputs. class_names is converted to a plain
# list because some scikit-learn releases validate this argument as a list and
# reject NumPy arrays. The trailing semicolon hides the returned annotation
# list so the cell shows only the figure.
tree.plot_tree(clf, feature_names=iris.feature_names[:2],
               class_names=list(iris.target_names), filled=True, rounded=True);