# 实验四：决策树分类器 - Wine数据集分析与可视化

In [None]:
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Enable Chinese text in matplotlib figures: select the SimHei font for the
# sans-serif family and turn off the Unicode minus glyph for axis labels.
plt.rcParams.update({
    'font.sans-serif': ['simHei'],
    'axes.unicode_minus': False,
})

## 加载 Wine 数据集并预览

In [None]:
# Load the Wine dataset bundled with scikit-learn and print its main fields
# (feature matrix, labels, matrix shape, feature names, class names).
wine = load_wine()
for _label, _value in (
    ("data", wine.data),
    ("target", wine.target),
    ("datashape", wine.data.shape),
    ("feature_name", wine.feature_names),
    ("target_name", wine.target_names),
):
    print(f"{_label}:\n", _value)

## 转换为 DataFrame 查看前几行

In [None]:
# Build one table holding every feature column plus a trailing 'target'
# column with the class label, then show its first rows.
df = pd.DataFrame(wine.data, columns=wine.feature_names).assign(
    target=wine.target
)
print("here is the table:\n", df.head())

## 拆分训练集与测试集

In [None]:
# Hold out 30% of the samples for evaluation.
# - random_state pins the shuffle so the split (and every score computed
#   from it) is the same on each run of the notebook.
# - stratify keeps the class proportions of wine.target in both subsets,
#   which matters for a small multi-class dataset.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
    random_state=42,
    stratify=wine.target,
)
print("\nTrain shape:", Xtrain.shape)
print("Test shape:", Xtest.shape)

## 训练决策树模型（信息增益）并计算准确率

In [None]:
# Fit a decision tree that chooses splits by information gain (entropy) and
# report its accuracy on the held-out test set.
# random_state is fixed because scikit-learn permutes the candidate features
# at every split; without a seed, equally good splits can be resolved
# differently from one fit to the next.
# The class is referenced through the direct import, matching the pre-pruning
# cell later in the notebook.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)
print("score:\n", score)

## 特征重要性排序与可视化

In [None]:
# Chinese display names for the Wine features, listed in the same order as
# the columns the classifier was trained on.
feature_name = [
    '酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
    '非黄烷类酚类', '花青素', '颜色强度', '色调', 'od280/od315', '脯氨酸',
]
fea_importances = clf.feature_importances_

# zip() stops at the shorter input, so a label list of the wrong length would
# silently drop or mislabel features; fail loudly instead.
if len(feature_name) != len(fea_importances):
    raise ValueError(
        f"expected {len(fea_importances)} feature names, "
        f"got {len(feature_name)}"
    )

# Pair each label with its importance and rank from most to least important.
fea_zip = sorted(
    zip(feature_name, fea_importances),
    key=lambda pair: pair[1],
    reverse=True,
)
print("特征分析（按重要性排序）:\n", fea_zip)

In [None]:
# Visualization: horizontal bar chart of the ranked feature importances.
# fea_zip holds (label, importance) pairs; split it into two parallel lists.
features = [label for label, _ in fea_zip]
importances = [weight for _, weight in fea_zip]

plt.figure(figsize=(10, 6))
# NOTE(review): recent seaborn releases emit a deprecation warning when
# `palette` is given without `hue` — confirm against the installed version.
sns.barplot(x=importances, y=features, palette="viridis")
plt.title("Feature Importance in Decision Tree Classifier")
plt.xlabel("Importance")
plt.ylabel("Feature")
plt.show()

## 附加实验：预剪枝决策树

In [None]:
# Pre-pruned decision tree: growth is limited up front by the depth, sample
# count and impurity thresholds below. This rebinds `clf`, replacing the
# entropy-based model trained earlier.
# random_state is fixed so the estimator itself adds no run-to-run variation
# (scikit-learn permutes features at each split even with max_features=None).
clf = DecisionTreeClassifier(
    max_depth=4,
    min_samples_split=5,
    min_samples_leaf=3,
    max_features=None,
    min_impurity_decrease=0.01,
    random_state=42,
)

clf.fit(Xtrain, Ytrain)
accuracy = clf.score(Xtest, Ytest)
print(f"预剪枝后的模型准确度: {accuracy:.4f}")