# 数据可视化

## 绘制鸢尾花数据集的特征分布图，四个图表绘制在一个 figure 中，要求图表类型和下图一致，图表修饰部分自定义。


In [None]:
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import pandas as pd
import numpy as np

# Font families tried, in order, for sans-serif text so the Chinese plot
# titles can be rendered. "Heiti TC" stays first, so output is unchanged on
# machines that have it; the remaining entries are fallbacks for machines
# that do not.
plt.rcParams["font.sans-serif"] = [
    "Heiti TC",
    "PingFang SC",
    "SimHei",
    "Microsoft YaHei",
    "Noto Sans CJK SC",
    "WenQuanYi Micro Hei",
    "Arial Unicode MS",
]
# Some CJK fonts have no glyph for the Unicode minus sign (U+2212); draw
# negative tick labels with the ASCII hyphen instead.
plt.rcParams["axes.unicode_minus"] = False

# Only the first 50 data rows of the CSV are loaded.
df = pd.read_csv("./python_data/iris.csv", nrows=50)

# Rows ordered by ascending "petal_length", then reduced to the column at
# position 2. The bare expression on the last line previews the first rows
# when this cell is run in a notebook.
df_sorted = df.sort_values(by="petal_length").iloc[:, 2]
df_sorted.head()

In [None]:
# A single 2x2 figure hosts all four charts. This block fills the top-left
# panel with a red scatter of column 2 (x axis) against column 0 (y axis).
figure1 = plt.figure(figsize=(20, 16), dpi=200)
scatter_ax = figure1.add_subplot(2, 2, 1)

x1, y1 = df.iloc[:, 2], df.iloc[:, 0]
mark = MarkerStyle("o")
scatter_ax.scatter(x1, y1, marker=mark, c="r")

scatter_ax.set_xlabel("Len of Petal")
scatter_ax.set_ylabel("Len of Sepal")
scatter_ax.set_title("花瓣/花萼长度散点图")

# Top-right panel: outline ("step") histogram of column 2 split into 10 bins,
# with each bin's count printed just above it.
figure1.add_subplot(2, 2, 2)
m1 = np.array(df.iloc[:, 2])
fre_tuple = plt.hist(m1, bins=10, histtype="step", edgecolor="steelblue", color="white")
plt.xlabel("Len of Petal")
plt.ylabel("Count")
plt.title("花瓣长度直方图")

# plt.hist returns (counts, bin_edges, patches). Despite their names, x_loc
# holds the per-bin counts and y_loc the bin edges (one more entry than
# counts). The names are kept because the pie chart further down reads x_loc.
x_loc = fre_tuple[0]
y_loc = fre_tuple[1]

# Centre every label on its bin using the actual edges. A fixed offset from
# the left edge is only correct for one particular bin width.
bin_centers = (y_loc[:-1] + y_loc[1:]) / 2
for count, center in zip(x_loc, bin_centers):
    plt.text(center, count + 0.1, f"{count:.0f}", ha="center", va="bottom", fontsize=15)


# Bottom-left panel: one bar per loaded row showing the value of column 0,
# with an x tick every 5 rows.
figure1.add_subplot(2, 2, 3)
y2 = np.asarray(df.iloc[:, 0])
# Bar positions are derived from the data instead of a literal 50 so that
# x2 and y2 always have the same length, whatever number of rows was loaded.
x2 = np.arange(len(y2))
x2_xticks = np.arange(0, len(x2), 5)
plt.xticks(x2_xticks)
plt.bar(x2, y2, facecolor="orange", edgecolor="white")
plt.xlabel("index")
plt.ylabel("Len of Sepal")
plt.title("花萼长度条形图")


# Bottom-right panel: pie chart of the histogram bin counts held in x_loc.
figure1.add_subplot(2, 2, 4)
plt.axis("equal")
# One entry per wedge; the ten entries here and the ten labels below match the
# histogram's bins=10. Wedges "E" and "F" are pulled slightly out of the pie.
exploded = [0, 0, 0, 0, 0.05, 0.05, 0, 0, 0, 0]
plt.pie(
    # Raw counts: pie() sizes each wedge as x / sum(x), so no division by a
    # hard-coded sample size is needed and the pie is always complete.
    x=x_loc,
    labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
    explode=exploded,
    autopct="%.2f%%",  # percentage format, two decimal places
    pctdistance=0.8,  # distance of the percentage text from the centre
    labeldistance=1.1,  # distance of the wedge labels from the centre
    startangle=180,  # angle at which the first wedge starts
    radius=1.2,  # radius of the pie
    counterclock=False,  # False lays the wedges out clockwise
    wedgeprops={"linewidth": 0.8, "edgecolor": "white"},  # wedge border styling
    textprops={"fontsize": 20, "color": "black"},  # styling of label/percentage text
)
plt.title("花瓣长度饼图", fontsize=20)