In [201]:
import pandas as pd

# Relative path of the pre-processed dataset that every later cell works on.
csv_path = "../data/preprocess/pre-processed.csv"
df = pd.read_csv(csv_path)

# Inspect the column headers together with the dtype pandas inferred for each.
column_dtypes = df.dtypes
print(column_dtypes)

Field ID          object
Crop ID            int64
Crop Name         object
Crop Type         object
Planting Area    float64
Season            object
Field Type        object
Field Area       float64
Yield            float64
Cost             float64
Price            float64
dtype: object


# 不同地块的情况

## 成本与售价

In [202]:
import plotly.express as px

def plot_metric_by_field_and_crop(data, metric, title, metric_label):
    """Build a log-scale bar chart of one numeric column per field/crop pair.

    Args:
        data: DataFrame providing the "Field ID" and "Crop Name" columns plus
            the column named by ``metric``.
        metric: Name of the column plotted on the y axis.
        title: Chart title.
        metric_label: Display label used for ``metric``.

    Returns:
        The Plotly figure, with its legend hidden.
    """
    # One x category per "<Field ID> <Crop Name>" combination. The values are
    # passed as an array-like rather than a column name, which is why the
    # display label for this axis is registered under the "x" key below.
    field_and_crop = data["Field ID"] + " " + data["Crop Name"]

    chart = px.bar(
        data,
        x=field_and_crop,
        y=metric,
        title=title,
        labels={"x": "作物", metric: metric_label, "Crop Name": "作物"},
        color="Crop Name",
        log_y=True,
        height=750,
        width=1500,
    )
    # The crop is already spelled out in every x tick, so the legend would
    # only repeat it.
    chart.update_layout(showlegend=False)
    return chart


# Selling price of each crop on each field.
fig = plot_metric_by_field_and_crop(
    df,
    metric="Price",
    title="同一作物在不同地块下的销售单价",
    metric_label="销售单价 (元/斤)",
)
fig.show()

# Planting cost of each crop on each field.
fig = plot_metric_by_field_and_crop(
    df,
    metric="Cost",
    title="同一作物在不同地块下的种植成本（元/亩）",
    metric_label="种植成本（元/亩）",
)
fig.show()

## 种植面积

In [203]:
import plotly.express as px

# Planting area per field, coloured and text-labelled by crop.
fig = px.bar(
    df,
    x="Field ID",
    y="Planting Area",
    title="每个地块的作物种植面积情况",
    # Display labels for the columns this chart actually uses. The "x" and
    # "Price" entries copied from the price chart matched nothing here and
    # were dropped.
    labels={
        "Crop Name": "作物",
        "Planting Area": "种植面积",
        "Field ID": "地块编号",
    },
    color="Crop Name",
    # NOTE(review): px.bar stacks bars that share an x value by default; if a
    # field holds several crops, segment heights on a log axis are not
    # proportional to their areas. Confirm the log scale is intended here.
    log_y=True,
    # Keep the fields in their order of first appearance in the data.
    # ``category_orders`` is documented as taking lists, hence ``tolist()``.
    category_orders={"Field ID": df["Field ID"].unique().tolist()},
    text="Crop Name",
    height=750,
    width=1500,
)


# Render the chart.
fig.show()

# 不同作物的情况

## 成本与售价

In [204]:
import plotly.express as px
from plotly.subplots import make_subplots

# Sample variance of price and of cost over all rows of each crop (grouping is
# by crop name only). A crop with a single row has an undefined (NaN) sample
# variance, which is displayed as 0.
price_var = df.groupby(["Crop Name"])["Price"].var().fillna(0)
cost_var = df.groupby(["Crop Name"])["Cost"].var().fillna(0)

# A variance carries the squared unit of its input: price is in 元/斤 and cost
# is in 元/亩. The cost chart previously reused the price label and unit.
price_var_label = "价格方差（(元/斤)²）"
cost_var_label = "成本方差（(元/亩)²）"

# Build each bar chart with Plotly Express. Only the traces are reused below;
# the labels given here end up in each trace's hover text.
fig1 = px.bar(
    x=price_var.index,
    y=price_var,
    labels={"x": "作物", "y": price_var_label},
)
fig2 = px.bar(
    x=cost_var.index,
    y=cost_var,
    labels={"x": "作物", "y": cost_var_label},
)

# Side-by-side layout: price variance on the left, cost variance on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=(price_var_label, cost_var_label),
)

# Move the price-variance traces into the left subplot.
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)

# Move the cost-variance traces into the right subplot.
for trace in fig2.data:
    fig.add_trace(trace, row=1, col=2)

# Overall size of the combined figure, in pixels.
fig.update_layout(
    height=400,
    width=1500,
)

fig.show()


# Mean price of each crop, averaged over every row of that crop.
# NOTE(review): the chart title and the "Field Type" label mention field types,
# but the grouping below is by crop name only - confirm which was intended.
avg_price_df = df.groupby(["Crop Name"])["Price"].mean()

# Display names for the chart's axes and colour dimension.
avg_price_labels = {
    "Field Type": "地块类型",
    "value": "平均价格（元/斤）",
    "Crop Name": "作物名称",
}

# One bar per crop, each in its own colour, on a log-scaled y axis.
fig = px.bar(
    data_frame=avg_price_df,
    title="各种作物在不同地块类型下的平均价格（元/斤）",
    labels=avg_price_labels,
    color=avg_price_df.index,
    log_y=True,
    width=1500,
    height=750,
)
# Hide the legend before rendering.
fig.update_layout(showlegend=False)
fig.show()

## 种植面积

In [205]:
import plotly.express as px


# Total planting area of each crop, summed over every row of that crop.
planting_areas = df.groupby(["Crop Name"])["Planting Area"].sum()

# Display names for the value axis and the crop dimension.
planting_area_labels = {
    "value": "总种植面积（亩）",
    "Crop Name": "作物名称",
}

# One bar per crop, each in its own colour, on a log-scaled y axis.
fig = px.bar(
    data_frame=planting_areas,
    title="各种作物的种植面积（亩）",
    labels=planting_area_labels,
    color=planting_areas.index,
    log_y=True,
    width=1500,
    height=750,
)
fig.show()