In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the order records from the CSV, decoding the file as Windows-1252.
supermarket_df = pd.read_csv(
    "完整数据.csv",
    encoding="Windows-1252",
)
# A bare trailing expression lets the notebook render the frame as a table.
supermarket_df

观察到 Order Date 和 Ship Date 两列的 Dtype 为 object，为便于后续分析，将它们转换为 datetime 类型

In [None]:
# Parse both date columns into pandas datetimes.
# NOTE(review): format="mixed" infers the format of every value separately, so an
# ambiguous string such as "01-02-2012" is read month-first unless dayfirst=True
# is passed — confirm against the raw CSV that this matches the file's convention.
for date_col in ("Order Date", "Ship Date"):
    supermarket_df[date_col] = pd.to_datetime(supermarket_df[date_col], format="mixed")

# Print column dtypes and non-null counts to confirm the conversion took effect.
supermarket_df.info()

此时数据类型已转换完成，接下来进行数据清洗

In [None]:
# Postal Code is dropped because (per the original note) too many of its values
# are missing. Reassigning instead of using inplace=True, together with
# errors="ignore", keeps this cell re-runnable: executing it a second time no
# longer raises KeyError for the already-removed column.
supermarket_df = supermarket_df.drop(columns="Postal Code", errors="ignore")

# Count rows that are exact duplicates of an earlier row.
print(f"Duplicated: {supermarket_df.duplicated().sum()}")
# Count Row ID values that repeat an earlier Row ID.
print(f"Row ID Duplicated: {supermarket_df['Row ID'].duplicated().sum()}")

# Show every row that still contains at least one missing value.
print(supermarket_df[supermarket_df.isna().any(axis=1)])

# 可视化


## 每月总销售额

In [None]:
import altair as alt

# Turn off Altair's row-count limit so the whole DataFrame can be passed to a chart.
alt.data_transformers.disable_max_rows()

# Bar chart of summed Sales per calendar month. The month() time unit groups by
# month of the year, so the same month from different years lands in one bar.
(
    alt.Chart(supermarket_df)
    .mark_bar()
    .encode(
        x=alt.X("month(Order Date):T"),
        y=alt.Y("sum(Sales):Q"),
    )
    .properties(width=900)
    .interactive()
)

## 销售额折线图（按月分）

In [None]:
# Line chart of summed Sales over time, one point per year-month of Order Date.
(
    alt.Chart(supermarket_df)
    .mark_line()
    .encode(
        x=alt.X("yearmonth(Order Date):T"),
        y=alt.Y("sum(Sales):Q"),
    )
    .properties(width=900)
    .interactive()
)

## 各市场销售额排名

In [None]:
# One bar per Market showing summed Sales; sort="-y" orders the bars from the
# largest total to the smallest, and the same total also drives the bar color.
(
    alt.Chart(supermarket_df)
    .mark_bar()
    .encode(
        x=alt.X(field="Market", sort="-y"),
        y="sum(Sales)",
        color=alt.Color("sum(Sales):Q"),
    )
    .properties(width=600)
    .interactive()
)

## 销售额最高的前20个国家

In [None]:
# Horizontal bar chart of the 20 countries with the highest total Sales.
#   1. transform_aggregate: collapse the data to one row per Country with its
#      summed Sales (sales_sum).
#   2. transform_window: rank those rows by sales_sum, highest first. Ranks
#      start at 1.
#   3. transform_filter: keep ranks 1 through 20. The comparison must be
#      inclusive (<= 20); a strict "< 20" would keep only 19 countries.
(
    alt.Chart(supermarket_df)
    .transform_aggregate(
        sales_sum="sum(Sales)",
        groupby=["Country"],
    )
    .transform_window(
        rank="rank(sales_sum)",
        sort=[alt.SortField("sales_sum", order="descending")],
    )
    .transform_filter(alt.datum.rank <= 20)
    .mark_bar()
    .encode(
        x=alt.X("sales_sum:Q", title="总销售额"),
        # sort="-x" lists the countries from the largest total downward.
        y=alt.Y(field="Country", sort="-x"),
        color=alt.Color("sales_sum:Q", title="总销售额"),
    )
    .properties(width=900)
)

## 订单优先级与利润关系

In [None]:
# Horizontal bars of mean Profit for each Order Priority; sort="-x" puts the
# priority with the highest mean profit first, and color encodes the same mean.
(
    alt.Chart(supermarket_df)
    .mark_bar()
    .encode(
        x="mean(Profit)",
        y=alt.Y("Order Priority", sort="-x"),
        color="mean(Profit)",
    )
    .properties(width=900)
)