In [1]:
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Map, Bar, Timeline

In [None]:
# Read the source workbook 'CityData.xlsx' into a DataFrame.
# NOTE(review): later cells read the columns 'provinceName', 'updateTime' and
# 'city_confirmedCount' from this frame — presumably one row per city per update; confirm.
df = pd.read_excel('CityData.xlsx')
# Bare last expression so the notebook renders the loaded frame.
df

In [None]:
# Sum the city-level confirmed counts into one total per (province, update time).
group_df = (
    df
    .groupby(by=['provinceName', 'updateTime'])['city_confirmedCount']
    .sum()
)
# Flatten the grouped Series back into a plain frame with the two keys as columns.
df = group_df.to_frame().reset_index()
df

### task1: 统计截至4月1日各省的累计确诊数量，并通过图表展示

In [None]:
# Alphabetically sorted, de-duplicated province names. Used as the bar-chart
# x-axis here and as the province key set by later cells.
provinces = sorted(set(df['provinceName']))

# "Up to April 1" must include April 1 itself, so keep everything strictly
# before April 2. This also covers rows whose timestamp carries a time of day.
cutoff_exclusive = pd.Timestamp('2020-04-02 00:00:00')
up_to_cutoff = df.loc[df['updateTime'] < cutoff_exclusive]

# Latest reported value per province within the window. Grouping by the plain
# column label (not a one-element list) keeps the group keys as strings on
# every pandas version; a list key yields 1-tuples on pandas >= 2.0.
latest_by_province = (
    up_to_cutoff
    .sort_values('updateTime')
    .groupby('provinceName')['city_confirmedCount']
    .last()
)

# One value per entry of `provinces`, in the same order; provinces with no
# record in the window stay at 0.
data_0401 = [int(latest_by_province.get(province, 0)) for province in provinces]
data_0401

In [None]:
# Bar chart of the April 1 snapshot: provinces on the x-axis, confirmed counts
# as bar heights, with a visual map spanning the observed value range.
bar_title = opts.TitleOpts(title="截止4.1日全国疫情条形图")
# Rotate the province labels so they do not overlap.
bar_xaxis = opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-45))
bar_visualmap = opts.VisualMapOpts(min_=min(data_0401), max_=max(data_0401))

res = Bar()
res.add_xaxis(xaxis_data=provinces)
res.add_yaxis(series_name="截止4.1日各省累计确诊人数", y_axis=data_0401)
res.set_global_opts(
    title_opts=bar_title,
    xaxis_opts=bar_xaxis,
    visualmap_opts=bar_visualmap,
)
res.render_notebook()

### task2: 对每日各省的累计确诊患者数量进行统计，为图像添加时间轴，通过时间轴自动播放演示疫情发展的整个过程

In [6]:
# Every distinct update time in ascending order; one timeline frame is built per entry.
date = sorted(set(df['updateTime']))
# print(date)

# Group by the plain column label rather than a one-element list, so that
# grouped.get_group(<timestamp>) accepts a scalar key on every pandas version
# (list-style grouping expects a 1-tuple key on newer releases).
grouped = df.groupby('updateTime')

In [7]:
# test
# sub_df = grouped.get_group(pd.Timestamp('2020-04-01 00:00:00'))
# for _,row in sub_df.iterrows():
#     print(row['provinceName'],row['city_confirmedCount'])

In [None]:
# Zero baseline for every province. get_data copies it (never mutates it), so
# provinces without any report yet still get an entry with a count of 0.
data = {province: 0 for province in provinces}

# Administrative suffixes removed from the spreadsheet names, applied in this
# order (e.g. "广西壮族自治区" -> "广西壮族" -> "广西").
_REGION_SUFFIXES = ("省", "市", "特别行政区", "自治区", "壮族", "回族", "维吾尔")


def _short_region_name(name):
    """Return `name` with trailing administrative suffixes removed.

    Only real suffixes are removed. `str.strip` would instead treat its
    argument as a set of characters and trim them from both ends.
    """
    for suffix in _REGION_SUFFIXES:
        if len(name) > len(suffix) and name.endswith(suffix):
            name = name[:-len(suffix)]
    return name


def get_data(day):
    """Build the Map data pairs for one point in time.

    Args:
        day: pd.Timestamp; the snapshot uses every row of `df` whose
            'updateTime' is on or before this moment.

    Returns:
        A list of [short_province_name, confirmed_count] pairs, one per entry
        of `provinces`. Each count is the most recent value reported up to
        `day`, or 0 when the province has no report yet.

    The result depends only on `day`, so the function can be called in any
    order and cells can be re-run without stale values leaking between calls.
    """
    reported = df.loc[df['updateTime'] <= day].sort_values('updateTime')
    latest = reported.groupby('provinceName')['city_confirmedCount'].last()

    counts = dict(data)
    for province, count in latest.items():
        counts[province] = int(count)

    # Per the original author's note, the province names from the spreadsheet
    # must lose their 省/市/自治区/... suffix, because Map()'s data_pair only
    # recognises the short form.
    my_data = [[_short_region_name(province), count] for province, count in counts.items()]
    return my_data

data

In [9]:
# test
# get_data(pd.Timestamp('2020-04-01 00:00:00'))

In [None]:
# Legend buckets for the piecewise visual map. They are the same for every
# frame, so they are built once instead of on every loop iteration.
severity_pieces = [
    {"min": 1, "max": 99, "label": "1-99人", "color": "#FFEFD5"},
    {"min": 100, "max": 999, "label": "100-999人", "color": "#FFD700"},
    {"min": 1000, "max": 9999, "label": "1000-9999人", "color": "#FF6347"},
    {"min": 10000, "max": 999999, "label": ">10000人", "color": "#B22222"},
]

# Create the timeline carousel object.
t1 = Timeline()
# Build one map per time point and attach it to the timeline.
for day in date:
    # Timeline.add expects a string label; formatting the timestamp as a plain
    # date also keeps "00:00:00" out of the slider labels and titles.
    day_label = day.strftime("%Y-%m-%d")
    my_map = (
        Map()
        .add(
            series_name="截止{}全国疫情确诊数量".format(day_label),
            data_pair=get_data(day)
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="{}全国疫情确诊地图".format(day_label),
            ),
            visualmap_opts=opts.VisualMapOpts(
                # Use discrete buckets rather than a continuous scale.
                is_piecewise=True,
                pieces=severity_pieces
            )
        )
    )
    t1.add(my_map, day_label)

# Playback settings apply to the whole timeline, so set them once after all
# frames have been added instead of once per frame.
t1.add_schema(
    # Start playing automatically.
    is_auto_play=True,
    # Loop back to the first frame after the last one.
    is_loop_play=True,
    # Interval between frames.
    play_interval=1000
)
t1.render_notebook()