# 疫情数据分析

## 一、采集数据

### 1.导入库

In [12]:
# 导入库
import json
import time
import requests
import pandas as pd

### 2.爬取
链接示例： https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery3510004227748571102641_1610267182995&_=1610267182996

In [4]:
# Scratch cell (no executable code): uncomment to inspect the current Unix timestamp
# time.time()

In [13]:
# Target URL of the epidemic-data endpoint.
# The trailing `_` query parameter is a timestamp: time.time() is multiplied
# by 1000 (shifting the decimal point three places) and truncated to an integer.
timestamp_ms = int(time.time() * 1000)
url = f'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&_={timestamp_ms}'

### 3.请求数据

In [14]:
# Fetch the endpoint. The timeout keeps this cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces an HTTP error here rather
# than as a confusing JSON failure when the response is parsed later.
html = requests.get(url, timeout=10)
html.raise_for_status()
# Preview only the beginning of the payload; the full body is very long.
print(html.text[:500])

{"ret":0,"data":"{\"lastUpdateTime\":\"2021-01-24 19:04:02\",\"chinaTotal\":{\"confirm\":100017,\"heal\":92446,\"dead\":4811,\"nowConfirm\":2760,\"suspect\":2,\"nowSevere\":94,\"importedCase\":4604,\"noInfect\":1017,\"showLocalConfirm\":1,\"showlocalinfeciton\":1,\"localConfirm\":1503,\"noInfectH5\":744,\"localConfirmH5\":1502},\"chinaAdd\":{\"confirm\":174,\"heal\":149,\"dead\":1,\"nowConfirm\":100,\"suspect\":-1,\"nowSevere\":9,\"importedCase\":15,\"noInfect\":92,\"localConfirm\":46,\"noInfectH5\":82,\"localConfirmH5\":65},\"isShowAdd\":true,\"showAddSwitch\":{\"all\":true,\"confirm\":true,\"suspect\":true,\"dead\":true,\"heal\":true,\"nowConfirm\":true,\"nowSevere\":true,\"importedCase\":true,\"noInfect\":true,\"localConfirm\":true,\"localinfeciton\":true},\"areaTree\":[{\"name\":\"中国\",\"today\":{\"confirm\":174,\"isUpdated\":true},\"total\":{\"nowConfirm\":2760,\"confirm\":100017,\"suspect\":2,\"dead\":4811,\"deadRate\":\"4.81\",\"showRate\":false,\"heal\":92446,\"healRate\":\"92.

### 4.网页解析

In [15]:
# The response's 'data' field is itself a JSON-encoded string, so it is decoded
# a second time with json.loads.
data = json.loads(html.json()['data'])
# areaTree[0] is the country-level node; its 'children' are the regional entries.
china_data = data['areaTree'][0]['children']

# Fields copied from each region's cumulative 'total' record:
#   nowConfirm - currently confirmed (active) cases, not newly added ones
#   confirm    - cumulative confirmed cases
#   dead       - deaths
#   heal       - recoveries
#   deadRate   - fatality rate
#   healRate   - recovery rate
total_fields = ('nowConfirm', 'confirm', 'dead', 'heal', 'deadRate', 'healRate')

# data_set is a list with one dict of epidemic figures per region.
data_set = [
    {'province': area['name'], **{field: area['total'][field] for field in total_fields}}
    for area in china_data
]

# Explicit columns keep the column order stable even if data_set is empty.
df = pd.DataFrame(data_set, columns=['province', *total_fields])

# The API delivers the two rates as strings (e.g. "4.81"); convert them so they
# can be sorted, compared and plotted numerically. Unparseable values become NaN.
rate_columns = ['deadRate', 'healRate']
df[rate_columns] = df[rate_columns].apply(pd.to_numeric, errors='coerce')

# Timestamp of the data snapshot, e.g. "2021-01-24 19:04:02".
lastUpdateTime = data['lastUpdateTime'][0:20]
print(lastUpdateTime)

2021-01-24 19:04:02


### 5.数据保存

In [16]:
# Persist the per-region table. index=False keeps the meaningless RangeIndex out
# of the file, so reading it back does not yield an extra unnamed column.
# NOTE(review): this absolute Windows path is machine-specific — consider a path
# relative to the notebook so the cell runs elsewhere.
df.to_csv(r'E:\Python\数据分析\疫情数据分析\数据.csv', index=False)

## 二、数据可视化
- matplotlib
- pyecharts

### 1.导入库

In [17]:
from pyecharts import options as opts
from pyecharts.charts import Bar,Line,Pie,Grid,Map

In [30]:
# Shortened table: the 10 regions with the most currently confirmed cases,
# ordered from highest to lowest.
df_short = (
    df
    .sort_values(by='nowConfirm', ascending=False)
    .head(10)
)
df_short

Unnamed: 0,province,nowConfirm,confirm,dead,heal,deadRate,healRate
0,香港,883,10085,169,9033,1.68,89.57
1,河北,841,1288,7,440,0.54,34.16
2,黑龙江,382,1359,13,964,0.96,70.93
3,吉林,206,363,2,155,0.55,42.7
4,上海,102,1641,7,1532,0.43,93.36
5,台湾,95,889,7,787,0.79,88.53
6,北京,51,1025,9,965,0.88,94.15
7,广东,43,2104,8,2053,0.38,97.58
8,陕西,24,534,3,507,0.56,94.94
9,天津,24,336,3,309,0.89,91.96


In [26]:
# Preview the [province, nowConfirm] pairs built from the Top-10 table.
[[province, count] for province, count in zip(df_short['province'].tolist(), df_short['nowConfirm'].tolist())]

[['香港', 883],
 ['河北', 841],
 ['黑龙江', 382],
 ['吉林', 206],
 ['上海', 102],
 ['台湾', 95],
 ['北京', 51],
 ['广东', 43],
 ['陕西', 24],
 ['天津', 24]]

In [29]:
# Ring-shaped pie chart of currently confirmed cases for the Top-10 regions.
# Each data item is a [province, nowConfirm] pair.
top10_pairs = [
    [province, count]
    for province, count in zip(df_short['province'].tolist(), df_short['nowConfirm'].tolist())
]

pie = (
    Pie()
    .add(
        "",
        top10_pairs,
        radius=["10%", "30%"],  # inner / outer radius
    )
    # Title and legend are configured in ONE set_global_opts call: a separate
    # second call would presumably reset the options it is not given back to
    # their defaults (verify against the installed pyecharts version).
    # The subtitle uses %s; %a would render the timestamp as a quoted repr.
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="全国疫情现存确诊 Top10",
            subtitle="更新日期：%s" % lastUpdateTime,
            title_link='https://news.qq.com/zt2020/page/feiyan.htm#/',
        ),
        legend_opts=opts.LegendOpts(orient='vertical', pos_top="70%", pos_left="70%"),
    )
    # Label format: "{b}" is the item name, "{c}" its value.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b} : {c}"))
)
pie.render_notebook()