# 疫情数据分析

## 一、采集数据

### 1.导入库

In [3]:
# 导入库
import json
import time
import requests
import pandas as pd

### 2.爬取
链接示例： https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery3510004227748571102641_1610267182995&_=1610267182996

In [5]:
# 测试行
# time.time()

1610268032.3811333

In [7]:
# 目标url
# 尾部time.time()表示时间戳 *1000将小数点向后移动三位
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&_=%d'%int(time.time()*1000)

### 3.请求数据

In [9]:
html = requests.get(url)
print(html.text)

{"ret":0,"data":"{\"lastUpdateTime\":\"2021-01-10 15:35:56\",\"chinaTotal\":{\"confirm\":97558,\"heal\":91353,\"dead\":4798,\"nowConfirm\":1407,\"suspect\":1,\"nowSevere\":16,\"importedCase\":4412,\"noInfect\":470,\"showLocalConfirm\":1,\"localConfirm\":332},\"chinaAdd\":{\"confirm\":171,\"heal\":91,\"dead\":2,\"nowConfirm\":78,\"suspect\":0,\"nowSevere\":0,\"importedCase\":21,\"noInfect\":27,\"localConfirm\":78},\"isShowAdd\":true,\"showAddSwitch\":{\"all\":true,\"confirm\":true,\"suspect\":true,\"dead\":true,\"heal\":true,\"nowConfirm\":true,\"nowSevere\":true,\"importedCase\":true,\"noInfect\":true,\"localConfirm\":true},\"areaTree\":[{\"name\":\"中国\",\"today\":{\"confirm\":171,\"isUpdated\":true},\"total\":{\"nowConfirm\":1407,\"confirm\":97558,\"suspect\":1,\"dead\":4798,\"deadRate\":\"4.92\",\"showRate\":false,\"heal\":91353,\"healRate\":\"93.64\",\"showHeal\":true,\"wzz\":0},\"children\":[{\"name\":\"香港\",\"today\":{\"confirm\":59,\"confirmCuts\":0,\"isUpdated\":true,\"tip\":\"\

### 4.网页解析

In [95]:
data = json.loads(html.json()['data'])
china_data = data['areaTree'][0]['children']

data_set = []

for i in china_data:
    data_dict = {}
    # 地区名称
    data_dict['province'] = i['name']
    
    # ==疫情数据==
    # 新增确诊
    data_dict['nowConfirm'] = i['total']['nowConfirm']
    # 累计确诊
    data_dict['confirm'] = i['total']['confirm']
    # 死亡人数
    data_dict['dead'] = i['total']['dead']
    # 痊愈人数
    data_dict['heal'] = i['total']['heal']
    # 死亡率
    data_dict['deadRate'] = i['total']['deadRate']
    # 治愈率
    data_dict['healRate'] = i['total']['healRate']
    
    
    # 加入数据
    data_set.append(data_dict)
# data_set为列表 其中为疫情数据


df = pd.DataFrame(data_set)
# df


# 时间解析
lastUpdateTime = data['lastUpdateTime'][0:20]
print(lastUpdateTime)

2021-01-10 15:35:56


### 5.数据处理

In [34]:
df.to_csv(r'E:\Python\数据分析\疫情数据分析\数据.csv')

## 二、数据可视化
- matplotlib
- pyecharts

### 1.导入库

In [51]:
from pyecharts import options as opts
from pyecharts.charts import Bar,Line,Pie,Grid,Map

In [75]:
# 缩短表 Top5
df_short = df.sort_values(by=['nowConfirm'], ascending=False)[:5]
df_short

Unnamed: 0,province,nowConfirm,confirm,dead,heal,deadRate,healRate
0,香港,680,9211,157,8374,1.7,90.91
1,河北,225,598,6,367,1.0,61.37
2,上海,106,1562,7,1449,0.45,92.77
3,台湾,101,828,7,720,0.85,86.96
4,辽宁,71,387,2,314,0.52,81.14


In [42]:
[list(i) for i in zip(df_short['province'].values.tolist(), df_short['nowConfirm'].values.tolist())]

[['香港', 680], ['河北', 225], ['上海', 106], ['台湾', 101], ['辽宁', 71]]

In [101]:
# 饼图
pie = (
    Pie()
    .add(
        "",
        [list(i) for i in zip(df_short['province'].values.tolist(), df_short['nowConfirm'].values.tolist())],
        radius = ["10%", "30%"]
    )
    .set_global_opts(
        legend_opts = opts.LegendOpts(orient = 'vertical', pos_top = "70%", pos_left = "70%"),
    )
    
    .set_global_opts(title_opts=opts.TitleOpts(
    title="全国疫情现存确诊 Top5",
    subtitle="更新日期：%a"%lastUpdateTime,
    title_link='https://news.qq.com/zt2020/page/feiyan.htm#/',
    )
                    )
    .set_series_opts(label_opts = opts.LabelOpts(formatter = "{b} : {c}"))
                    )
pie.render_notebook()