In [1]:
import pandas as pd
import cufflinks as cf
cf.go_offline()
import os
import ast

In [2]:
# Load the raw area-level records and parse the update timestamp.
area = pd.read_csv('../data/ncor_data/DXYArea.csv')
area['updateTime'] = pd.to_datetime(area['updateTime'])

# Keep only the columns whose name mentions "province", plus the timestamp,
# drop repeated rows, and index the result by update time.
province_cols = [name for name in area.columns if 'province' in name]
province = (
    area[['updateTime', *province_cols]]
    .drop_duplicates()
    .set_index('updateTime')
)
province.head()

Unnamed: 0_level_0,provinceName,provinceEnglishName,province_zipCode,province_confirmedCount,province_suspectedCount,province_curedCount,province_deadCount
updateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-17 13:15:41.559,广东省,Guangdong,440000,1322,0,484,4
2020-02-17 13:13:39.175,广东省,Guangdong,440000,1322,0,483,4
2020-02-17 13:12:37.938,四川省,Sichuan,510000,495,0,137,3
2020-02-17 13:07:31.627,安徽省,Anhui,340000,973,0,278,6
2020-02-17 13:07:31.627,河北省,Hebei,130000,301,0,111,3


# 增长趋势

In [3]:
# Growth-trend preparation: compare Heilongjiang with every other province
# (Hubei removed) using a smoothed, min-max normalised daily increase.

# .copy() yields an independent frame, so the column assignments below do not
# write into a filtered slice of the earlier frame (SettingWithCopyWarning).
province = province[province['provinceName'] != '湖北省'].copy()
# NOTE(review): the "other" label reads "except Wuhan", but the filter above
# removes all of Hubei province -- confirm the intended wording.
province['IsHlJ'] = (province['provinceName'] == '黑龙江省').map({True: '黑龙江省', False: '其他省（除武汉）'})
province['provinceNameCopy'] = province['provinceName'].copy()

# Daily maximum of the confirmed count per group, pivoted to one column per group.
# NOTE(review): for the "other" group this is the maximum over its provinces
# (i.e. the largest single province), not a sum -- confirm this is intended.
province_d_counts = province.groupby('IsHlJ').resample('d')['province_confirmedCount'].max().reset_index()
province_d_counts = pd.pivot_table(province_d_counts, index='updateTime', columns='IsHlJ', values='province_confirmedCount')

# Day-over-day change, then a 3-day time-window rolling mean.
province_d_increase = province_d_counts.diff()
province_d_increase = province_d_increase.rolling('3d').mean().round(2)

# Min-max scale each column so both groups share the same 0-1 range.
province_d_increase_nor = ((province_d_increase-province_d_increase.min())/(province_d_increase.max()-province_d_increase.min())).round(2)

# Month/day axis labels such as "2/17". Formatting from the datetime itself
# (rather than fixed character offsets) keeps two-digit months intact.
province_d_increase_nor.index = province_d_increase_nor.index.strftime('%m/%d').str.lstrip('0')

In [4]:
# Interactive line chart of the normalised daily increase, one line per group.
province_d_increase_nor.iplot(
    xTitle='日期',
    yTitle='确诊人数增长率',
)

# 迁徙率

In [5]:
# Collect, per day, the province -> value mapping from the
# "wuhan_move_out_province" files in the migration data directory.
move_dir = '../data/baidu-migration-master/data'
# Sorted so files are processed (and printed) in filename order; the names
# start with a YYYYMMDD prefix, so that is also date order.
move_files = sorted(f for f in os.listdir(move_dir) if 'wuhan_move_out_province' in f)

move_out_dict = {}

for f in move_files:
    # String comparison on the 8-character date prefix: keeps 20200110-20200122.
    if '20200123' > f[:8] > '20200109':  # date window still to be decided
        print(f)

        # Context manager closes the handle even if parsing fails.
        # NOTE(review): the platform default text encoding is used, as before --
        # confirm the files decode correctly on every machine that runs this.
        with open(os.path.join(move_dir, f)) as fh:
            data = ast.literal_eval(fh.read())['data']['list']

        # One entry per province for this day, keyed by the date prefix.
        move_out_dict[f[:8]] = {i['province_name']: i['value'] for i in data}

20200110_wuhan_move_out_province
20200111_wuhan_move_out_province
20200112_wuhan_move_out_province
20200113_wuhan_move_out_province
20200114_wuhan_move_out_province
20200115_wuhan_move_out_province
20200116_wuhan_move_out_province
20200117_wuhan_move_out_province
20200118_wuhan_move_out_province
20200119_wuhan_move_out_province
20200120_wuhan_move_out_province
20200121_wuhan_move_out_province
20200122_wuhan_move_out_province


In [6]:
# Per-province table: highest confirmed count seen, sorted ascending.
current_counts = (
    province.groupby('provinceName')['province_confirmedCount']
    .max()
    .sort_values()
    .to_frame()
)

# Rows are days, columns are provinces; the column-wise mean is the average
# migration value per province over the selected days.
move_out_df = pd.DataFrame.from_dict(move_out_dict, orient='index')
move_out_df.index = pd.to_datetime(move_out_df.index)
# Assignment aligns on province name; provinces absent from the migration
# data receive NaN here and are dropped below.
current_counts['move_out'] = move_out_df.mean().round(2)

# Exclude Hubei and incomplete rows. Chaining .dropna() returns a fresh frame
# instead of mutating a filtered slice in place (SettingWithCopyWarning).
current_counts = current_counts[current_counts.index != '湖北省'].dropna()

In [9]:
# Show the per-province table of confirmed counts and mean move-out values.
current_counts

Unnamed: 0_level_0,province_confirmedCount,move_out
provinceName,Unnamed: 1_level_1,Unnamed: 2_level_1
西藏自治区,1,0.02
青海省,18,0.07
宁夏回族自治区,70,0.1
内蒙古自治区,72,0.2
新疆维吾尔自治区,75,0.24
吉林省,89,0.18
甘肃省,90,0.38
辽宁省,121,0.36
天津市,125,0.17
山西省,129,0.63


In [7]:
# Text labels for three provinces, each placed at that province's
# (move_out, province_confirmedCount) point.
annotations = [
    {
        'x': current_counts.loc[name]['move_out'],
        'y': current_counts.loc[name]['province_confirmedCount'],
        'text': name,
    }
    for name in ('黑龙江省', '浙江省', '广东省')
]

In [8]:
# Scatter of move-out value (x) against confirmed count (y), one marker per
# province, with a best-fit line and the labels built in `annotations`.
current_counts.iplot(
    # data mapping
    x='move_out',
    y='province_confirmedCount',
    # marker appearance
    kind='scatter',
    mode='markers',
    colors=['red'],
    # best-fit line, drawn in red
    bestfit=True,
    bestfit_colors=['red'],
    # axis titles and point labels
    xTitle='武汉迁出人口各省占比(%)',
    yTitle='确诊人数',
    annotations=annotations,
)