In [825]:
import pandas as pd
import numpy as np
import datetime as dt
import json
import os

In [826]:
def get_meta_files():
    death_fp = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'
    confirmed_fp ='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
    location_fp = '/home/zhgao/covid19-forecast-hub/data-locations/locations.csv'
    deaths = pd.read_csv(death_fp)
    confirmed = pd.read_csv(confirmed_fp)
    location = pd.read_csv(location_fp)
    ts_features = [item for item in deaths.columns if item not in ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
           'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Population', 'Province/State', 'Country/Region', 'Long']]
    deaths = deaths.groupby('Province_State')[ts_features].sum()
    deaths = deaths.stack().reset_index().rename({'Province_State':'location_name','level_1':'target_end_date',0:'label'},axis=1)
    
    deaths['target_end_date'] = pd.to_datetime(deaths['target_end_date'])
    deaths_US = deaths.groupby('target_end_date')['label'].sum().reset_index()
    deaths_US['location_name'] = 'US'
    deaths = pd.concat([deaths, deaths_US], axis=0, ignore_index=True)
    deaths['week_label'] = deaths['label'] - deaths.groupby('location_name')['label'].shift(7)

    ts_features = [item for item in confirmed.columns if item not in ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
           'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Population', 'Province/State', 'Country/Region', 'Long']]
    confirmed = confirmed.groupby('Province_State')[ts_features].sum()
    confirmed = confirmed.stack().reset_index().rename({'Province_State':'location_name','level_1':'target_end_date',0:'label'},axis=1)
    confirmed['target_end_date'] = pd.to_datetime(confirmed['target_end_date'])
    confirmed_US = confirmed.groupby('target_end_date')['label'].sum().reset_index()
    confirmed_US['location_name'] = 'US'
    confirmed = pd.concat([confirmed, confirmed_US], axis=0, ignore_index=True)
    confirmed['week_label'] = confirmed['label'] - confirmed.groupby('location_name')['label'].shift(7)
    return deaths, confirmed, location

In [827]:
def dump_results(LOAD_DIR='../CDC',FILE_NAME='2020-11-02-MSRA-DeepST.csv'):
    target_list = ['1 wk ahead cum death', '2 wk ahead cum death',
       '3 wk ahead cum death', '4 wk ahead cum death',
       '1 wk ahead inc death', '2 wk ahead inc death',
       '3 wk ahead inc death', '4 wk ahead inc death',
       '1 wk ahead inc case', '2 wk ahead inc case',
       '3 wk ahead inc case', '4 wk ahead inc case']
    state_list = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
       'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming', 'US']
    dump_dict = dict()
    confirmed['target_end_date'] = pd.to_datetime(confirmed['target_end_date'])
    deaths['target_end_date'] = pd.to_datetime(deaths['target_end_date'])

    forecast = pd.read_csv(os.path.join(LOAD_DIR, FILE_NAME))
    forecast['target_end_date'] = pd.to_datetime(forecast['target_end_date'])
    forecast = forecast[forecast.type=='point']
    forecast = forecast[forecast.target.isin(target_list)]
    START_DATE = pd.to_datetime(forecast['target_end_date'].sort_values().values[0]) - dt.timedelta(days=7)
    deaths_now = deaths[deaths['target_end_date']==START_DATE].rename({'week_label':'last_label'},axis=1)[['location_name','last_label']]
    confirmed_now = confirmed[confirmed['target_end_date']==START_DATE].rename({'week_label':'last_label'},axis=1)[['location_name','last_label']]    
    forecast = pd.merge(forecast, location[['location','location_name']], on=['location'], how='left')
    
    forecast = forecast[forecast.location_name.isin(state_list)]
    
    forecast_cum_deaths = forecast[forecast['target'].map(lambda x:x.endswith('cum death'))]
    forecast_inc_deaths = forecast[forecast['target'].map(lambda x:x.endswith('inc death'))]
    forecast_inc_confirmed = forecast[forecast['target'].map(lambda x:x.endswith('case'))]

    forecast_cum_deaths = pd.merge(forecast_cum_deaths,deaths,on=['location_name','target_end_date'],how='left')
    forecast_inc_deaths = pd.merge(forecast_inc_deaths,deaths,on=['location_name','target_end_date'],how='left')
    forecast_inc_confirmed = pd.merge(forecast_inc_confirmed,confirmed,on=['location_name','target_end_date'],how='left')

    forecast_inc_deaths = pd.merge(forecast_inc_deaths, deaths_now, on=['location_name'], how='left')
    forecast_inc_confirmed = pd.merge(forecast_inc_confirmed, confirmed_now, on=['location_name'], how='left')

        
    START_DATE = pd.to_datetime(forecast['target_end_date'].sort_values().values[0]) - dt.timedelta(days=14)
    deaths_now = deaths[deaths['target_end_date']==START_DATE].rename({'week_label':'last2_label'},axis=1)[['location_name','last2_label']]
    confirmed_now = confirmed[confirmed['target_end_date']==START_DATE].rename({'week_label':'last2_label'},axis=1)[['location_name','last2_label']]    

    forecast_inc_deaths = pd.merge(forecast_inc_deaths, deaths_now, on=['location_name'], how='left')
    forecast_inc_confirmed = pd.merge(forecast_inc_confirmed, confirmed_now, on=['location_name'], how='left')
    
    for name,item in zip(['IncDeaths','IncCase'],[forecast_inc_deaths,forecast_inc_confirmed]):
        tmp = item.groupby(['forecast_date',
                            'location_name'])[['value',
                                               'week_label',
                                               'last_label',
                                               'target_end_date']].apply(lambda x:[list(x['value']),
                                                                                   list(x['week_label']),
                                                                                   list(x['last_label']),
                                                                                  [str(item) for item in x['target_end_date']],
                                                                                  ]).reset_index().rename({0:'DUMP'},axis=1)
        dump_dict[name] = dict()
        for item in tmp.values:
            _date,_state,_values = item
            dump_dict[name][_state] = _values

    trend_dict = dict()
    trend = forecast_inc_deaths.groupby('location_name').apply(lambda x:(list(x['value'])[1] + list(x['value'])[0] + 100*2) / (list(x['value'])[0] +  list(x['last_label'])[0] + 100.0*2)).reset_index().rename({0:'value'},axis=1)
    for item in trend.values:
        _state,_values = item
        trend_dict[_state] = _values
        
    trend_inc_dict = dict()
    trend = forecast_inc_confirmed.groupby('location_name').apply(lambda x:(list(x['value'])[1] + list(x['value'])[0] + 10000*2 ) / (list(x['value'])[0] + list(x['last_label'])[0] + 10000*2)).reset_index().rename({0:'value'},axis=1)
    for item in trend.values:
        _state,_values = item
        trend_inc_dict[_state] = _values    
        
    
    return deaths_now, confirmed_now, dump_dict, trend_dict, trend_inc_dict, forecast_inc_confirmed

In [1032]:
deaths, confirmed, location = get_meta_files()

In [829]:
LOAD_DIR = '../CDC'
FILE_NAME = '2020-11-09-MSRA-DeepST.csv'
deaths_now, confirmed_now, dump_dict, trend_dict, trend_inc_dict, forecast_inc_confirmed = dump_results(FILE_NAME=FILE_NAME)

In [834]:
forecast_inc_confirmed['last_ratio'] = forecast_inc_confirmed['last_label'] / forecast_inc_confirmed['last2_label']

In [824]:
forecast_inc_confirmed = forecast_inc_confirmed[['forecast_date','target_end_date','location_name','value']].set_index(['forecast_date',
                                                                                               'target_end_date',
                                                                                               'location_name']).unstack('target_end_date')

In [811]:
forecast_inc_confirmed.columns = [item for _,item in forecast_inc_confirmed.columns]

In [812]:
forecast_inc_confirmed = forecast_inc_confirmed.reset_index()

In [813]:
trend = pd.DataFrame(pd.concat([pd.Series(trend_dict),pd.Series(trend_inc_dict)],axis=1).max(axis=1)).rename({0:'trend'},axis=1)
# trend = pd.DataFrame(pd.Series(trend_dict)).rename({0:'trend'},axis=1)

In [814]:
trend = pd.merge(trend, confirmed_now.set_index('location_name'), left_index=True, right_index=True).reset_index().rename({'index':'location_name'},axis=1)

In [815]:
a = pd.merge(forecast_inc_confirmed,trend,on=['location_name'],how='left')

In [816]:
a.columns = [str(item).split()[0] for item in a.columns]

In [817]:
use_cols = [item for item in a.columns if item.startswith('2020')]
print(use_cols)
a[use_cols[0]] = (a['last_label'] * a['trend']**1 + a[use_cols[0]])/2.0
a[use_cols[1]] = (a['last_label'] * a['trend']**2 + a[use_cols[1]])/2.0
a[use_cols[2]] = (a['last_label'] * a['trend']**3)
a[use_cols[3]] = (a['last_label'] * a['trend']**4)

['2020-11-14', '2020-11-21', '2020-11-28', '2020-12-05']


KeyError: 'last_label'

In [780]:
a[use_cols + ['location_name']].to_csv('../SLT/{}.adj.csv'.format(FILE_NAME.split('.')[0]))

In [781]:
death_fp = '/home/zhgao/COVID19/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'
population = pd.read_csv(death_fp)
population= population[['Province_State',
                        'Population']].groupby('Province_State')['Population'].sum().reset_index().rename({'Province_State':'States'},axis=1)
population = population.append({'States':'US','Population':population['Population'].sum()},ignore_index=True)
population.columns = ['location_name','population']

In [782]:
a = pd.merge(a, population)

In [783]:
a["Increase Cases Ratio Over The Next 4 Weeks"] = (a['trend']**4.0).map(lambda x:round(x,2))

In [784]:
a[['forecast_date', 'location_name', '2020-11-14', '2020-11-21',
       '2020-11-28', '2020-12-05','Increase Cases Ratio Over The Next 4 Weeks']].to_csv('../SLT/cases.forecast.11.09.csv')

In [785]:
use_cols = ['2020-11-14', '2020-11-21', '2020-11-28', '2020-12-05']
for col in use_cols:
    a[col+' (week inc cases/ 100K)'] = (a[col] / a['population'] * 100000).map(lambda x: round(x,2))

In [786]:
a[['forecast_date', 'location_name','2020-11-14 (week inc cases/ 100K)',
       '2020-11-21 (week inc cases/ 100K)', '2020-11-28 (week inc cases/ 100K)',
       '2020-12-05 (week inc cases/ 100K)']].to_csv('../SLT/cases.100K.forecast.11.09.csv')

In [795]:
a[['forecast_date', 'location_name', '2020-11-14 (week inc cases/ 100K)',
       '2020-11-21 (week inc cases/ 100K)', '2020-11-28 (week inc cases/ 100K)',
       '2020-12-05 (week inc cases/ 100K)']].\
sort_values('2020-12-05 (week inc cases/ 100K)',ascending=False).style.\
        background_gradient(cmap='Reds',subset=["2020-12-05 (week inc cases/ 100K)"])

Unnamed: 0,forecast_date,location_name,2020-11-14 (week inc cases/ 100K),2020-11-21 (week inc cases/ 100K),2020-11-28 (week inc cases/ 100K),2020-12-05 (week inc cases/ 100K)
33,2020-11-09,North Dakota,1303.48,1265.47,1263.85,1279.23
49,2020-11-09,Wisconsin,746.76,776.01,1056.74,1223.59
40,2020-11-09,South Dakota,1055.0,1126.92,1113.92,1194.88
24,2020-11-09,Missouri,432.11,458.43,724.26,926.19
14,2020-11-09,Iowa,799.43,742.5,849.21,910.43
26,2020-11-09,Nebraska,686.03,662.16,758.29,818.51
25,2020-11-09,Montana,655.3,746.1,722.35,779.36
22,2020-11-09,Minnesota,557.64,550.81,617.92,677.13
50,2020-11-09,Wyoming,637.32,676.47,634.31,657.32
15,2020-11-09,Kansas,464.58,457.62,583.77,641.28


In [None]:
a[['forecast_date','location_name','Increase Cases Ratio Over The Next 4 Weeks']].\
sort_values('Increase Cases Ratio Over The Next 4 Weeks',ascending=False).style.\
        background_gradient(cmap='Reds',subset=["Increase Cases Ratio Over The Next 4 Weeks"])

In [657]:
states = ['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming']

In [758]:
results = []
for fname in [
             '2020-10-19-MSRA-DeepST.adj.csv']:
    results.append(pd.read_csv('../SLT/{}'.format(fname),index_col=0))

In [759]:
results = pd.concat(results,axis=1)

In [760]:
results.columns[:-2]

Index(['2020-10-24', '2020-10-31', '2020-11-07'], dtype='object')

In [761]:
y_true = confirmed[confirmed.target_end_date.isin(results.columns[:-2])][['location_name',
                                                                         'target_end_date',
                                                                         'week_label']]

In [762]:
results = results.set_index('location_name').stack().reset_index().rename({'level_1':'target_end_date',
                                                                 0:'pred'},axis=1)
results = results[results.target_end_date!='2020-11-14']

In [763]:
y_true = y_true[y_true.location_name.isin(states)].sort_values(['location_name','target_end_date'])
results = results[results.location_name.isin(states)].sort_values(['location_name','target_end_date'])

In [764]:
results['mae'] = np.abs(1 - results['pred'].values / y_true['week_label'].values)

In [765]:
results.groupby('target_end_date')['mae'].mean()

target_end_date
2020-10-24    0.127222
2020-10-31    0.235990
2020-11-07    0.376998
Name: mae, dtype: float64

In [1033]:
target_list_1 = [
   '1 wk ahead cum death', '2 wk ahead cum death',
   '3 wk ahead cum death', '4 wk ahead cum death',
   '1 wk ahead inc death', '2 wk ahead inc death',
   '3 wk ahead inc death', '4 wk ahead inc death',
#    '1 wk ahead inc case', '2 wk ahead inc case',
#    '3 wk ahead inc case', '4 wk ahead inc case'
    ]
target_list_2 = [
#    '1 wk ahead cum death', '2 wk ahead cum death',
#    '3 wk ahead cum death', '4 wk ahead cum death',
#    '1 wk ahead inc death', '2 wk ahead inc death',
#    '3 wk ahead inc death', '4 wk ahead inc death',
   '1 wk ahead inc case', '2 wk ahead inc case',
   '3 wk ahead inc case', '4 wk ahead inc case']
state_list = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
   'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
   'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
   'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
   'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
   'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
   'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
   'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
   'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
   'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming', 'US']

prophet = pd.read_csv('../SLT/prophet.forecast.csv')
prophet.columns = ['NAN','target_end_date','yhat','location_name','predict_week']
prophet = prophet[['target_end_date','location_name','predict_week']]
deep = pd.read_csv('../CDC/2020-11-09-MSRA-DeepST.csv')
deep = pd.merge(deep, location[['location','location_name']], on=['location'], how='left')

In [1034]:
us = prophet.groupby('target_end_date')['predict_week'].sum().reset_index()
us['location_name'] = 'US'
prophet = pd.concat([prophet,us]).reset_index(drop=True)

In [1035]:
deep_deaths = deep[deep.target.isin(target_list_1)][deep.type=='point']
deep = deep[deep.target.isin(target_list_2)][deep.type=='point']

  """Entry point for launching an IPython kernel.
  


In [1036]:
en = pd.merge(deep,prophet[['target_end_date','location_name','predict_week']],on=['target_end_date','location_name'],how='left')

In [1037]:
en['forecasting'] = en['predict_week'].map(abs)*0.2 + en['value']*0.8

In [1038]:
en = pd.merge(en,population,on=['location_name'],how='left')

In [1039]:
en = en.rename({'value':'DeepST_predict',
           'predict_week':'Prophet_predict',
           'forecasting':'Ensemble_predict'},
              axis=1)

In [1040]:
en['DeepST_predict_perday_100k'] = (en['DeepST_predict'] / en['population'] * 100000/ 7).map(lambda x:round(x,2))
en['Prophet_predict_perday_100k'] = (en['Prophet_predict'] / en['population'] * 100000/ 7).map(lambda x:round(x,2))
en['Ensemble_predict_perday_100k'] = (en['Ensemble_predict'] / en['population'] * 100000/7).map(lambda x:round(x,2))

In [1044]:
en['DeepST_ratio'] = (en['DeepST_predict'] / en['week_label']).map(lambda x:round(x,2))
en['Prophet_ratio'] = (en['Prophet_predict'] / en['week_label']).map(lambda x:round(x,2))
en['Ensemble_ratio'] = (en['Ensemble_predict'] / en['week_label']).map(lambda x:round(x,2))

In [1045]:
en[['location_name','forecast_date', 'target', 'target_end_date','DeepST_predict','Prophet_predict','Ensemble_predict',
   'DeepST_ratio','Prophet_ratio','Ensemble_ratio']].to_csv('../SLT/cases.forecast.11.09.csv')

In [1046]:
en[['location_name','forecast_date', 'target', 'target_end_date','DeepST_predict_perday_100k','Prophet_predict_perday_100k','Ensemble_predict_perday_100k']].to_csv('../SLT/cases.100K.forecast.11.09.csv')

In [1047]:
lastweek = confirmed[confirmed.target_end_date=='2020-11-07'][['location_name','week_label']]

In [1048]:
en = pd.merge(en,lastweek,on=['location_name'],how='left')

In [1049]:
tmp = en[['location_name','forecast_date', 'target', 'target_end_date','Ensemble_ratio']].rename({'Ensemble_ratio':
                                                                                                  'Increase Cases Ratio Over The Next 4 Weeks'},axis=1)

In [1050]:
tmp[tmp.target_end_date=='2020-12-05'].sort_values('Increase Cases Ratio Over The Next 4 Weeks',ascending=False).style.background_gradient(cmap='Reds',subset=["Increase Cases Ratio Over The Next 4 Weeks"])

Unnamed: 0,location_name,forecast_date,target,target_end_date,Increase Cases Ratio Over The Next 4 Weeks
197,Vermont,2020-11-09,4 wk ahead inc case,2020-12-05,2.76
188,Oklahoma,2020-11-09,4 wk ahead inc case,2020-12-05,1.89
181,New Hampshire,2020-11-09,4 wk ahead inc case,2020-12-05,1.73
160,Delaware,2020-11-09,4 wk ahead inc case,2020-12-05,1.68
163,Hawaii,2020-11-09,4 wk ahead inc case,2020-12-05,1.63
175,Minnesota,2020-11-09,4 wk ahead inc case,2020-12-05,1.6
200,West Virginia,2020-11-09,4 wk ahead inc case,2020-12-05,1.56
180,Nevada,2020-11-09,4 wk ahead inc case,2020-12-05,1.56
171,Maine,2020-11-09,4 wk ahead inc case,2020-12-05,1.55
167,Iowa,2020-11-09,4 wk ahead inc case,2020-12-05,1.55


In [1051]:
tmp = en[['location_name','forecast_date', 'target', 'target_end_date','Ensemble_predict_perday_100k']].rename({'Ensemble_predict_perday_100k':
                                                                                                  'Day inc cases / 100K'},axis=1)

In [1052]:
tmp[tmp.target_end_date=='2020-12-05'].sort_values('Day inc cases / 100K',ascending=False).style.background_gradient(cmap='Reds',subset=["Day inc cases / 100K"])

Unnamed: 0,location_name,forecast_date,target,target_end_date,Day inc cases / 100K
186,North Dakota,2020-11-09,4 wk ahead inc case,2020-12-05,225.62
193,South Dakota,2020-11-09,4 wk ahead inc case,2020-12-05,183.11
167,Iowa,2020-11-09,4 wk ahead inc case,2020-12-05,152.34
179,Nebraska,2020-11-09,4 wk ahead inc case,2020-12-05,119.56
202,Wyoming,2020-11-09,4 wk ahead inc case,2020-12-05,115.67
201,Wisconsin,2020-11-09,4 wk ahead inc case,2020-12-05,111.82
178,Montana,2020-11-09,4 wk ahead inc case,2020-12-05,109.73
175,Minnesota,2020-11-09,4 wk ahead inc case,2020-12-05,107.55
196,Utah,2020-11-09,4 wk ahead inc case,2020-12-05,97.16
165,Illinois,2020-11-09,4 wk ahead inc case,2020-12-05,94.99


In [1002]:
tmp[tmp.target_end_date=='2020-12-05']

Unnamed: 0,location_name,forecast_date,target,target_end_date,Ensemble_predict_perday_100k
153,Alabama,2020-11-09,4 wk ahead inc case,2020-12-05,43.25
154,Alaska,2020-11-09,4 wk ahead inc case,2020-12-05,104.14
155,Arizona,2020-11-09,4 wk ahead inc case,2020-12-05,50.84
156,Arkansas,2020-11-09,4 wk ahead inc case,2020-12-05,79.98
157,California,2020-11-09,4 wk ahead inc case,2020-12-05,19.35
158,Colorado,2020-11-09,4 wk ahead inc case,2020-12-05,107.64
159,Connecticut,2020-11-09,4 wk ahead inc case,2020-12-05,16.55
160,Delaware,2020-11-09,4 wk ahead inc case,2020-12-05,44.86
161,Florida,2020-11-09,4 wk ahead inc case,2020-12-05,41.25
162,Georgia,2020-11-09,4 wk ahead inc case,2020-12-05,94.8
