In [47]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [48]:
### Initial imports
import logging
import numpy as np
import pandas as pd
import pymc3 as pm
import theano.tensor as T
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("ticks")

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

from epimodel.pymc3_models import cm_effect
from epimodel.pymc3_models.cm_effect.datapreprocessor import DataMerger

%matplotlib inline

In [49]:
# Each entry is (display name, epidemicforecasting.org code, OxCGRT code).
# (original note: "+ lithuania")
region_info = [
    ("Andorra", "AD", "AND"),
    ("Austria", "AT", "AUT"),
    ("Albania", "AL", "ALB"),
    ("Bosnia and Herzegovina", "BA", "BIH"),
    ("Belgium", "BE", "BEL"),
    ("Bulgaria", "BG", "BGR"),
    ("Switzerland", "CH", "CHE"),
    ("Czech Republic", "CZ", "CZE"),
    ("Germany", "DE", "DEU"),
    ("Denmark", "DK", "DNK"),
    ("Estonia", "EE", "EST"),
    ("Spain", "ES", "ESP"),
    ("Finland", "FI", "FIN"),
    ("France", "FR", "FRA"),
    ("United Kingdom", "GB", "GBR"),
    ("Georgia", "GE", "GEO"),
    ("Greece", "GR", "GRC"),
    ("Croatia", "HR", "HRV"),
    ("Hungary", "HU", "HUN"),
    ("Ireland", "IE", "IRL"),
    ("Israel", "IL", "ISR"),
    ("Iceland", "IS", "ISL"),
    ("Italy", "IT", "ITA"),
    ("Lithuania", "LT", "LTU"),
    ("Latvia", "LV", "LVA"),
    ("Malta", "MT", "MLT"),
    ("Morocco", "MA", "MAR"),
    ("Mexico", "MX", "MEX"),
    ("Malaysia", "MY", "MYS"),
    ("Netherlands", "NL", "NLD"),
    ("Norway", "NO", "NOR"),
    ("New Zealand", "NZ", "NZL"),
    ("Poland", "PL", "POL"),
    ("Portugal", "PT", "PRT"),
    ("Romania", "RO", "ROU"),
    ("Serbia", "RS", "SRB"),
    ("Sweden", "SE", "SWE"),
    ("Singapore", "SG", "SGP"),
    ("Slovenia", "SI", "SVN"),
    ("Slovakia", "SK", "SVK"),
    ("South Africa", "ZA", "ZAF"),
]

# Order the regions alphabetically by display name (in place), then split the
# triples into three parallel lists that share that ordering.
region_info.sort(key=lambda entry: entry[0])
region_names = [name for name, _, _ in region_info]
regions_epi = [epi_code for _, epi_code, _ in region_info]
regions_oxcgrt = [oxcgrt_code for _, _, oxcgrt_code in region_info]

# OxCGRT indicator columns handed to the data merger.
oxcgrt_cm_cols = [
    "H2_Testing policy",
    "C8_International travel controls",
]

# Each entry is (feature name, [(integer, list of values)]).
# NOTE(review): the integer is presumably a position in oxcgrt_cm_cols and the
# list the indicator levels that switch the feature on — confirm against DataMerger.
oxcgrt_filter = [
    (
        "Symptomatic Testing",
        [(0, [2, 3])],
    ),
    (
        "Travel Screen/Quarantine",
        [(1, [1, 2, 3, 4])],
    ),
    (
        "Travel Bans",
        [(1, [3, 4])],
    ),
]

# Maps each EpidemicForecasting.org countermeasure label (key) to the short
# feature name used in this notebook (value).
epifor_features = {
    "Mask wearing": "Mask Wearing",
    "Some businesses closed": "Some Businesses Suspended",
    "Most nonessential businesses closed": "Most Businesses Suspended",
    "Infection control in healthcare": "Healthcare Infection Control",
    "Stay-at-home order": "Stay Home Order",
    "Gatherings limited to 1000 people or less": "Gatherings <1000",
    "Gatherings limited to 100 people or less": "Gatherings <100",
    "Gatherings limited to 10 people or less": "Gatherings <10",
    "Schools closed": "School Closure",
}


# Feature names passed to the data merger as the final feature set, in this order.
final_features = [
    "Healthcare Infection Control",
    "Mask Wearing",
    "Symptomatic Testing",
    "Gatherings <1000",
    "Gatherings <100",
    "Gatherings <10",
    "Some Businesses Suspended",
    "Most Businesses Suspended",
    "School Closure",
    "Stay Home Order",
    "Travel Screen/Quarantine",
    "Travel Bans",
]

In [83]:
# Run the merger for the 22 Jan - 25 Apr 2020 window and keep whatever
# merge_data returns as `df`; later cells index it by region code.
dm = DataMerger(start_date="2020-1-22", end_date="2020-4-25")
df = dm.merge_data(
    "../../data",
    region_info,
    oxcgrt_filter,
    oxcgrt_cm_cols,
    epifor_features,
    final_features,
    "final_data_travel.csv",
)

INFO:epimodel.pymc3_models.cm_effect.datapreprocessor:
Countermeasures: EpidemicForecasting.org           min   ... mean  ... max   ... unique
 1 Mask Wearing                               0.000 ... 0.091 ... 1.000 ... [0. 1.]
 2 Some Businesses Suspended                  0.000 ... 0.410 ... 1.000 ... [0. 1.]
 3 Most Businesses Suspended                  0.000 ... 0.303 ... 1.000 ... [0. 1.]
 4 Healthcare Infection Control               0.000 ... 0.537 ... 1.000 ... [0. 1.]
 5 Stay Home Order                            0.000 ... 0.197 ... 1.000 ... [0. 1.]
 6 Gatherings <1000                           0.000 ... 0.456 ... 1.000 ... [0. 1.]
 7 Gatherings <100                            0.000 ... 0.423 ... 1.000 ... [0. 1.]
 8 Gatherings <10                             0.000 ... 0.342 ... 1.000 ... [0. 1.]
 9 School Closure                             0.000 ... 0.448 ... 1.000 ... [0. 1.]
INFO:epimodel.pymc3_models.cm_effect.datapreprocessor:Load OXCGRT
INFO:epimodel.pymc3_models.cm_effec

In [63]:
# Daily, UTC-aware date axis from 22 Jan to 25 Apr 2020 (both ends included).
Ds = pd.date_range(
    start="2020-1-22",
    end="2020-4-25",
    tz="utc",
)

In [68]:
# Display the date index currently bound to `Ds`.
Ds

DatetimeIndex(['2020-01-22 00:00:00+00:00', '2020-01-23 00:00:00+00:00',
               '2020-01-24 00:00:00+00:00', '2020-01-25 00:00:00+00:00',
               '2020-01-26 00:00:00+00:00', '2020-01-27 00:00:00+00:00',
               '2020-01-28 00:00:00+00:00', '2020-01-29 00:00:00+00:00',
               '2020-01-30 00:00:00+00:00', '2020-01-31 00:00:00+00:00',
               '2020-02-01 00:00:00+00:00', '2020-02-02 00:00:00+00:00',
               '2020-02-03 00:00:00+00:00', '2020-02-04 00:00:00+00:00',
               '2020-02-05 00:00:00+00:00', '2020-02-06 00:00:00+00:00',
               '2020-02-07 00:00:00+00:00', '2020-02-08 00:00:00+00:00',
               '2020-02-09 00:00:00+00:00', '2020-02-10 00:00:00+00:00',
               '2020-02-11 00:00:00+00:00', '2020-02-12 00:00:00+00:00',
               '2020-02-13 00:00:00+00:00', '2020-02-14 00:00:00+00:00',
               '2020-02-15 00:00:00+00:00', '2020-02-16 00:00:00+00:00',
               '2020-02-17 00:00:00+00:00', '2020-0

In [72]:

# Show the first index label of the rows selected from `df` for region `c`.
# NOTE(review): `c` is not assigned in this cell; the only visible assignment is
# the loop variable of the `for c in regions_epi` cell further down, so this
# relies on leftover kernel state and would raise NameError on a fresh
# top-to-bottom run unless `c` is defined somewhere not shown here.
df.loc[c].index[0]

Timestamp('2020-03-03 00:00:00+0000', tz='UTC')

In [82]:
# Build one (2, len(Ds)) float array per region code that is present in
# `df.index`, aligned to the date axis `Ds`. Columns before a region's first
# available date are left at zero. The arrays are collected in `values_to_stack`.
values_to_stack = []
Ds_l = list(Ds)  # materialised once; reused for the membership test and .index()

for c in regions_epi:
    if c not in df.index:
        continue  # no rows for this region code

    region_df = df.loc[c]  # looked up once per region instead of three times
    first_date = region_df.index[0]

    # Position of the region's first date on the date axis; fall back to 0
    # when that date is not one of the entries of Ds.
    x_0 = Ds_l.index(first_date) if first_date in Ds_l else 0

    # NOTE(review): the hard-coded 2 assumes region_df has exactly two columns,
    # and the .loc lookup assumes a row exists for every date in Ds[x_0:] — confirm.
    v = np.zeros((2, len(Ds)))
    v[:, x_0:] = region_df.loc[Ds[x_0:]].T
    values_to_stack.append(v)

In [77]:
# Inspect the date index as a plain Python list of Timestamp objects.
# Note: this prints every element of `Ds`.
list(Ds)

[Timestamp('2020-01-22 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-23 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-24 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-25 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-26 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-27 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-28 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-29 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-30 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-01-31 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-01 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-02 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-03 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-04 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-05 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-06 00:00:00+0000', tz='UTC', freq='D'),
 Timestamp('2020-02-07 00:00:00+0000', t

In [41]:
# Load the Johns Hopkins CSV with a two-level ("Code", "Date") index and the
# "Date" column parsed as datetimes.
johnhop_ds = pd.read_csv(
    "../../data/johns-hopkins.csv",
    index_col=["Code", "Date"],
    parse_dates=["Date"],
    infer_datetime_format=True,
)

In [18]:
# Daily, UTC-aware date axis from 22 Jan to 9 May 2020 (both ends included).
# NOTE: this rebinds `Ds`, replacing the shorter window (ending 25 Apr 2020)
# defined earlier in the notebook.
Ds = pd.date_range(start="2020-01-22", end="2020-05-09", tz="utc")

In [43]:
# Display the date index currently bound to `Ds`.
Ds

DatetimeIndex(['2020-01-22 00:00:00+00:00', '2020-01-23 00:00:00+00:00',
               '2020-01-24 00:00:00+00:00', '2020-01-25 00:00:00+00:00',
               '2020-01-26 00:00:00+00:00', '2020-01-27 00:00:00+00:00',
               '2020-01-28 00:00:00+00:00', '2020-01-29 00:00:00+00:00',
               '2020-01-30 00:00:00+00:00', '2020-01-31 00:00:00+00:00',
               ...
               '2020-04-30 00:00:00+00:00', '2020-05-01 00:00:00+00:00',
               '2020-05-02 00:00:00+00:00', '2020-05-03 00:00:00+00:00',
               '2020-05-04 00:00:00+00:00', '2020-05-05 00:00:00+00:00',
               '2020-05-06 00:00:00+00:00', '2020-05-07 00:00:00+00:00',
               '2020-05-08 00:00:00+00:00', '2020-05-09 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=109, freq='D')

In [44]:
# Select the "Confirmed" column for region code "AD" on the dates in `Ds`.
johnhop_ds["Confirmed"].loc["AD", Ds]

Code  Date                     
AD    2020-01-22 00:00:00+00:00      0.0
      2020-01-23 00:00:00+00:00      0.0
      2020-01-24 00:00:00+00:00      0.0
      2020-01-25 00:00:00+00:00      0.0
      2020-01-26 00:00:00+00:00      0.0
                                   ...  
      2020-05-05 00:00:00+00:00    751.0
      2020-05-06 00:00:00+00:00    751.0
      2020-05-07 00:00:00+00:00    752.0
      2020-05-08 00:00:00+00:00    752.0
      2020-05-09 00:00:00+00:00    754.0
Name: Confirmed, Length: 109, dtype: float64

In [39]:
# Check the string form of the first timestamp in `Ds`.
str(Ds[0])

'2020-01-22 00:00:00+00:00'

In [13]:
# Display the loaded Johns Hopkins frame (pandas truncates the rendering).
johnhop_ds

Unnamed: 0_level_0,Unnamed: 1_level_0,Recovered,Confirmed,Deaths,Active
Code,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AD,2020-01-22 00:00:00+00:00,0.0,0.0,0.0,0.0
AD,2020-01-23 00:00:00+00:00,0.0,0.0,0.0,0.0
AD,2020-01-24 00:00:00+00:00,0.0,0.0,0.0,0.0
AD,2020-01-25 00:00:00+00:00,0.0,0.0,0.0,0.0
AD,2020-01-26 00:00:00+00:00,0.0,0.0,0.0,0.0
...,...,...,...,...,...
ZW,2020-06-10 00:00:00+00:00,49.0,320.0,4.0,267.0
ZW,2020-06-11 00:00:00+00:00,51.0,332.0,4.0,277.0
ZW,2020-06-12 00:00:00+00:00,51.0,343.0,4.0,288.0
ZW,2020-06-13 00:00:00+00:00,54.0,356.0,4.0,298.0


In [45]:
# Re-run the merger for the same 22 Jan - 25 Apr 2020 window with a different
# output name.
# NOTE(review): unlike the earlier merge cell, the return value is discarded and
# the output name carries no ".csv" suffix — confirm both are intended. The
# saved output of this cell ends in a KeyboardInterrupt, so the run shown here
# did not complete.
dm = DataMerger(start_date="2020-1-22", end_date="2020-4-25")
dm.merge_data(
    "../../data",
    region_info,
    oxcgrt_filter,
    oxcgrt_cm_cols,
    epifor_features,
    final_features,
    "final_data_extended",
)

INFO:epimodel.pymc3_models.cm_effect.datapreprocessor:
Countermeasures: EpidemicForecasting.org           min   ... mean  ... max   ... unique
 1 Mask Wearing                               0.000 ... 0.091 ... 1.000 ... [0. 1.]
 2 Some Businesses Suspended                  0.000 ... 0.410 ... 1.000 ... [0. 1.]
 3 Most Businesses Suspended                  0.000 ... 0.303 ... 1.000 ... [0. 1.]
 4 Healthcare Infection Control               0.000 ... 0.537 ... 1.000 ... [0. 1.]
 5 Stay Home Order                            0.000 ... 0.197 ... 1.000 ... [0. 1.]
 6 Gatherings <1000                           0.000 ... 0.456 ... 1.000 ... [0. 1.]
 7 Gatherings <100                            0.000 ... 0.423 ... 1.000 ... [0. 1.]
 8 Gatherings <10                             0.000 ... 0.342 ... 1.000 ... [0. 1.]
 9 School Closure                             0.000 ... 0.448 ... 1.000 ... [0. 1.]
INFO:epimodel.pymc3_models.cm_effect.datapreprocessor:Load OXCGRT
INFO:epimodel.pymc3_models.cm_effec

KeyboardInterrupt: 