In [4]:
# Setup and imports
%matplotlib inline

import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import pymc3 as pm

import seaborn as sns

from src.data import make_dataset
from src.features import build_features
from src.models import train_model

In [5]:
df = make_dataset.make_dataset()

Dropping 32/185 countries due to lack of data


In [6]:
build_features.build_features(df)

In [7]:
df

Unnamed: 0_level_0,cases,deaths,death_rate_observed,days_since_first_case,cpi_score_2019,healthcare_oop_expenditure,hospital_beds,hci,population_perc_over65,population_perc_rural,population_perc_diabetic,days_since_first_case_normalized,cpi_score_2019_normalized,healthcare_oop_expenditure_normalized,hospital_beds_normalized,hci_normalized,population_perc_over65_normalized,population_perc_rural_normalized,population_perc_diabetic_normalized
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Afghanistan,607,18,0.029654,48,16.0,77.401006,0.5,0.389,2.584927,74.505,9.2,-0.600000,-1.000000,2.397450,-1.056793,-1.244363,-1.036844,1.611667,0.412993
Albania,446,23,0.051570,34,35.0,57.980901,2.9,0.621,13.744736,39.681,9.0,-0.733333,-0.732394,1.351901,-0.075133,0.281341,0.666391,0.034435,0.362035
Algeria,1914,293,0.153083,47,35.0,30.883271,1.9,0.523,6.362497,27.371,6.7,-0.609524,-0.732394,-0.106994,-0.484158,-0.363137,-0.460302,-0.523103,-0.223982
Angola,19,2,0.105263,23,26.0,35.208930,,0.361,2.216374,34.486,4.5,-0.838095,-0.859155,0.125893,,-1.428500,-1.093093,-0.200854,-0.784521
Argentina,2142,90,0.042017,40,45.0,15.798187,5.0,0.611,11.117789,8.130,5.9,-0.676190,-0.591549,-0.919152,0.783820,0.215578,0.265461,-1.394558,-0.427814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Uruguay,480,7,0.014583,29,71.0,17.371092,2.8,0.600,14.814520,4.666,7.3,-0.780952,-0.225352,-0.834469,-0.116035,0.143239,0.829664,-1.551448,-0.071108
Uzbekistan,865,4,0.004624,28,25.0,52.247824,4.0,,4.419138,49.522,6.5,-0.790476,-0.873239,1.043241,0.374795,,-0.756902,0.480149,-0.274940
Vietnam,262,0,0.000000,80,37.0,44.572819,2.6,0.666,7.274978,64.081,6.0,-0.295238,-0.704225,0.630030,-0.197841,0.577275,-0.321037,1.139548,-0.402335
Zambia,43,2,0.046512,25,34.0,12.117911,2.0,0.396,2.099678,56.479,4.5,-0.819048,-0.746479,-1.117293,-0.443256,-1.198329,-1.110903,0.795242,-0.784521


In [8]:
def initialize_model(df):

    NUM_COVARIATES = len([x for x in df.columns if 'normalized' in x])
    
    n = len(df)

    covid_mortality_model = pm.Model()

    with covid_mortality_model:

        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=0.3, beta=10)
        sig_0 = pm.Uniform('sig_0', lower=0.0, upper=mu_0 * (1 - mu_0))
        beta = pm.Normal('beta', mu=0, sigma=5, shape=NUM_COVARIATES)
        sigma = pm.HalfNormal('sigma', sigma=5)

        # Model mu from country-wise covariates:
        # Apply logit transformation so logistic regression performed
        mu_0_logit = np.log(mu_0 / (1 - mu_0))
        mu_est = mu_0_logit + \
            beta[0] * df['days_since_first_case_normalized'].values + \
            beta[1] * df['cpi_score_2019_normalized'].values + \
            beta[2] * df['healthcare_oop_expenditure_normalized'].values + \
            beta[3] * df['hospital_beds_normalized'].values + \
            beta[4] * df['hci_normalized'].values + \
            beta[5] * df['population_perc_over65_normalized'].values + \
            beta[6] * df['population_perc_rural_normalized'].values + \
            beta[7] * df['population_perc_diabetic_normalized'].values
        mu_model_logit = pm.Normal('mu_model_logit',
                                   mu=mu_est,
                                   sigma=sigma,
                                   shape=n)
        # Transform back to probability space:
        mu_model = np.exp(mu_model_logit) / (np.exp(mu_model_logit) + 1)

        # tau_i, mortality rate for each country
        # Parameterize with (mu, sigma)
        # instead of (alpha, beta) to ease interpretability.
        tau = pm.Beta('tau', mu=mu_model, sigma=sig_0, shape=n)

        # Binomial likelihood:
        d_obs = pm.Binomial('d_obs',
                            n=df['cases'].values,
                            p=tau,
                            observed=df['deaths'].values)

    return covid_mortality_model

In [9]:
model = initialize_model(df)

AttributeError: <pymc3.distributions.continuous.Normal object at 0x1a2044f5d0> has no finite default value to use, checked: ('median', 'mean', 'mode'). Pass testval argument or adjust so value is finite.

In [7]:
trace = train_model.train_model(model)

NameError: name 'model' is not defined