In [2]:
# Setup and imports
%matplotlib inline

import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import pymc3 as pm

import seaborn as sns

from src.data import make_dataset
from src.features import build_features
from src.models import train_model

In [3]:
df = make_dataset.make_dataset()

Dropping 31/184 countries due to lack of data


In [4]:
build_features.build_features(df)

In [9]:
df

Unnamed: 0_level_0,cases,deaths,death_rate_observed,days_since_first_case,cpi_score_2019,healthcare_oop_expenditure,hospital_beds,hci,population_perc_over65,population_perc_rural,population_perc_diabetic,days_since_first_case_normalized,cpi_score_2019_normalized,healthcare_oop_expenditure_normalized,hospital_beds_normalized,hci_normalized,population_perc_over65_normalized,population_perc_rural_normalized,population_perc_diabetic_normalized
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Afghanistan,484,15,0.030992,45,16.0,77.401006,0.5,0.389,2.584927,74.505,9.2,-0.600000,-1.000000,2.397450,-1.056793,-1.244363,-1.036844,1.611667,0.412993
Albania,409,23,0.056235,31,35.0,57.980901,2.9,0.621,13.744736,39.681,9.0,-0.733333,-0.732394,1.351901,-0.075133,0.281341,0.666391,0.034435,0.362035
Algeria,1666,235,0.141056,44,35.0,30.883271,1.9,0.523,6.362497,27.371,6.7,-0.609524,-0.732394,-0.106994,-0.484158,-0.363137,-0.460302,-0.523103,-0.223982
Angola,19,2,0.105263,20,26.0,35.208930,,0.361,2.216374,34.486,4.5,-0.838095,-0.859155,0.125893,,-1.428500,-1.093093,-0.200854,-0.784521
Argentina,1795,72,0.040111,37,45.0,15.798187,5.0,0.611,11.117789,8.130,5.9,-0.676190,-0.591549,-0.919152,0.783820,0.215578,0.265461,-1.394558,-0.427814
Armenia,921,10,0.010858,39,42.0,80.646954,4.2,0.572,11.253818,36.851,6.1,-0.657143,-0.633803,2.572207,0.456600,-0.040898,0.286222,-0.093740,-0.376856
Australia,6108,51,0.008350,74,77.0,18.943713,3.8,0.803,15.656475,13.988,5.6,-0.323810,-0.140845,-0.749802,0.292990,1.478230,0.958165,-1.129240,-0.504252
Austria,13244,295,0.022274,44,77.0,18.919736,7.6,0.793,19.001566,41.703,6.6,-0.609524,-0.140845,-0.751093,1.847285,1.412467,1.468701,0.126015,-0.249461
Azerbaijan,926,9,0.009719,39,30.0,78.918784,4.7,0.597,6.195183,44.320,6.1,-0.657143,-0.802817,2.479164,0.661112,0.123510,-0.485838,0.244543,-0.376856
Bahrain,887,5,0.005637,45,42.0,27.987860,2.0,0.668,2.426334,10.713,15.6,-0.600000,-0.633803,-0.262879,-0.443256,0.590428,-1.061048,-1.277570,2.043651


In [5]:
def initialize_model(df):

    NUM_COVARIATES = len([x for x in df.columns if 'normalized' in x])
    
    n = len(df)

    covid_mortality_model = pm.Model()

    with covid_mortality_model:

        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=0.3, beta=10)
        sig_0 = pm.Uniform('sig_0', lower=0.0, upper=mu_0 * (1 - mu_0))
        beta = pm.Normal('beta', mu=0, sigma=5, shape=NUM_COVARIATES)
        sigma = pm.HalfNormal('sigma', sigma=5)

        # Model mu from country-wise covariates:
        # Apply logit transformation so logistic regression performed
        mu_0_logit = np.log(mu_0 / (1 - mu_0))
        mu_est = mu_0_logit + \
            beta[0] * df['days_since_first_case_normalized'].values + \
            beta[1] * df['cpi_score_2019_normalized'].values + \
            beta[2] * df['healthcare_oop_expenditure_normalized'].values + \
            beta[3] * df['hospital_beds_normalized'].values + \
            beta[4] * df['hci_normalized'].values + \
            beta[5] * df['population_perc_over65_normalized'].values + \
            beta[6] * df['population_perc_rural_normalized'].values + \
            beta[7] * df['population_perc_diabetic_normalized'].values
        mu_model_logit = pm.Normal('mu_model_logit',
                                   mu=mu_est,
                                   sigma=sigma,
                                   shape=n)
        # Transform back to probability space:
        mu_model = np.exp(mu_model_logit) / (np.exp(mu_model_logit) + 1)

        # tau_i, mortality rate for each country
        # Parameterize with (mu, sigma)
        # instead of (alpha, beta) to ease interpretability.
        tau = pm.Beta('tau', mu=mu_model, sigma=sig_0, shape=n)

        # Binomial likelihood:
        d_obs = pm.Binomial('d_obs',
                            n=df['cases'].values,
                            p=tau,
                            observed=df['deaths'].values)

    return covid_mortality_model

In [8]:
model = initialize_model(df)

AttributeError: <pymc3.distributions.continuous.Normal object at 0x1c37ce1400> has no finite default value to use, checked: ('median', 'mean', 'mode'). Pass testval argument or adjust so value is finite.

In [7]:
trace = train_model.train_model(model)

NameError: name 'model' is not defined