In [1]:
%matplotlib notebook
import matplotlib
import seaborn as sb
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Jupyter Specifics
%matplotlib inline
from IPython.display import display, HTML
from ipywidgets.widgets import interact, interactive, IntSlider, FloatSlider, Layout, ToggleButton, ToggleButtons, fixed
display(HTML("<style>.container { width:100% !important; }</style>"))
style = {'description_width': '100px'}
slider_layout = Layout(width='99%')

from time import time
import pickle as pk

# Base data setup from databases

In [2]:
# new module data_config imported by data.py as well as Cluster.py
import data_config
data_config.report_correct = True
from data import *

Getting data:
getting JHU data...
jhu data selected from 1/22/20 to 12/17/20
getting owid data...
countries without population data excluded: ['International', 'Hong Kong', '']
owid data selected from 1/23/20 to 12/18/20
expanding OWID data : to new (daily), 7-day rolling (smoothed), reporting glitch (corrected) and combined


HBox(children=(FloatProgress(value=0.0, description='report correction deaths', max=198.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='report correction confirmed', max=198.0, style=ProgressSt…


number of countries listed in OWID database 197
done with OWID data (covid_owid_ts dictionary see .keys()) .
getting ICU and acute care data icus_2012 and WHO ...
WHO acute file found dictionary acute_who
ICU file found dictionary icus_2012
mapping country names between JHU and OWID and extracting common countries...
getting 2017 contact matrix data from 152 countries ...
152 country contact files found 1 A-M and 2 M-Z
Of 198 in countries_common 146 have contact matrices
4 country contact matrices set equal to that of neighbour to complete cluster country set
                   Afghanistan:Pakistan Kosovo:Serbia Moldova:Romania Norway:Sweden
getting UN all sex age group data for 2020 ...
UN contact files found 1 and 2
Of 198 in countries_common 182 have age structure
Kosovo age structure digitized from CIA World Fact Book Image 2018 to complete cluster country set in trimmed excel file
extracting data sets for common countries both databases...
expanding JHU data : to new (daily), 7-d

HBox(children=(FloatProgress(value=0.0, description='report correction deaths', max=198.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='report correction confirmed', max=198.0, style=ProgressSt…


number of countries listed in JHU database 192
done with JHU data (covid_ts dictionary keys: confirmed, deaths, recovered).
extracting testing data from OWID database
doing piecewise linear fits to testing data ... reg_testing


HBox(children=(FloatProgress(value=0.0, description='piecewise linear fit', max=198.0, style=ProgressStyle(des…


completed regularization of testing by pwlf and linear adjustment to confirmed cases (linr).
constructing nonlinear adjustment to confirmed cases based on pwlf testing (nonlin and nonlinr ...
completed nonlinear adjustment to confirmed cases.
Done with data.
---------------------------------


In [4]:
countries_latitudes

{('Afghanistan', ''): '33.93911',
 ('Albania', ''): '41.1533',
 ('Algeria', ''): '28.0339',
 ('Andorra', ''): '42.5063',
 ('Angola', ''): '-11.2027',
 ('Antigua and Barbuda', ''): '17.0608',
 ('Argentina', ''): '-38.4161',
 ('Armenia', ''): '40.0691',
 ('Australia', 'Australian Capital Territory'): '-35.4735',
 ('Australia', 'New South Wales'): '-33.8688',
 ('Australia', 'Northern Territory'): '-12.4634',
 ('Australia', 'Queensland'): '-27.4698',
 ('Australia', 'South Australia'): '-34.9285',
 ('Australia', 'Tasmania'): '-42.8821',
 ('Australia', 'Victoria'): '-37.8136',
 ('Australia', 'Western Australia'): '-31.9505',
 ('Austria', ''): '47.5162',
 ('Azerbaijan', ''): '40.1431',
 ('Bahamas', ''): '25.025885',
 ('Bahrain', ''): '26.0275',
 ('Bangladesh', ''): '23.685',
 ('Barbados', ''): '13.1939',
 ('Belarus', ''): '53.7098',
 ('Belgium', ''): '50.8333',
 ('Belize', ''): '17.1899',
 ('Benin', ''): '9.3077',
 ('Bhutan', ''): '27.5142',
 ('Bolivia', ''): '-16.2902',
 ('Bosnia and Herzego

In [5]:
continents

['Europe',
 'Middle_East',
 'North America',
 'Oceania',
 'Africa',
 'Asia',
 'South America',
 'Middle_East']

In [6]:
# continents + Middle_East
for cc in countries_owid:
    print(cc,continent[cc])
set([continent[cc] for cc in countries_owid])

Afghanistan Asia
Albania Europe
Algeria Africa
Andorra Europe
Angola Africa
Antigua and Barbuda North America
Argentina South America
Armenia Asia
Australia Oceania
Austria Europe
Azerbaijan Asia
Bahamas North America
Bahrain Middle_East
Bangladesh Asia
Barbados North America
Belarus Europe
Belgium Europe
Belize North America
Benin Africa
Bhutan Asia
Bolivia South America
Bosnia and Herzegovina Europe
Botswana Africa
Brazil South America
Brunei Asia
Bulgaria Europe
Burkina Faso Africa
Burundi Africa
Cambodia Asia
Cameroon Africa
Canada North America
Cape Verde Africa
Central African Republic Africa
Chad Africa
Chile South America
China Asia
Colombia South America
Comoros Africa
Congo Africa
Costa Rica North America
Cote d'Ivoire Africa
Croatia Europe
Cuba North America
Cyprus Europe
Czechia Europe
Democratic Republic of Congo Africa
Denmark Europe
Djibouti Africa
Dominica North America
Dominican Republic North America
Ecuador South America
Egypt Middle_East
El Salvador North America
Eq

{'Africa',
 'Asia',
 'Europe',
 'Middle_East',
 'North America',
 'Oceania',
 'Planet',
 'South America',
 'World'}

In [7]:
countries_in_continent

{'Asia': ['Afghanistan',
  'Armenia',
  'Azerbaijan',
  'Bangladesh',
  'Bhutan',
  'Brunei',
  'Cambodia',
  'China',
  'Georgia',
  'India',
  'Indonesia',
  'Japan',
  'Kazakhstan',
  'Kyrgyzstan',
  'Laos',
  'Malaysia',
  'Maldives',
  'Mongolia',
  'Myanmar',
  'Nepal',
  'Pakistan',
  'Philippines',
  'Singapore',
  'South Korea',
  'Sri Lanka',
  'Taiwan',
  'Tajikistan',
  'Thailand',
  'Timor',
  'Uzbekistan',
  'Vietnam'],
 'Europe': ['Albania',
  'Andorra',
  'Austria',
  'Belarus',
  'Belgium',
  'Bosnia and Herzegovina',
  'Bulgaria',
  'Croatia',
  'Cyprus',
  'Czechia',
  'Denmark',
  'Estonia',
  'Finland',
  'France',
  'Germany',
  'Greece',
  'Hungary',
  'Iceland',
  'Ireland',
  'Italy',
  'Kosovo',
  'Latvia',
  'Liechtenstein',
  'Lithuania',
  'Luxembourg',
  'Malta',
  'Moldova',
  'Monaco',
  'Montenegro',
  'Netherlands',
  'North Macedonia',
  'Norway',
  'Poland',
  'Portugal',
  'Romania',
  'Russia',
  'San Marino',
  'Serbia',
  'Slovakia',
  'Slovenia'

## Data save

Execute this section once to produce file `data_all_base.pk` in directory pks.

In [8]:
# miscnms = ['clusdata_all','cases','datasets','contact_dic','age_group_dic']
miscnms = ['acute_dict','age_group_dic','base','contact_dic','final_date','gdp_per_capita_owid',
 'icu_dict','owid_file','stringency_owid','tests_owid','translate_age','translate_contact']
deathnms = [x for x in dir() if 'deaths' in x]
casenms = [x for x in dir() if 'cases' in x if not callable(eval(x))]
covidnms = [x for x in dir() if 'covid' in x]
popnms = [x for x in dir() if 'population' in x]
testnms = [x for x in dir() if 'testing' in x if not callable(eval(x))]
recovnms = [x for x in dir() if 'recovered' in x]
confnms = [x for x in dir() if 'confirmed' in x]
countrynms = [x for x in dir() if 'countr' in x and not callable(eval(x))]
countrynms = [x for x in dir() if 'countr' in x and (isinstance(eval(x),dict) or isinstance(eval(x),list) or isinstance(eval(x),tuple))]
continentnms = [x for x in dir() if 'continent' in x]

allnmsdb = countrynms + covidnms + miscnms + deathnms + confnms + recovnms + casenms + popnms + testnms + ['allnmsdb']
data_all = {nm:eval(nm) for nm in allnmsdb}

start = time()
pk.dump(data_all,open('./pks/data_all_base.pk','wb'))
print('elapsed: ',time()-start)

elapsed:  7.452109098434448


In [9]:
print(allnmsdb)

['countries_common', 'countries_common_age', 'countries_common_contact', 'countries_common_x', 'countries_in_continent', 'countries_jhu', 'countries_jhu_2_owid', 'countries_jhu_4_owid', 'countries_jhu_non_special', 'countries_jhu_overseas', 'countries_latitudes', 'countries_nopopulation', 'countries_owid', 'countries_owid_to_jhu', 'countrynms', 'd_countries', 'jhu_to_owid_str_country', 'owid_to_jhu_str_country', 'scountries', 'covid_owid', 'covid_owid_ts', 'covid_ts', 'acute_dict', 'age_group_dic', 'base', 'contact_dic', 'final_date', 'gdp_per_capita_owid', 'icu_dict', 'owid_file', 'stringency_owid', 'tests_owid', 'translate_age', 'translate_contact', 'deaths', 'deaths_jhu', 'deaths_owid', 'new_deaths_c_spm_jhu', 'new_deaths_c_spm_owid', 'new_deaths_pm_jhu', 'new_deaths_pm_owid', 'new_deaths_spm_jhu', 'new_deaths_spm_owid', 'total_deaths_cs_jhu', 'total_deaths_cs_owid', 'total_deaths_jhu', 'total_deaths_owid', 'total_deaths_s_jhu', 'total_deaths_s_owid', 'confirmed', 'confirmed_jhu', '

In [10]:
# the remaining list from dir() below should only contain temporary variables and system modules
[x for x in dir() if '__' not in x and not callable(eval(x)) and not x in allnmsdb and not x.startswith('_') and not x.endswith('nms')]

['In',
 'Out',
 'cc',
 'cc_jhu',
 'cont',
 'cont_jhu',
 'continent',
 'continents',
 'csv',
 'data_all',
 'data_config',
 'data_days',
 'datetime',
 'daystop',
 'daysync',
 'debug',
 'math',
 'matplotlib',
 'np',
 'pd',
 'pk',
 'plt',
 'pwlf',
 'sb',
 'slider_layout',
 'start',
 'stats',
 'style',

## Data Load

Use this code to read in the data, e.g. at the top of another notebook, as an alternative to loading data.py

In [11]:
# read in data
start=time()
print('reading in data...')
with open('./pks/data_all_base.pk','rb') as fp:
    foo = pk.load(fp)
print('elapsed: ',time()-start)

# make each element of the dictionary a global variable named with key:
for x in foo:
    stmp = x+"= foo['"+x+"']"
    exec(stmp)

reading in data...
elapsed:  8.293049097061157
