In [1]:
%matplotlib notebook
import matplotlib
import seaborn as sb
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Jupyter Specifics
%matplotlib inline
from IPython.display import display, HTML
from ipywidgets.widgets import interact, interactive, IntSlider, FloatSlider, Layout, ToggleButton, ToggleButtons, fixed
display(HTML("<style>.container { width:100% !important; }</style>"))
style = {'description_width': '100px'}
slider_layout = Layout(width='99%')

from time import time
import pickle as pk

# Cluster data setup

In [2]:
from Cluster import *

In [3]:
# Build the cluster data object. The constructor reads the base data set and
# logs its progress (see the output printed below this cell).
ClData = ClusterData(
    clusdtype='JRP1',
    base_data='data_all_base',
    cluster_data=False,
    report_correct=True,
    database='JHU',
    daysync=23,
    thresh=10,
    mindays=150,
    mindeaths=200,
    mindeathspm=0.1,
    syncat='first major peak',
    K=2,
)

reading in data from data_all_base ...
elapsed:  6.3509650230407715
Constructing common synchronized deaths, case and testing data...
database JHU report_correct True
mindeaths 200 mindeathspm 0.1
database JHU report correction True
daysync 23 thresh for deaths 10 mindays 150
No of big common countries is 99
---------------------------------
number of countries in total_deaths) 187
number of countries in big 99
synchronizing and trimming time series to common length...
making cases with nonlinear testing adjustment...
done.
----------------------------------------
Finished loading Cluster module
----------------------------------------


In [4]:
# Execute ClusterFit.py inside this notebook's namespace, so that everything it
# defines becomes available here and it can see the names defined above.
# The context manager guarantees the source file is closed again, and compiling
# with the real filename makes tracebacks point at ClusterFit.py instead of "<string>".
with open('ClusterFit.py', 'r') as src_file:
    exec(compile(src_file.read(), 'ClusterFit.py', 'exec'))

In [5]:
# Lengths of the four country-name collections held by ClData, on one line.
print(*map(len, (ClData.countries_jhu_4_owid,
                 ClData.countries_jhu_2_owid,
                 ClData.countries_owid,
                 ClData.countries_jhu)))

# Each report below is a header, the collection itself, then an empty line.
print('countries in common: owid format',
      ClData.countries_jhu_2_owid,
      '', sep='\n')
print('owid countries not in common set',
      set(ClData.countries_owid).difference(ClData.countries_jhu_2_owid),
      '', sep='\n')
print('countries in common: jhu format',
      ClData.countries_owid_to_jhu,
      '', sep='\n')

# Count and content of ClData.bcountries.
print(len(ClData.bcountries), 'bcountries', ClData.bcountries)

188 188 212 191
countries in common: owid format
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Myanmar', 'Burundi', 'Cape Verde', 'Cambodia', 'Cameroon', 'Central African Republic', 'Chad', 'Chile', 'Colombia', 'Comoros', 'Congo', 'Democratic Republic of Congo', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Swaziland', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Vatican', 'Honduras', 'Hungary', 'Iceland', 'India', 'Ind

In [6]:
# Expose the two ClData attributes used throughout the notebook as plain names.
clusdata_all, bcountries = ClData.clusdata_all, ClData.bcountries

# Iterating clusdata_all yields its dataset names; keep them as a list.
cases = list(clusdata_all)
cases

['deaths',
 'cases',
 'cases_lin2020',
 'cases_pwlfit',
 'cases_nonlin',
 'cases_nonlinr']

In [7]:
# Data sets compared below ('cases_nonlinr' is available too but is not used here).
datasets = ['deaths', 'cases', 'cases_lin2020', 'cases_pwlfit', 'cases_nonlin']

# One country list per data set, unpacked in the same order as `datasets`.
d_countries, c_countries, lc_countries, pc_countries, nc_countries = (
    list(clusdata_all[dataset]) for dataset in datasets
)

# The deaths list serves as the reference country list.
countries = d_countries

In [8]:
# Number of countries in the deaths data set, followed by their sorted names.
print(len(d_countries), np.sort(d_countries), sep='\n')

77
['Afghanistan' 'Albania' 'Algeria' 'Armenia' 'Australia' 'Austria'
 'Azerbaijan' 'Bahrain' 'Bangladesh' 'Belarus' 'Belgium'
 'Bosnia and Herzegovina' 'Brazil' 'Cameroon' 'Canada' 'Chile' 'Croatia'
 'Denmark' 'Dominican Republic' 'Ecuador' 'Egypt' 'El Salvador' 'Ethiopia'
 'Finland' 'France' 'Germany' 'Ghana' 'Greece' 'Guatemala' 'Haiti'
 'Honduras' 'Hungary' 'India' 'Indonesia' 'Iran' 'Iraq' 'Ireland' 'Israel'
 'Italy' 'Japan' 'Kenya' 'Kosovo' 'Kuwait' 'Kyrgyzstan' 'Macedonia'
 'Madagascar' 'Malaysia' 'Mexico' 'Moldova' 'Netherlands' 'Norway' 'Oman'
 'Pakistan' 'Palestine' 'Panama' 'Peru' 'Philippines' 'Poland' 'Portugal'
 'Qatar' 'Romania' 'Russia' 'Saudi Arabia' 'Senegal' 'Serbia'
 'South Korea' 'Spain' 'Sudan' 'Sweden' 'Switzerland' 'Turkey'
 'United Arab Emirates' 'United Kingdom' 'United States' 'Venezuela'
 'Yemen' 'Zimbabwe']


In [9]:
# Sanity check: every data set must cover the same countries, and the time series
# must all have the same length.
# (Historical note: a run with an Oct 27 finish, mindeaths=100, mindays=150 and
# mindeathspm = 0.5 gave 79 countries.)
countrysets = [d_countries, c_countries, lc_countries, pc_countries, nc_countries]

# Size of each country list.
print(list(map(len, countrysets)))

# Pairwise equality matrix: one printed row per country list.
for ccs1 in countrysets:
    print([ccs1 == other for other in countrysets])

# Length of the 'United States' series in each data set.
print(list(map(lambda dataset: len(clusdata_all[dataset]['United States']), datasets)))
# print(len(total_deaths_x['dates']),len(total_cases_x['dates']),len(testing_x['dates']),total_deaths_x['dates'][0],total_cases_x['dates'][0],testing_x['dates'][0])

[77, 77, 77, 77, 77]
[True, True, True, True, True]
[True, True, True, True, True]
[True, True, True, True, True]
[True, True, True, True, True]
[True, True, True, True, True]
[153, 153, 153, 153, 153]


In [10]:
# Show the keys of the first entry of ClData.covid_owid.
ClData.covid_owid[0].keys()

odict_keys(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths', 'new_deaths', 'new_deaths_smoothed', 'total_cases_per_million', 'new_cases_per_million', 'new_cases_smoothed_per_million', 'total_deaths_per_million', 'new_deaths_per_million', 'new_deaths_smoothed_per_million', 'icu_patients', 'icu_patients_per_million', 'hosp_patients', 'hosp_patients_per_million', 'weekly_icu_admissions', 'weekly_icu_admissions_per_million', 'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests', 'total_tests_per_thousand', 'new_tests_per_thousand', 'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index', 'population', 'population_density', 'median_age', 'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty', 'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers', 'male_smokers', 'handwashing_facilities', 'hospital_b

In [11]:
# Show the keys of ClData.covid_ts.
ClData.covid_ts.keys()

dict_keys(['confirmed', 'deaths', 'recovered', 'new_deaths', 'new_deaths_smoothed', 'deaths_smoothed', 'new_deaths_corrected', 'new_deaths_corrected_smoothed', 'deaths_corrected_smoothed', 'new_confirmed', 'new_confirmed_smoothed', 'confirmed_smoothed', 'new_confirmed_corrected', 'new_confirmed_corrected_smoothed', 'confirmed_corrected_smoothed', 'confirmed_linr_corrected_smoothed', 'new_confirmed_nonlin_corrected_smoothed', 'confirmed_nonlin_corrected_smoothed', 'new_confirmed_nonlinr_corrected_smoothed', 'confirmed_nonlinr_corrected_smoothed'])

In [12]:
# Show the keys of ClData.covid_owid_ts.
ClData.covid_owid_ts.keys()

dict_keys(['confirmed', 'deaths', 'recovered', 'tests', 'stringency', 'population', 'population_density', 'gdp_per_capita', 'new_deaths', 'new_deaths_smoothed', 'deaths_smoothed', 'new_deaths_corrected', 'new_deaths_corrected_smoothed', 'deaths_corrected_smoothed', 'new_confirmed', 'new_confirmed_smoothed', 'confirmed_smoothed', 'new_confirmed_corrected', 'new_confirmed_corrected_smoothed', 'confirmed_corrected_smoothed', 'confirmed_linr_corrected_smoothed', 'new_confirmed_nonlin_corrected_smoothed', 'confirmed_nonlin_corrected_smoothed', 'new_confirmed_nonlinr_corrected_smoothed', 'confirmed_nonlinr_corrected_smoothed'])

# Data save

Execute this section once to produce the file `./pks/data_cluster_<clusdtype>.pk` (for example `data_cluster_JRP1.pk` when `clusdtype='JRP1'`).

In [13]:
# Names to be saved that the substring filters in the following cells do not pick up.
miscnms = [
    'ClData',
    'clusdata_all',
    'cases',
    'datasets',
    'contact_dic',
    'age_group_dic',
]

In [14]:
# Every name in the current namespace that contains the substring 'deaths'.
deathnms = list(filter(lambda nm: 'deaths' in nm, dir()))

In [15]:
# Every non-callable name in the current namespace that contains the substring 'cases'.
casenms = [
    nm for nm in dir()
    if 'cases' in nm and not callable(eval(nm))
]

In [16]:
# Every name in the current namespace that contains the substring 'covid'.
covidnms = list(filter(lambda nm: 'covid' in nm, dir()))

In [17]:
# Every name in the current namespace that contains the substring 'population'.
popnms = [
    nm
    for nm in dir()
    if 'population' in nm
]

In [18]:
# Collect the names of country-related variables that can be pickled.
# Only dict, list and tuple values are kept: e.g. lccountries is of type
# dict_keys, which can't be pickled, and callables are excluded as well.
# The bookkeeping list itself is skipped explicitly, so the result does not
# depend on whether this cell has been run before.
# (An earlier "not callable" version of this filter used to be assigned first and
# then immediately overwritten; that dead assignment has been removed.)
countrynms = [
    nm for nm in dir()
    if 'countr' in nm
    and nm != 'countrynms'
    and isinstance(eval(nm), (dict, list, tuple))
]

In [19]:
# Concatenate all harvested name lists (duplicates are removed in the next step).
allnms = [*countrynms, *covidnms, *miscnms, *deathnms, *casenms, *popnms]

# Drop every name that is listed in ClData.allnmsdb.
allnmsc = list(set(allnms).difference(ClData.allnmsdb))

# Map each remaining name to the object it currently refers to.
data_allc = {varname: eval(varname) for varname in allnmsc}

In [20]:
# Pickle the collected variables to ./pks/data_cluster_<clusdtype>.pk and report the time taken.
start = time()
# The context manager makes sure the file is flushed and closed, even if pickling fails.
with open('./pks/data_cluster_'+ClData.clusdtype+'.pk','wb') as fp:
    pk.dump(data_allc, fp)
print('elapsed: ',time()-start)

elapsed:  7.33026385307312


# Data Load

Use this code to read in the data, e.g. at the top of another notebook, as an alternative to loading `data.py` or `Cluster.py`.

In [None]:
# read in data
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
# NOTE(review): the save section of this notebook writes ./pks/data_cluster_<clusdtype>.pk,
# whereas this cell reads ./pks/data_all.pk -- confirm that this is the intended file.
start=time()
print('reading in data...')
with open('./pks/data_all.pk','rb') as fp:
    foo = pk.load(fp)
print('elapsed: ',time()-start)

# make each element of the dictionary a global variable named with key.
# Updating the namespace directly avoids building and exec-ing one statement per key.
globals().update(foo)