In [1]:
# imports
import numpy as np
import pandas as pd
import json
import requests
import os
import sys
from operator import itemgetter

# set up file paths and other data globals

import config, modify

sys.path.append(config.CURRENT_DIR_STR)
sys.path.append(config.COVID_PACKAGE_STR)
sys.path.append(config.UPDATE_FILE_STR)

# local imports

from covid_package.data_funcs.store_data import read_json_data, convert_owid_data, print_update_record
from covid_package.data_funcs.update_data import check_refresh_data

from covid_package.libs.valid_keys import fetch_l0_keys, fetch_l1_keys, fetch_l2_keys
from covid_package.libs.aggregate_data import fetch_latest_data_date, fetch_date_list
from covid_package.libs.country_list import fetch_countries

from covid_package.api.get_country_records import get_country_records
from covid_package.api.get_country_data import get_l0_data, get_l1_data, get_l2_iso_data, get_l2_date_data
from covid_package.api.get_case_death_data import get_case_death_stdev

from covid_package.plots.plot_results import subplot_share_axis

In [2]:
# update data

if check_refresh_data():
    # read the updated(?) data file from the data dir
    data = read_json_data(config.DATA_FILE_STR)
    # convert the OWID_ keys
    data = convert_owid_data(data)
    # repopulate the keys
    key_list = fetch_l0_keys(data)
    # need this?
    #country_list = fetch_countries(data)
    # get the date of the newest data records in owid-covid-data.json
    print("Latest data is:", fetch_latest_data_date(data, key_list))

Checking that data is up to date
Data file up to date
Latest data is: 2021-04-20


In [3]:
#[print(k) for k in key_list]

# get the data for a single country
#print(get_l0_data(data, 'GBR'))

# get the level 1 data for all countries
#print(get_l1_data(data, key_list, ['population']))

# list of valid level 1 keys
#print(fetch_l1_keys(data, key_list))

# list of valid level 2 keys
#print(fetch_l2_keys(data, key_list))

# for each country, get record numbers and data dates
#print(get_country_records(data, key_list))

# get an exhaustive list of dates in the data
date_list = fetch_date_list(data, key_list)

# define the list of required resources
res = ['new_cases_per_million', 'new_deaths_per_million']

# returns a dict with key = date, values = dict of isos
# the value of each iso key is a list of the required resources
country_date_data = get_l2_date_data(data, key_list, date_list, res)

print("Country_date_data complete...")

Country_date_data complete...


In [4]:
# cycle through each day's collections of new cases and new deaths

date_dict = dict()
for day in country_date_data:

    #print(country_date_data[day])

    # calculate the wrl_new_ cases/deaths _per_million
    if country_date_data[day]['WRL'][0]:
        wrl_new_cases_pm = country_date_data[day]['WRL'][0]
    else:
        wrl_new_cases_pm = 0
    
    if country_date_data[day]['WRL'][1]:
        wrl_new_deaths_pm = country_date_data[day]['WRL'][1]
    else:
        wrl_new_deaths_pm = 0

    # pop the wrl vals out of the structure
    # so they don't screw up the stdev calculation
    country_date_data[day].pop('WRL')

    # calculate the std dev for the day
    mean_stdev = get_case_death_stdev(country_date_data[day])

    """
    # safety checks

    if mean_stdev[0] != wrl_new_cases_pm:
        print('Deviation: for {}, WRL new_cases_per_million = {}; calcuated value = {}'.format(day, wrl_new_cases_pm, mean_stdev[0]))

    if mean_stdev[2] != wrl_new_deaths_pm:
        print('Deviation: for {}, WRL new_deaths_per_million = {}; calcuated value = {}'.format(day, wrl_new_deaths_pm, mean_stdev[2]))
    """

    # populate the date dict for this day
    date_dict[day] = {
        "wrl_new_cases_pm": wrl_new_cases_pm,
        "wrl_new_deaths_pm": wrl_new_deaths_pm,
        "stdev_new_cases_pm": mean_stdev[0],
        "stdev_new_deaths_pm": mean_stdev[1],
        "country_vals": country_date_data[day]
    }

test_date = '2020-04-01'
print(date_dict[test_date])

{'wrl_new_cases_pm': 10.623, 'wrl_new_deaths_pm': 0.769, 'stdev_new_cases_pm': 37.013, 'stdev_new_deaths_pm': 3.272, 'country_vals': {'AFG': [0.565, 0.0], 'ALB': [5.56, 0.0], 'DZA': [2.987, 0.319], 'AND': [181.195, 25.885], 'AGO': [0.03, 0.0], 'ATG': [0.0, 0], 'ARG': [0.0, 0.022], 'ARM': [13.161, 0.337], 'AUS': [11.882, 0.078], 'AUT': [58.958, 1.999], 'AZE': [6.016, 0.0], 'BHS': [17.8, 2.543], 'BHR': [1.175, 0.0], 'BGD': [0.018, 0.006], 'BRB': [0.0, 0], 'BLR': [1.164, 0.106], 'BEL': [102.592, 10.613], 'BLZ': [0.0, 0], 'BEN': [0.33, 0], 'BTN': [0.0, 0], 'BOL': [0.685, 0.086], 'BIH': [11.887, 0.0], 'BWA': [0.0, 0.0], 'BRA': [5.264, 0.183], 'BRN': [4.572, 0.0], 'BGR': [3.31, 0.288], 'BFA': [1.005, 0.096], 'BDI': [0.0, 0], 'KHM': [0.0, 0], 'CMR': [1.507, 0.0], 'CAN': [27.37, 0.609], 'CPV': [0.0, 0.0], 'CAF': [0.0, 0], 'TCD': [0.0, 0], 'CHL': [15.327, 0.209], 'CHN': [0.057, 0.005], 'COL': [3.125, 0.02], 'COG': [0.0, 0], 'CRI': [5.497, 0.0], 'CIV': [0.417, 0.0], 'HRV': [23.385, 0.0], 'CUB': 

In [5]:
# calculate the max values for the graph y indices
# maybe don't need this?

#sort_y_cases = sorted(dates_obj.values(), key=itemgetter(0), reverse=True)
#sort_y_deaths = sorted(dates_obj.values(), key=itemgetter(2), reverse=True)

#max_y_cases = sort_y_cases[0][0]
#max_y_deaths = sort_y_deaths[0][2]

#print(max_y_cases)
#print(max_y_deaths)

In [6]:
# package up the matplotlib data
"""
# create the labels
labels_obj = {}
labels_obj['chart_title'] = "Global daily mean new cases and new deaths"
labels_obj['x_axis_label'] = "Date"
labels_obj['y1_axis_label'] = "Mean new cases"
labels_obj['y2_axis_label'] = "Mean new deaths"
labels_obj['legend_1'] = "New cases"
labels_obj['legend_2'] = "New deaths"


# reorganize the results
results_obj = {}
x_axis = []
y1_mean = []
y2_mean = []
#y1_std = []
#y2_std = []

for key, value in dates_obj.items():
    x_axis.append(key)
    y1_mean.append(value[0])
    #y1_std.append(value[1])
    y2_mean.append(value[2])
    #y2_std.append(value[3])

results_obj['x_axis'] = x_axis
results_obj['y1_axis'] = y1_mean
#results_ojb['y1_std'] = y1_std
results_obj['y2_axis'] = y2_mean
#results_ojb['y2_std'] = y2_std

#print(results[2])

# call matplotlib driver function
subplot_share_axis(labels_obj, results_obj)
"""

'\n# create the labels\nlabels_obj = {}\nlabels_obj[\'chart_title\'] = "Global daily mean new cases and new deaths"\nlabels_obj[\'x_axis_label\'] = "Date"\nlabels_obj[\'y1_axis_label\'] = "Mean new cases"\nlabels_obj[\'y2_axis_label\'] = "Mean new deaths"\nlabels_obj[\'legend_1\'] = "New cases"\nlabels_obj[\'legend_2\'] = "New deaths"\n\n\n# reorganize the results\nresults_obj = {}\nx_axis = []\ny1_mean = []\ny2_mean = []\n#y1_std = []\n#y2_std = []\n\nfor key, value in dates_obj.items():\n    x_axis.append(key)\n    y1_mean.append(value[0])\n    #y1_std.append(value[1])\n    y2_mean.append(value[2])\n    #y2_std.append(value[3])\n\nresults_obj[\'x_axis\'] = x_axis\nresults_obj[\'y1_axis\'] = y1_mean\n#results_ojb[\'y1_std\'] = y1_std\nresults_obj[\'y2_axis\'] = y2_mean\n#results_ojb[\'y2_std\'] = y2_std\n\n#print(results[2])\n\n# call matplotlib driver function\nsubplot_share_axis(labels_obj, results_obj)\n'