In [1]:
import requests
from contextlib import closing
import csv
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# reading in era5 files
states_file = '../era5_ne_states.csv'
era5_states = pd.read_csv(states_file).to_xarray()

countries_file = '../era5_ne_countries.csv'
era5_countries = pd.read_csv(countries_file).to_xarray()

In [3]:
def find_era5_index(lat, lon, era5):
    '''
    Returns the era5 index corresponding with the closet lat-lon coordinate

            Parameters:
                    lat (float): latitude value from covid19 data
                    lon (float): longitude value from covid19 data
                    era5 (xr.Dataset): era5 dataset for either states or countries. 
                        Accepts: `era5_states` or `era5_countries`

            Returns:
                    index (int): index from era5 dataset that maps to a state or country
    '''
    era5_index = era5.index.values
    era5_lat = era5.cent_lat.values
    era5_lon = era5.cent_lon.values

    dist = [(lat - era5_lat[i])**2 + (lon - era5_lon[i])**2 for i in era5_index]
    index = dist.index(min(dist))
    return index

In [4]:
# read COVID19 data from url and create xr.Dataset
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

with closing(requests.get(url, stream=True)) as r:
    f = (line.decode('utf-8') for line in r.iter_lines())
    reader = csv.reader(f, delimiter=',', quotechar='"')
    
    header = next(reader)
    time = pd.date_range(start=header[4], end=header[-1])
    
    ds_dict = {} 
    for line in reader:
        state = line[0]
        country = line[1]
        lat = float(line[2])
        lon = float(line[3])
        cases = [int(i) for i in line[4:]]
        
        if state:
            region_name = (state +'_' + country).replace(' ', '_').lower()
        else:
            region_name = country.replace(' ', '_').replace(',', '').lower()     
        
        states_index = find_era5_index(lat, lon, era5_states)
        countries_index = find_era5_index(lat, lon, era5_countries)
        
        da = xr.DataArray(cases, coords = [time], dims = ['time'], name = region_name)
        da.attrs['lat'] = lat
        da.attrs['lon'] = lon
        da.attrs['state'] = era5_states.name_en[states_index].values
        da.attrs['state_index'] = states_index
        da.attrs['country'] = era5_countries.NAME_EN[countries_index].values
        da.attrs['country_index'] = countries_index
        
        ds_dict[region_name] = da.to_dict()

ds = xr.Dataset.from_dict(ds_dict)

In [5]:
ds['korea_south']

<xarray.DataArray 'korea_south' (time: 65)>
array([   1,    1,    2,    2,    3,    4,    4,    4,    4,   11,   12,   15,
         15,   16,   19,   23,   24,   24,   25,   27,   28,   28,   28,   28,
         28,   29,   30,   31,   31,  104,  204,  433,  602,  833,  977, 1261,
       1766, 2337, 3150, 3736, 4335, 5186, 5621, 6088, 6593, 7041, 7314, 7478,
       7513, 7755, 7869, 7979, 8086, 8162, 8236, 8320, 8413, 8565, 8652, 8799,
       8961, 8961, 9037, 9137, 9241])
Dimensions without coordinates: time
Attributes:
    lat:            36.0
    lon:            128.0
    state:          Daegu
    state_index:    4141
    country:        South Korea
    country_index:  96

In [6]:
def combine_duplicated_indices(ds, region_level='states'):
    '''
    Checks if more than one region in the xr.Dataset correspond to the same state or country index 
    and adds COVID19 cases together for those regions.

            Parameters:
                    ds (xr.Dataset): COVID19 data
                    region_level (str): specification of combining regions based on state or country index 
                        Accepts: `states` or `countries`

            Returns:
                    ds_era5_regions (xr.Dataset): COVID19 data mapped to era
    '''
    ds = ds.copy()
    era5_regions_dict = {}

    index_set = set()
    for region in ds:
        
        if region_level == 'countries':
            name = ds[region].attrs['country']
            index = ds[region].attrs['country_index']
            
            del ds[region].attrs['state']
            del ds[region].attrs['state_index']
            
        elif region_level == 'states':
            name = ds[region].attrs['state']
            index = ds[region].attrs['state_index']
        
        region_dict = ds[region].to_dict()
        if index not in index_set:
            index_set.add(index)
            era5_regions_dict[name] = region_dict
        else:
            era5_regions_dict[name]['data'] = np.add(era5_regions_dict[name]['data'], region_dict['data'])
            
    ds_era5_regions = xr.Dataset.from_dict(era5_regions_dict)
    return ds_era5_regions

In [7]:
ds_countries = combine_duplicated_indices(ds, region_level='countries')
ds_countries['South Korea']

<xarray.DataArray 'South Korea' (time: 65)>
array([    4,    12,    26,    47,    82,   126,   169,   233,   291,   363,
         420,   481,   545,   599,   667,   743,   818,   879,   937,   985,
        1030,  1068,  1107,  1144,  1164,  1183,  1197,  1203,  1206,  1281,
        1584,  1814,  1987,  2219,  2364,  2648,  3153,  3724,  4537,  5125,
        5724,  6575,  7010,  7477,  7982,  8430,  8703,  8867,  8902,  9146,
        9260,  9370,  9477,  9553,  9627,  9712,  9805,  9957, 10045, 10194,
       10361, 10362, 10441, 10544, 10652])
Dimensions without coordinates: time
Attributes:
    lat:            32.9711
    lon:            119.455
    country:        South Korea
    country_index:  96

In [8]:
ds_states = combine_duplicated_indices(ds, region_level='states')
ds_states['Urozgan']

<xarray.DataArray 'Urozgan' (time: 65)>
array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  4,  4,  5,  7,  7,  7, 11, 16,
       21, 22, 22, 22, 24, 24, 40, 40, 74, 84, 94])
Dimensions without coordinates: time
Attributes:
    lat:            33.0
    lon:            65.0
    state:          Urozgan
    state_index:    3885
    country:        Afghanistan
    country_index:  103