In [2]:
import requests
from contextlib import closing
import csv
import pandas as pd
import xarray as xr

In [8]:
# Load the two ERA5 lookup tables (countries, then states) from CSV and
# convert each resulting DataFrame into an xarray Dataset.
countries_file, states_file = '../era5_ne_countries.csv', '../era5_ne_states.csv'

era5_countries, era5_states = (
    pd.read_csv(csv_path).to_xarray()
    for csv_path in (countries_file, states_file)
)

In [None]:
# find closest index from era5
def find_era5_index(lat, lon, era5):
    """Return the position of the ERA5 centroid closest to ``(lat, lon)``.

    Closeness is the squared difference in plain degrees,
    ``(lat - cent_lat)**2 + (lon - cent_lon)**2``. No great-circle
    correction, latitude scaling or wrap-around at +/-180 degrees longitude
    is applied, so results near the date line or the poles are approximate.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the query point, in the same units as the centroids.
    era5 : object
        Anything exposing ``cent_lat.values`` and ``cent_lon.values`` as
        equal-length numeric sequences (e.g. an xarray Dataset built from
        one of the ERA5 CSV tables).

    Returns
    -------
    int
        Zero-based position of the nearest centroid within ``cent_lat`` /
        ``cent_lon``. On ties the first (lowest) position wins. Centroids
        with a NaN coordinate are ignored.

    Raises
    ------
    ValueError
        If there are no centroids, or no finite distance can be computed
        (for example when ``lat`` or ``lon`` is NaN).
    """
    # Imported locally so the function does not rely on a notebook-level
    # numpy import.
    import numpy as np

    era5_lat = np.asarray(era5.cent_lat.values, dtype=float)
    era5_lon = np.asarray(era5.cent_lon.values, dtype=float)

    # Vectorised and purely positional: the table's index labels are never
    # used as array offsets, so a non 0..n-1 index cannot mis-address rows.
    dist = (lat - era5_lat) ** 2 + (lon - era5_lon) ** 2

    if dist.size == 0 or np.isnan(dist).all():
        raise ValueError(
            'cannot find nearest ERA5 centroid: no finite distances '
            '(empty table or NaN coordinates)'
        )

    # nanargmin returns the first minimum, matching list.index(min(...)).
    return int(np.nanargmin(dist))

In [None]:
# JHU CSSE global time series of confirmed COVID-19 cases, fetched as CSV.
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

# Stored as the ERA5 index attribute when a row has no usable coordinates.
NO_ERA5_MATCH = -1


def _parse_float(cell):
    """Convert one CSV cell to float; a blank cell becomes NaN instead of raising."""
    cell = cell.strip()
    return float(cell) if cell else float('nan')


with closing(requests.get(url, stream=True)) as r:
    # Fail fast on an HTTP error instead of parsing an error page as CSV.
    r.raise_for_status()

    f = (line.decode('utf-8') for line in r.iter_lines())
    reader = csv.reader(f, delimiter=',', quotechar='"')

    # Columns 0-3 are state, country, lat, lon; every later column is a date.
    # Parse the actual header cells rather than assuming a gap-free daily
    # range between the first and last date.
    header = next(reader)
    time = pd.to_datetime(header[4:])

    ds_dict = {}

    for line in reader:
        # csv.reader yields [] for blank lines; skip them.
        if not line:
            continue

        # columns
        state = line[0]
        country = line[1]
        # float('') would abort the whole build, so blank cells become NaN.
        lat = _parse_float(line[2])
        lon = _parse_float(line[3])
        cases = [_parse_float(i) for i in line[4:]]

        # name
        # NOTE(review): commas are stripped only in the country-only branch;
        # left as-is so existing variable names stay stable.
        if state:
            region_name = (state + '_' + country).replace(' ', '_').lower()
        else:
            region_name = country.replace(' ', '_').replace(',', '').lower()

        # find closest index from era5 (impossible without coordinates)
        if pd.isna(lat) or pd.isna(lon):
            countries_index = NO_ERA5_MATCH
            states_index = NO_ERA5_MATCH
        else:
            countries_index = find_era5_index(lat, lon, era5_countries)
            states_index = find_era5_index(lat, lon, era5_states)

        # create DataArray
        da = xr.DataArray(cases, coords=[time], dims=['time'], name=region_name)
        da.attrs['lat'] = lat
        da.attrs['lon'] = lon
        da.attrs['state'] = state
        da.attrs['country'] = country
        da.attrs['era5_countries_index'] = countries_index
        da.attrs['era5_states_index'] = states_index

        # filling dataset dictionary
        ds_dict[region_name] = da.to_dict()

# create DataSet
# The flat {name: {...}} form of from_dict only reads each variable's dims,
# data and attrs, so the per-variable 'coords' entry is ignored; re-attach
# the shared time axis explicitly.
ds = xr.Dataset.from_dict(ds_dict).assign_coords(time=time)

ds

In [4]:
# Display one region's series from the combined dataset.
# Relies on `ds` from the dataset-building cell, so that cell must run first.
region_key = 'korea_south'
ds[region_key]

NameError: name 'ds' is not defined