In [1]:
import requests
from contextlib import closing
import csv
import pandas as pd
import xarray as xr

In [2]:
# reading in era5 files
countries_file = '../era5_ne_countries.csv'
era5_countries = pd.read_csv(countries_file).to_xarray()

states_file = '../era5_ne_states.csv'
era5_states = pd.read_csv(states_file).to_xarray()

In [3]:
#find closest index from era5
def find_era5_index(lat, lon, era5):
    era5_index = era5.index.values
    era5_lat = era5.cent_lat.values
    era5_lon = era5.cent_lon.values

    dist = [(lat - era5_lat[i])**2 + (lon - era5_lon[i])**2 for i in era5_index]
    index = dist.index(min(dist))
    return index

In [4]:
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

with closing(requests.get(url, stream=True)) as r:
    f = (line.decode('utf-8') for line in r.iter_lines())
    reader = csv.reader(f, delimiter=',', quotechar='"')
    
    header = next(reader)
    time = pd.date_range(start=header[4], end=header[-1])
    
    ds_dict = {}
    
    for line in reader:
        
        # columns
        state = line[0]
        country = line[1]
        lat = float(line[2])
        lon = float(line[3])
        cases = [float(i) for i in line[4:]]
        
        # name
        if state:
            region_name = (state +'_' + country).replace(' ', '_').lower()
        else:
            region_name = country.replace(' ', '_').replace(',', '').lower()     
        
        # find closest index from era5
        countries_index = find_era5_index(lat, lon, era5_countries)
        states_index = find_era5_index(lat, lon, era5_states)
        
        # create DataArray
        da = xr.DataArray(cases, coords = [time], dims = ['time'], name = region_name)
        da.attrs['lat'] = lat
        da.attrs['lon'] = lon
        da.attrs['state'] = state
        da.attrs['country'] = country
        da.attrs['era5_countries_index'] = countries_index
        da.attrs['era5_states_index'] = states_index
        
        # filling dataset dictionary
        ds_dict[region_name] = da.to_dict()

#create DataSet
ds = xr.Dataset.from_dict(ds_dict)

print(ds)

<xarray.Dataset>
Dimensions:                                 (time: 64)
Dimensions without coordinates: time
Data variables:
    afghanistan                             (time) float64 0.0 0.0 ... 74.0 84.0
    albania                                 (time) float64 0.0 0.0 ... 146.0
    algeria                                 (time) float64 0.0 0.0 ... 302.0
    andorra                                 (time) float64 0.0 0.0 ... 188.0
    angola                                  (time) float64 0.0 0.0 ... 3.0 3.0
    antigua_and_barbuda                     (time) float64 0.0 0.0 ... 3.0 3.0
    argentina                               (time) float64 0.0 0.0 ... 387.0
    armenia                                 (time) float64 0.0 0.0 ... 265.0
    australian_capital_territory_australia  (time) float64 0.0 0.0 ... 39.0 39.0
    new_south_wales_australia               (time) float64 0.0 0.0 ... 1.029e+03
    northern_territory_australia            (time) float64 0.0 0.0 ... 6.0 6.0
    queens

In [5]:
ds['korea_south']

<xarray.DataArray 'korea_south' (time: 64)>
array([1.000e+00, 1.000e+00, 2.000e+00, 2.000e+00, 3.000e+00, 4.000e+00,
       4.000e+00, 4.000e+00, 4.000e+00, 1.100e+01, 1.200e+01, 1.500e+01,
       1.500e+01, 1.600e+01, 1.900e+01, 2.300e+01, 2.400e+01, 2.400e+01,
       2.500e+01, 2.700e+01, 2.800e+01, 2.800e+01, 2.800e+01, 2.800e+01,
       2.800e+01, 2.900e+01, 3.000e+01, 3.100e+01, 3.100e+01, 1.040e+02,
       2.040e+02, 4.330e+02, 6.020e+02, 8.330e+02, 9.770e+02, 1.261e+03,
       1.766e+03, 2.337e+03, 3.150e+03, 3.736e+03, 4.335e+03, 5.186e+03,
       5.621e+03, 6.088e+03, 6.593e+03, 7.041e+03, 7.314e+03, 7.478e+03,
       7.513e+03, 7.755e+03, 7.869e+03, 7.979e+03, 8.086e+03, 8.162e+03,
       8.236e+03, 8.320e+03, 8.413e+03, 8.565e+03, 8.652e+03, 8.799e+03,
       8.961e+03, 8.961e+03, 9.037e+03, 9.137e+03])
Dimensions without coordinates: time
Attributes:
    lat:                   36.0
    lon:                   128.0
    state:                 
    country:               Korea