# Variable Preprocessing

In [None]:
# install required libraries
!pip install xarray netCDF4 h5netcdf numpy

In [1]:
import xarray as xr
import numpy as np

### Load ERA5-Land and ERA5 data

In [2]:
era5land = xr.open_dataset('era5-land.nc') # open ERA5-Land NetCDF file
print(era5land) # view structure
print("\n--------------------------------------------------------------------------------\n")

era5 = xr.open_dataset("era5.nc") # open ERA5 NetCEF file
print(era5) # view structure

<xarray.Dataset> Size: 180MB
Dimensions:     (valid_time: 744, latitude: 96, longitude: 105)
Coordinates:
  * valid_time  (valid_time) datetime64[ns] 6kB 2019-08-01 ... 2019-08-31T23:...
    expver      (valid_time) <U4 12kB ...
  * latitude    (latitude) float64 768B 42.0 41.9 41.8 41.7 ... 32.7 32.6 32.5
  * longitude   (longitude) float64 840B -124.5 -124.4 -124.3 ... -114.2 -114.1
    number      int64 8B ...
Data variables:
    d2m         (valid_time, latitude, longitude) float32 30MB ...
    t2m         (valid_time, latitude, longitude) float32 30MB ...
    ssrd        (valid_time, latitude, longitude) float32 30MB ...
    u10         (valid_time, latitude, longitude) float32 30MB ...
    v10         (valid_time, latitude, longitude) float32 30MB ...
    sp          (valid_time, latitude, longitude) float32 30MB ...
Attributes:
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conv

### convert varaible units

In [3]:
# convert t2m from K to Celsius -- used as "Tair" input
t2m_c = era5land["t2m"] - 273.15
t2m_c.attrs["units"] = "C"

# convert d2m from K to Celsius -- used in "relhum" calculation
d2m_c = era5land["d2m"] - 273.15
d2m_c.attrs["units"] = "C"

# convert ssrd from J/m2 to W/m2 -- used as "solar" input
era5land["ssrd"].valid_time.diff("time") # check that time resolution is hourly
ssrd_W = era5land["ssrd"] / 3600         # dividy by 3600 seconds since hourly accumulated
ssrd_W.attrs["units"] = "W m**-2"

# convert sp from Pa to hPa -- used as "pres" input
sp_hpa = era5land["sp"] / 100
sp_hpa.attrs["units"] = "hPa"

### calculate derived variables

In [13]:
# cza -- Cosine solar zenith angle (0-1); use calc_cza_int() or calc_solar_parameters()$cza if cza is not known
# reference "heatmetrics" R package for calculation

# fdir -- Fraction of surface solar radiation that is direct (0-1)
# calculate the fraction--reference "heatmetrics" R package for calculation (or maybe just ERA5["fdir"]/ERA5["ssdr"] )??
# remember to need to interpolate ERA5 fdir from 0.25-degree to 0.1-degree to match ERA5-land grid resolution

# calculate relative humidity (%) -- used as "relhum" input
vapor_pres = 610.94*np.exp(17.625*d2m_c / (243.04+d2m_c))
sat_vapor_pres = 610.94*np.exp(17.625*t2m_c / (243.04+t2m_c))
rh = 100*(vapor_pres/sat_vapor_pres)
rh.attrs["units"] = "%"

# calculate wind speed (m/s) -- used as "speed" input
u = era5land["u10"]
v = era5land["v10"]
ws = np.sqrt(u**2 + v**2)
ws.attrs["units"] = "m s**-1"

### obtain and derive "urban" variable

In [5]:
# from National Land Cover Database, https://www.usgs.gov/centers/eros/science/national-land-cover-database

### build a working dataset containing WBGT input variables
xr.Dataset creates a dataset resembling an in-memory representation of a NetCDF file. Consists of variables, coordinates, and attributes, which together form a self-describing dataset.

In [9]:
# zspeed = 10 m and dt = âˆ’0.052 C don't need to be in ds_work since they are fixed parameters (don't change through space and time)
# They can be inputted into the WBGT function as constants.

ds_work = xr.Dataset(            # NEED TO ADD: cza, fdir, urban
    {"solar": ssrd_W,
     # "cza": ...,
     # "fdir": ...,
     "pres": sp_hpa,
     "Tair": t2m_c,
     "relhum": rh,
     "speed": ws
     # "urban": ...
    },
    coords = era5land.coords)

# Create year, month, dday coordinates
time = ds_work.valid_time
ds_work = ds_work.assign_coords(
    year = time.dt.year,
    month = time.dt.month,
    # decimal day of month (UTC)
    dday = (time.dt.day
            + time.dt.hour / 24
            + time.dt.minute / 1440
            + time.dt.second / 86400))

print(ds_work)  # view structure

<xarray.Dataset> Size: 150MB
Dimensions:     (valid_time: 744, latitude: 96, longitude: 105)
Coordinates:
  * valid_time  (valid_time) datetime64[ns] 6kB 2019-08-01 ... 2019-08-31T23:...
    expver      (valid_time) <U4 12kB '0001' '0001' '0001' ... '0001' '0001'
    year        (valid_time) int64 6kB 2019 2019 2019 2019 ... 2019 2019 2019
    month       (valid_time) int64 6kB 8 8 8 8 8 8 8 8 8 8 ... 8 8 8 8 8 8 8 8 8
    dday        (valid_time) float64 6kB 1.0 1.042 1.083 ... 31.88 31.92 31.96
  * latitude    (latitude) float64 768B 42.0 41.9 41.8 41.7 ... 32.7 32.6 32.5
  * longitude   (longitude) float64 840B -124.5 -124.4 -124.3 ... -114.2 -114.1
    number      int64 8B 0
Data variables:
    solar       (valid_time, latitude, longitude) float32 30MB nan ... 6.541e+03
    pres        (valid_time, latitude, longitude) float32 30MB nan nan ... 978.8
    Tair        (valid_time, latitude, longitude) float32 30MB nan nan ... 40.38
    relhum      (valid_time, latitude, longitude) flo

In [10]:
# check units of preprocessed variables
print("solar", ds_work["solar"].attrs["units"])
# cza
# fdir
print("pres", ds_work["pres"].attrs["units"])
print("Tair", ds_work["Tair"].attrs["units"])
print("relhum", ds_work["relhum"].attrs["units"])
print("speed", ds_work["speed"].attrs["units"])  # what happened to the speed units?? should be "m s**-1"
# urban

solar W m**-2
pres hPa
Tair C
relhum %
speed m s**-1
