$$
\text{PET} = 0.0023 \times R_A \times \sqrt{T_{max} - T_{min}} \times (T + 17.8)
$$

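$R_A$ should be multiplied by 0.408 (the inverse of the latent heat of vaporization, 2.45 MJ kg$^{-1}$) to convert it from an energy flux, [MJ m-2 day-1], to the equivalent depth of evaporated water, [mm day-1].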

In [None]:
# TODO Describe the importance of docstrings
#  - Data types for "Parameters"

import numpy as np

def toa_radiation(latitude, doy):
    '''
    Top-of-atmosphere (TOA) radiation for a given latitude (L) and day of year
    (DOY) can be calculated as:

    R = ((24 * 60) / pi) * G * d * (w * sin(L) * sin(D) + cos(L) * cos(D) * sin(w))

    Where G is the solar constant, 0.0820 [MJ m-2 min-1]; d is the inverse
    relative earth-sun distance; w is the sunset hour angle; and D is the
    solar declination angle. The factor (24 * 60) is the number of minutes in
    a day, which turns the per-minute solar constant into a daily total.

    For more information, consult the FAO documentation:

        https://www.fao.org/4/X0490E/x0490e07.htm#radiation

    Both arguments can be scalars or arrays; when both are arrays, their
    shapes must be compatible under NumPy's broadcasting rules.

    Parameters
    ----------
    latitude : float or array-like
        The latitude on earth, in degrees
    doy : int or array-like
        The day of the year (DOY), an integer on [1,366]

    Returns
    -------
    Number or numpy.ndarray
        Top-of-atmosphere (TOA) radiation, in [MJ m-2 day-1]. The result is
        NaN wherever the sunset hour angle is undefined, i.e., where
        |tan(L) * tan(D)| > 1 (the sun does not rise or does not set).
    '''
    solar_constant = 0.0820 # [MJ m-2 min-1]

    # Allow lists and other sequences, as np.deg2rad() already does for latitude
    doy = np.asarray(doy)

    # Convert latitude from degrees to radians
    lat_radians = np.deg2rad(latitude)
    # Angle swept through the year, in radians, shared by the next two terms
    day_angle = doy * ((2 * np.pi) / 365)
    # Inverse relative Earth-Sun distance, as a function of day-of-year (DOY);
    #   the coefficient is 0.033 (FAO-56, Eq. 23)
    earth_sun_dist = 1 + 0.033 * np.cos(day_angle)
    # Solar declination, as a function of DOY (FAO-56, Eq. 24)
    declination = 0.409 * np.sin(day_angle - 1.39)

    # Sunset hour angle; we use np.where() below to guard against
    #   warnings where arccos() would return invalid values, which
    #   happens when the argument is outside [-1, 1]
    _hour_angle = -np.tan(lat_radians) * np.tan(declination)
    _hour_angle = np.where(np.abs(_hour_angle) > 1, np.nan, _hour_angle)
    sunset_hour_angle = np.arccos(_hour_angle)

    return ((24 * 60) / np.pi) * solar_constant * earth_sun_dist *\
        (sunset_hour_angle * np.sin(lat_radians) * np.sin(declination) +
            np.cos(lat_radians) * np.cos(declination) * np.sin(sunset_hour_angle))

#### &#x1F3C1; Challenge: Writing a well-documented function

$$
\text{PET} = 0.0023 \times R_A \times \sqrt{T_{max} - T_{min}} \times (T + 17.8)
$$

- $R_A$ is the top-of-atmosphere solar radiation, in mm H$_2$O equivalent per month
- $T_{max}$ is the monthly maximum temperature, in degrees C
- $T_{min}$ is the monthly minimum temperature, in degrees C
- $T$ is the monthly average temperature, in degrees C

In [None]:
def potential_et(toa_radiation, temp_max, temp_min, temp_mean):
    '''
    Estimates potential evapotranspiration (PET) with the Hargreaves
    equation:

    PET = 0.0023 * R * sqrt(Tmax - Tmin) * (Tmean + 17.8)

    R is the top-of-atmosphere (TOA) radiation (mm month-1); Tmax, Tmin, and
    Tmean are the maximum, minimum, and mean monthly air temperatures
    (degrees C).

    Parameters
    ----------
    toa_radiation : Number
        Top-of-atmosphere (TOA) radiation (mm month-1)
    temp_max : Number
        Monthly maximum air temperature (degrees C)
    temp_min : Number
        Monthly minimum air temperature (degrees C)
    temp_mean : Number
        Monthly mean air temperature (degrees C)

    Returns
    -------
    Number
        Potential evapotranspiration (PET), in [mm month-1]
    '''
    # Temperature range between the monthly maximum and minimum
    temp_range = temp_max - temp_min
    # Radiation term, scaled by the empirical Hargreaves coefficient
    scaled_radiation = 0.0023 * toa_radiation
    # Mean temperature, shifted by the empirical offset
    temp_term = temp_mean + 17.8
    return scaled_radiation * np.sqrt(temp_range) * temp_term

## Downloading MERRA-2 air temperature data

In [None]:
import earthaccess
import xarray as xr
from matplotlib import pyplot

# Log in through earthaccess; the returned object is kept in `auth`
# NOTE(review): presumably this prompts for, or reads stored, credentials — confirm
auth = earthaccess.login()

# Search for the data collection with short name 'M2SDNXSLV', restricted to
#   1 January through 31 May 2024; nothing is downloaded in this cell
results = earthaccess.search_data(
    short_name = 'M2SDNXSLV',
    temporal = ("2024-01-01", "2024-05-31"))

In [None]:
# Download every search result into the local directory data_raw/MERRA2
# Could take about 1 minute on a broadband connection
earthaccess.download(results, 'data_raw/MERRA2')

---

## Analyzing large data collections

In [None]:
# Open all of the downloaded *.nc4 files as a single, combined dataset
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4')
# A bare name on the last line of a cell displays a summary of the object
ds

In [None]:
# Display the T2MMEAN variable from the combined dataset
ds['T2MMEAN']

In [None]:
# TODO Figuring out what the coordinates are

# Display the coordinate variables of the dataset
ds.coords

In [None]:
# TODO Re-chunking the data; give example of "what if" we were interested in calculating trends

# Re-open the files with the default options, as a baseline for comparing
#   against the chunking options tried in the cells that follow
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4')
ds['T2MMEAN']

In [None]:
# TODO Even though we asked for 122 elements along the "time" dimension, because the `chunks` argument is
#    applied on a *per-file basis,* it can't build chunks of that size when reading in the data

# Request chunks of 122 elements along "time" at read time; compare the
#   chunk sizes displayed below with what was requested
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4', chunks = {'time': 122})
ds['T2MMEAN']

In [None]:
# TODO We can, however, create chunks within each file

# Request chunks of 182 elements along "lat" and 288 elements along "lon"
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4', chunks = {'lat': 182, 'lon': 288})
ds['T2MMEAN']

In [None]:
# TODO Re-chunking the data *after* loading is generally inefficient, but might be necessary;
#    give example of "what if" we were interested in calculating trends

# Open with the default options first, then re-chunk the opened dataset
#   so that each chunk holds 122 elements along "time"
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4')
ds = ds.chunk({'time': 122})
ds['T2MMEAN']

In [None]:
# TODO But today, the function we're going to apply to the data doesn't depend on neighboring pixels or groups of pixels over time, so we don't care so much about how the chunks are formed

# Let the library choose the chunk sizes; this is the `ds` that the
#   remaining cells of the notebook work with
ds = xr.open_mfdataset('./data_raw/MERRA2/*.nc4', chunks = 'auto')
ds['T2MMEAN']

## Computing PET using Hargreaves equation

In [None]:
# Scalar inputs: TOA radiation at 32 degrees latitude on day 200 of the year
toa_radiation(32, 200)

In [None]:
# Three latitudes, in degrees
lats = np.array([22, 32, 42])

# Array latitude with a scalar day of year: one value is returned per latitude
toa_radiation(lats, 200)

In [None]:
from matplotlib import pyplot

# Every day of a (non-leap) year, 1 through 365; np.arange() excludes its
#   stop value, so the stop must be 366 for day 365 to be included
doy = np.arange(1, 366)

# TOA radiation at 32 degrees latitude for each day, drawn as a black line
rad = toa_radiation(32, doy)
pyplot.plot(doy, rad, 'k-')

In [None]:
# TODO Vectorization

# NOTE(review): `lats` (3 elements) and `doy` (one element per day) are both
#   1-D arrays of different lengths, so they do not look broadcastable
#   against each other; presumably this cell is meant to show that failure
#   before the 2-D latitude grid is introduced — confirm
toa_radiation(lats, doy)

### Deriving variables from `xarray` coordinates

In [None]:
# Display the dataset's coordinate variables
ds.coords

In [None]:
# Shape of the latitude coordinate, used below to build a latitude grid
ds.lat.shape

In [None]:
# TODO Vectorization
# TODO Getting an array of latitude values to match our temperature arrays

# Latitude values as a plain NumPy array
lats = ds['lat'].values
# Make the latitudes a column, then repeat that column once for every
#   longitude, giving a (lat, lon) grid; both sizes are read from the dataset
#   rather than hard-coded so this works for any grid resolution
lats = lats.reshape((ds.lat.size, 1)).repeat(ds.lon.size, axis = 1)
lats.shape

In [None]:
# TODO Have to specify the dimensions of a new variable

# Store the latitude grid in the dataset as a new variable named "lat_grid";
#   the tuple pairs the dimension names, (lat, lon), with the array
ds['lat_grid'] = (('lat', 'lon'), lats)
ds

In [None]:
# TODO https://docs.xarray.dev/en/stable/user-guide/time-series.html#datetime-components

# Day of year for every time step, taken from the "time" coordinate through
#   its "dayofyear" datetime component and converted to a NumPy array
doy = ds['time.dayofyear'].values
doy

### Calculating top-of-atmosphere radiation

In [None]:
# Select the data for 1 May 2024 only
test = ds.sel(time = '2024-05-01')

# TOA radiation from the latitude grid and that date's day of year
rad = toa_radiation(test['lat_grid'].values, test['time.dayofyear'].values)
rad.shape

In [None]:
# Store the result as a new variable with dimensions (lat, lon), then plot it
test['toa_radiation'] = (('lat', 'lon'), rad)
test['toa_radiation'].plot()

In [None]:
def my_function(x):
    '''
    Adds together the `T2MMIN` and `T2MMAX` attributes of `x`.

    Parameters
    ----------
    x : object
        Anything that exposes `T2MMIN` and `T2MMAX` attributes whose values
        can be added together

    Returns
    -------
    object
        The result of `x.T2MMIN + x.T2MMAX`
    '''
    minimum = x.T2MMIN
    maximum = x.T2MMAX
    return minimum + maximum

# Apply my_function to the dataset through xr.map_blocks() and display the result
xr.map_blocks(my_function, ds)

In [None]:
# TODO Lazy evaluation (should be a review from Part 1)

# Same call as in the previous cell, but followed by .compute() to request
#   the actual values
result = xr.map_blocks(my_function, ds).compute()
result

---

## More resources

- The National Center for Atmospheric Research (NCAR) has an excellent article on ["Using `dask` to scale up your data analysis."](https://ncar.github.io/Xarray-Dask-ESDS-2024/notebooks/02-dask-intro.html)