# TODO

- Highlight the ways in which we link to primary sources
- Highlight features of the implementation that make it more readable and interpretable

In [None]:
import datetime
import glob
import numpy as np
import xarray as xr
import rasterio
import rioxarray
import pandas as pd
from matplotlib import pyplot
from rasterio.warp import Resampling

# Start and end dates of the simulation period; used below to derive the
#    day-of-year sequence and to slice the ET and precipitation time series
DATE_START = datetime.date(2023, 10, 1)
DATE_END = datetime.date(2024, 9, 29)

In [None]:
# Read band 1 of the land-cover raster into an array; pixels equal to 1 are
#    the ones kept by the masks applied later in this notebook
with rasterio.open('data/processed/MODIS_MCD12Q1_Type5_cereal_croplands_h15v05_2023.tiff') as dataset:
    lc_map = dataset.read(1)

In [None]:
# Collect every 8-day ET composite, ordered by file name
file_list = sorted(glob.glob('data/processed/VNP16_ET_and_PET/VNP16_ET_mm_8day-1_*.tiff'))

rasters_et = []
rasters_pet = []
for filename in file_list:
    # The composite date (YYYYMMDD) is the last underscore-delimited token
    #    of the file name, just before the extension
    date_str = filename.split('/')[-1].split('_')[-1].replace('.tiff', '')
    date = datetime.datetime.strptime(date_str, '%Y%m%d')

    # Turn the single "band" axis into a "time" axis stamped with the date,
    #    so that the rasters can be concatenated along time
    et0 = rioxarray.open_rasterio(filename)
    et0 = et0.rename(band = 'time').assign_coords(time = [date])
    rasters_et.append(et0)

    # The matching PET file differs from the ET file only in its prefix
    pet0 = rioxarray.open_rasterio(filename.replace('ET_mm_8day-1', 'PET_mm_8day-1'))
    pet0 = pet0.rename(band = 'time').assign_coords(time = [date])
    rasters_pet.append(pet0)

# Stack along time, then convert from [mm 8day-1] to [mm day-1]
ds_et0 = xr.concat(rasters_et, dim = 'time').to_dataset(name = 'ET')
ds_et0 = ds_et0 / 8.0
ds_pet0 = xr.concat(rasters_pet, dim = 'time').to_dataset(name = 'PET')
ds_pet0 = ds_pet0 / 8.0

# A single dataset holding both the ET and PET variables
ds_et = xr.merge([ds_et0, ds_pet0])

In [None]:
# Blank out (NaN) every pixel where the land-cover map is not 1, then
#    resample to a daily time step, forward-filling each value onto the
#    days that follow it
ds_et_daily = ds_et.where(lc_map == 1, np.nan).resample(time = '1D').ffill()
ds_et_daily

## Computing the initial soil water content

In [None]:
# NOTE: Loading and resampling the precipitation data might take several seconds

# Use one of the ET rasters as the template grid for the precipitation data
example = rioxarray.open_rasterio('data/processed/VNP16_ET_and_PET/VNP16_ET_mm_8day-1_20230930.tiff')

# Resample the precip data to match our ET data
# NOTE(review): write_crs() appears to only label the dataset with the
#    template's CRS rather than transform its coordinates -- confirm that
#    the precipitation grid is already expressed in that CRS
ds_precip = xr.open_dataset('data/processed/IMERG_precip_mm_day-1_for_study_area.nc4')\
    .rio.write_crs(example.rio.crs)
ds_precip_500m = ds_precip.rio.reproject_match(example, resampling = Resampling.bilinear)

In [None]:
# Number of time steps in the resampled precipitation record
ds_precip_500m.time.size

In [None]:
# Number of time steps in the daily ET record, for comparison with precip
ds_et_daily.time.size

In [None]:
# Total precipitation at each pixel, summed over every time step on record
precip_total = ds_precip_500m.sum('time').precipitation

In [None]:
# NOTE: Our best guess for the soil water content might be
#    the mean annual balance between precip and ET; i.e., total precip
#    minus total ET, divided by the number of precip time steps
# NOTE(review): precip and ET are each summed over their own full time axis;
#    confirm that both records cover the same period

swc = (precip_total - ds_et_daily.ET.sum('time')) / ds_precip_500m.time.size # (366 days)

In [None]:
# Map the soil water estimate: negative balances are shown as zero and pixels
#    where the land-cover map is not 1 are blanked (NaN); `swc` itself is
#    not modified by this cell
swc.where(swc >= 0, 0).where(lc_map == 1, np.nan).plot()

## Field capacity, rooting depth, and crop coefficients

The critical soil water level (SWC) is given by:
$$
\text{SWC}(t) = \text{FC}\times \text{SW}_f\times \text{RD}_f(t)
$$

- Field capacity (FC) depends on soil texture; we could use a texture map to account for the heterogeneity across our study area; for simplicity, we'll assume the soils are a sandy loam, with a median field capacity (from FAO 56 Table 19) of 0.23 m$^3$ m$^{-3}$. If the maximum rooting depth of wheat is about 1.5 m (https://doi.org/10.1016/j.fcr.2016.02.013), then this corresponds to:
$$
\text{FC} = \frac{0.23\, \text{m}^3\text{m}^{-3}}{1.5\,\text{m}\times 1.0\,\text{m} \times 1.0\,\text{m}} = 0.153 \,\text{m}
$$
- According to FAO 56 (Table 22), for Spring Wheat, $\text{SW}_f = 1 - 0.55 = 0.45$.
- $\text{RD}_f$ varies over time as the crop's roots develop. For wheat in Northern Algeria, sowing generally occurs in October or November, with the crop reaching maturity between February and April. Without knowing the conditions in individual fields, we'll assume sowing occurred on October 15 and the crop reached full maturity on April 1. Therefore, we'll have $\text{RD}_f$ increase from 0.1 on October 15 to 1.0 on April 1.
- For the crop coefficient, we'll use values from [FAO 56's Table 12.](https://www.fao.org/4/X0490E/x0490e0b.htm#tabulated%20kc%20values) There are three coefficients, depending on crop stage (initial, middle, and end). For Spring Wheat, these values are 0.3, 1.15, and 0.25; we'll assume that these crop stages occur on October 15, April 1, and June 1, respectively.

For a visual aid, see: https://www.fao.org/4/r4082e/r4082e03.htm#2.3.1%20soil%20moisture%20content

In [None]:
def linear_interp(doy, known_values, known_doy):
    '''
    Linearly interpolates between coefficients on known dates.

    Each known value is pinned to the position in `doy` where its
    day-of-year first appears; the positions in between are filled by linear
    interpolation. Positions before the first known date, or after the last
    one, take the value of the nearest known date (this is how
    `numpy.interp` treats points outside its data range).

    Parameters
    ----------
    doy : Sequence
        Integer sequence of day-of-year, one entry per time step; it may
        wrap around the end of a year (e.g., 274..365 followed by 1..273)
    known_values : Sequence
        The known values (on specific days)
    known_doy : Sequence
        The day-of-year corresponding to each value in `known_values`;
        the entries may be given in any order

    Returns
    -------
    numpy.ndarray
        One interpolated value for each entry of `doy`

    Raises
    ------
    ValueError
        If `known_values` and `known_doy` differ in length, or if a
        day-of-year in `known_doy` does not appear in `doy`
    '''
    doy = np.asarray(doy)
    known_values = np.asarray(known_values, dtype = float).ravel()
    known_doy = np.asarray(known_doy).ravel()
    if known_values.size != known_doy.size:
        raise ValueError('known_values and known_doy must have the same length')

    # Get the ordinal position of each DOY value for which we know the
    #    coefficient; looking each one up individually keeps every value
    #    paired with its own date, whatever order the dates were given in
    x_loc = np.empty(known_doy.size, dtype = int)
    for i, day in enumerate(known_doy):
        matches = np.flatnonzero(doy == day)
        if matches.size == 0:
            raise ValueError('day-of-year %s was not found in doy' % day)
        x_loc[i] = matches[0]

    # numpy.interp() requires increasing x-coordinates
    order = np.argsort(x_loc, kind = 'stable')
    # Interpolate between the known coefficients
    return np.interp(np.arange(doy.size), x_loc[order], known_values[order])

In [None]:
# Linearly interpolate the crop coefficients between the crop-stage dates

# Day-of-year (DOY) of the simulation bounds and of each crop-stage date
doy_start = DATE_START.timetuple().tm_yday
doy_k0 = datetime.date(2023, 10, 15).timetuple().tm_yday # Planting date
doy_k1 = datetime.date(2024, 4, 1).timetuple().tm_yday # Mature date
doy_k2 = datetime.date(2024, 6, 1).timetuple().tm_yday # Harvest date
doy_end = DATE_END.timetuple().tm_yday # To have a complete year
crop_coef_known = np.array([0.3, 1.15, 0.25])

# NOTE: The simulation spans two calendar years, so the DOY sequence runs
#    from the start date to the last day of the start year and then restarts
#    at 1. The length of the start year is computed rather than assumed, so
#    that a leap start year is handled; the +1 on each upper bound is there
#    because np.arange() excludes its end point. This assumes DATE_END falls
#    in the year after DATE_START.
days_in_start_year = datetime.date(DATE_START.year, 12, 31).timetuple().tm_yday
doy = np.concatenate([
    np.arange(doy_start, days_in_start_year + 1),
    np.arange(1, doy_end + 1)
])
crop_coef = linear_interp(doy, crop_coef_known, [doy_k0, doy_k1, doy_k2])

In [None]:
# Get the position of the planting date within the DOY sequence
idx_planting = doy.tolist().index(doy_k0)

# Make sure that the crop coefficient is zero prior to the planting date;
#    the slice bound is derived from the planting date rather than
#    hard-coded, so it stays correct if the dates change
crop_coef[:idx_planting] = 0

In [None]:
# Changing the dtype to a 16-bit float saves memory; printing the same slice
#    before and after the conversion shows how much precision is given up

print(crop_coef[20:30])
crop_coef = crop_coef.astype(np.float16)
print(crop_coef[20:30])

In [None]:
# Root depth fraction: 0.1 on the planting date, rising linearly to 1.0 on
#    the maturity date
root_fraction = linear_interp(doy, [0.1, 1.0], [doy_k0, doy_k1])
# There are no roots before planting; the slice bound is derived from the
#    planting date rather than hard-coded
root_fraction[:doy.tolist().index(doy_k0)] = 0.0

# Get human-readable date labels; DOY values at or after the starting DOY
#    belong to the start year, the remaining ones to the end year
date_labels = [
    datetime.datetime.strptime(
        '%d-%03d' % (DATE_START.year if d >= doy_start else DATE_END.year, d),
        '%Y-%j')
    for d in doy
]
pyplot.ylabel('Crop Coefficient or Root Fraction')
pyplot.plot(date_labels, crop_coef, 'k-', label = 'Crop Coefficient')
pyplot.plot(date_labels, root_fraction, 'r-', label = 'Root Fraction')
pyplot.legend()
pyplot.show()

## Our first implementation

In [None]:
# TODO To begin, it's helpful to design the API before we even begin to code the implementation
#    Copy the final one here

def water_requirement_satisfaction_index(
        doy, crop_coef, pet, precip, root_fraction, sw_init,
        sw_frac = 0.45, field_capacity = 0.153):
    '''
    Computes the Water Requirement Satisfaction Index (WRSI) on each day of
    a simulation. This cell only sketches the API (signature and
    documentation); it has no implementation and returns None.

    Parameters
    ----------
    doy : Sequence
        The day of year (DOY) for each time step
    crop_coef : Sequence
        A sequence of crop coefficients for each time step
    pet : Sequence
        The potential evapotranspiration (mm day-1) for each day of simulation
    precip : Sequence
        The daily precipitation (mm day-1) for each day of simulation
    root_fraction : Sequence
        The root depth fraction, a dimensionless number between 0.0 and 1.0,
        for each day of simulation
    sw_init : Number
        The initial soil water level (mm), on the first day of simulation
    sw_frac : Number
        The soil water level, as a fraction of field capacity, below which
        AET becomes less than PET during the crop's mature stage; should be
        1.0 minus the allowable depletion fraction (dim.)
    field_capacity : Number
        The field capacity (mm). NOTE(review): the default of 0.153 matches
        the value derived in the accompanying text, which is stated there in
        metres -- confirm whether 153 (mm) was intended

    Returns
    -------
    Sequence
        The WRSI on each day of the simulation
    '''

In [None]:
def water_requirement_satisfaction_index(
        doy, crop_coef, pet, precip, root_fraction, sw_init,
        sw_frac = 0.45, field_capacity = 0.153):
    '''
    Computes the Water Requirement Satisfaction Index (WRSI) for a single
    location on each day of a simulation, using a daily soil water balance.

    On each day, the plant available water is the soil water left over from
    the previous day plus that day's precipitation. The crop's actual
    evapotranspiration (AETc) equals its potential evapotranspiration (PETc)
    when the plant available water is at or above the critical soil water
    level; below that level it is scaled down in proportion. The WRSI is
    the cumulative AETc as a percentage of the cumulative PETc.

    Parameters
    ----------
    doy : Sequence
        The day of year (DOY) for each time step; part of the API but not
        used in the calculation
    crop_coef : Sequence
        A sequence of crop coefficients for each time step
    pet : Sequence
        The potential evapotranspiration (mm day-1) for each day of
        simulation; must be 1-dimensional
    precip : Sequence
        The daily precipitation (mm day-1) for each day of simulation
    root_fraction : Sequence
        The root depth fraction, a dimensionless number between 0.0 and 1.0,
        for each day of simulation
    sw_init : Number
        The initial soil water level (mm), on the first day of simulation
    sw_frac : Number
        The soil water level, as a fraction of field capacity, below which
        AET becomes less than PET during the crop's mature stage; should be
        1.0 minus the allowable depletion fraction (dim.)
    field_capacity : Number
        The field capacity (mm); also the upper limit on the soil water
        carried over from one day to the next. NOTE(review): the default of
        0.153 matches the value derived in the accompanying text, which is
        stated there in metres -- confirm whether 153 (mm) was intended

    Returns
    -------
    numpy.ndarray
        The WRSI (percent, as 32-bit floats) on each day of the simulation;
        NaN on days where the cumulative PETc is still zero (e.g., while
        the crop coefficient is zero)

    Raises
    ------
    ValueError
        If `pet` is not 1-dimensional
    '''
    # Accept any numeric sequence, not only NumPy arrays
    pet = np.asarray(pet)
    if pet.ndim != 1:
        raise ValueError('pet should be a 1-dimensional numeric sequence')
    crop_coef = np.asarray(crop_coef)
    precip = np.asarray(precip)
    root_fraction = np.asarray(root_fraction)

    # Pre-allocate vectors for holding data for each time step
    paw = np.full(pet.shape, np.nan, dtype = np.float32) # Plant available water
    sw = np.full(pet.shape, np.nan, dtype = np.float32) # Soil water
    aet_c = np.full(pet.shape, np.nan, dtype = np.float32) # AETc
    # Compute PETc at each time step
    pet_c = pet * crop_coef
    # Compute the critical soil water at each time step
    sw_crit = field_capacity * sw_frac * root_fraction

    # Each day depends on the soil water left by the previous day, so the
    #    balance has to be stepped through time; at t=0 there is no previous
    #    day, so sw_init is used instead
    sw_prev = sw_init
    for t in range(pet.size):
        # Compute plant available water (PAW)
        paw[t] = sw_prev + precip[t]
        # Compute AET
        if paw[t] >= sw_crit[t]:
            aet_c[t] = pet_c[t]
        else:
            aet_c[t] = (paw[t] / sw_crit[t]) * pet_c[t]
        aet_c[t] = np.minimum(aet_c[t], paw[t]) # Cannot be higher than PAW
        # Compute (remaining) soil water; it cannot be higher than FC or
        #    lower than zero
        sw[t] = paw[t] - aet_c[t]
        sw[t] = np.maximum(np.minimum(sw[t], field_capacity), 0.0)
        sw_prev = sw[t]

    # At the end, compute WRSI on each day of the simulation; i.e.,
    #    compute totals on each day (a cumulative sum). Until the crop has
    #    some water demand the ratio is 0/0; the NaN is kept, but the
    #    floating-point warning it would trigger is silenced
    with np.errstate(divide = 'ignore', invalid = 'ignore'):
        wrsi = ((np.cumsum(aet_c) / np.cumsum(pet_c)) * 100).astype(np.float32)
    return wrsi

In [None]:
# Random precipitation time series, between 0 and 3 mm
precip = np.random.choice([0, 1, 2, 3], size = doy.size, p = [0.6, 0.3, 0.08, 0.02])
pet = 3 * np.ones(doy.size) # Constant 3 mm of PET

# Start the simulation on the planting date; the starting position is
#    looked up from the planting DOY rather than hard-coded
idx_planting = doy.tolist().index(doy_k0)

# NOTE(review): precip, pet and sw_init are in mm, but field_capacity = 0.153
#    matches the value the accompanying text derives in metres -- confirm
#    whether 153 (mm) was intended
wrsi = water_requirement_satisfaction_index(
    doy[idx_planting:], crop_coef[idx_planting:], pet[idx_planting:],
    precip[idx_planting:], root_fraction[idx_planting:],
    sw_init = 10, sw_frac = 0.45, field_capacity = 0.153)

In [None]:
# The simulation started at position 14 of the full date sequence, so
#    wrsi[1:] lines up with date_labels[15:]; the first simulated day is
#    left out of the plot
pyplot.plot(date_labels[15:], wrsi[1:], 'k-')

## Applying our function across space

In [None]:
# Daily PET over the simulation period, extracted as a plain array
# NOTE: This rebinds `pet`, which earlier in the notebook held the synthetic
#    constant-PET series
pet = ds_et_daily.sel(time = slice(DATE_START, DATE_END)).PET.values

In [None]:
# Reshaping to a (T x N) array for N pixels: the two spatial axes are
#    flattened into one, so that each column is one pixel's time series
days, rows, cols = pet.shape
pet_raveled = pet.reshape((days, rows * cols))
pet_raveled.shape

In [None]:
# The flattened land-cover map should have one element per pixel, matching
#    the second axis of the raveled (T x N) array
# NOTE(review): this relies on lc_map sharing the (rows, cols) grid of the
#    ET rasters -- confirm by comparing the shapes

lc_map.ravel().shape

In [None]:
# Filter out the NoData pixels using the land-cover map: only the columns
#    (pixels) where the flattened map equals 1 are kept
# NOTE: 65,000 pixels is better than 16 million!
pet_series = pet_raveled[:,lc_map.ravel() == 1]
pet_series.shape

In [None]:
# Do the same for precipitation: select the simulation period, flatten the
#    spatial axes to (T x N), and keep only the pixels where the land-cover
#    map equals 1
# NOTE(review): the reshape reuses `days`, `rows` and `cols` from the PET
#    array, so it presumes the precipitation selection has the same number
#    of time steps and the same grid -- confirm
precip_series = ds_precip_500m.sel(time = slice(DATE_START, DATE_END)).precipitation.values\
    .reshape((days, rows * cols))
precip_series = precip_series[:,lc_map.ravel() == 1]
precip_series.shape