In [None]:
import datetime
import glob
import os
import earthaccess
import numpy as np
import h5py
import xarray as xr
import rasterio
import rioxarray
import py4eos
import pyproj
from shapely.geometry import Polygon
from rasterio.mask import mask as rio_mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
from matplotlib import pyplot

# Notebook configuration: the date range that is queried and one local
# download directory per data product
TIME_PERIOD = ('2023-01-01', '2023-12-31')
VNP16_DIR = 'data/VNP16A2GF'
SMAP_DIR = 'data/SMAP_SM_L3'
IMERG_DIR = 'data/IMERG'

# Log in through earthaccess; the returned object is kept in `auth`
auth = earthaccess.login()

```python
help(earthaccess.search_data)
```

```
**bounding_box**: a tuple representing spatial bounds in the form
    `(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)`
```

In [None]:
# Spatial search window, ordered as
# (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
bbox = (
    1.5, 34.0,  # lower-left corner: longitude, latitude
    8.0, 37.0,  # upper-right corner: longitude, latitude
)

In [None]:
# Search for VNP16A2GF granules within TIME_PERIOD that fall inside `bbox`
vnp16_query = dict(
    short_name = 'VNP16A2GF',
    temporal = TIME_PERIOD,
    bounding_box = tuple(bbox))
results = earthaccess.search_data(**vnp16_query)

In [None]:
# Sanity check: display how many granules the search returned; the intent is
# to confirm that we are getting N results for N dates

len(results)

In [None]:
# Download only once: skip the request when VNP16_DIR already contains anything
if not glob.glob(f'{VNP16_DIR}/*'):
    earthaccess.download(results, VNP16_DIR)

In [None]:
# Open one downloaded granule for inspection. The path is built from VNP16_DIR
# so that it stays in sync with the download directory configured above.
# NOTE(review): the file name is fixed, including its trailing timestamp; if
#   the downloaded granule carries a different name this cell will not find it.
hdf = py4eos.read_file(
    f'{VNP16_DIR}/VNP16A2GF.A2023001.h18v05.002.2025003083850.h5', platform = 'VIIRS')
hdf

In [None]:
# TODO Note scale factors and valid min/max

# Keep raw values whose magnitude is below 32700, multiplied by 0.1;
# everything else becomes NaN
et0 = hdf.get('ET_500m')
et = np.where(np.abs(et0) < 32700, et0 * 0.1, np.nan)

In [None]:
# Draw the scaled ET array without smoothing and attach a colorbar to that image
pyplot.colorbar(pyplot.imshow(et, interpolation = 'nearest'))

In [None]:
def reproject_viirs(hdf, field, factor = 2, dst_crs = 6933):
    '''
    Coarsens a VIIRS data variable and reprojects it; by default, onto the
    global EASE-Grid 2.0 (EPSG:6933).

    The variable is first aggregated by averaging, so that the output has
    1/`factor` as many rows and columns as the input (with the default
    factor of 2, a 2400 x 2400 tile becomes 1200 x 1200, i.e., the 500-m
    data are resampled to 1 km). The coarsened grid is then warped to the
    target CRS using bilinear resampling.

    Parameters
    ----------
    hdf : py4eos.EOSHDF4
        The object returned by `py4eos.read_file()` for the VIIRS dataset;
        it must provide a `to_rasterio()` method
    field : str
        The name of the data variable, e.g., "ET_500m"
    factor : int
        Aggregation factor applied to the rows and columns before
        reprojection (Default: 2)
    dst_crs : int or str or pyproj.CRS
        The target coordinate reference system, in any form accepted by
        `pyproj.CRS()` (Default: 6933, the global EASE-Grid 2.0)

    Returns
    -------
    rasterio.io.DatasetWriter
        An open, in-memory ("MEM" driver), single-band float32 dataset;
        missing data are NaN. The caller is responsible for closing it.

    Raises
    ------
    ValueError
        If `factor` is less than 1
    '''
    if factor < 1:
        raise ValueError('"factor" must be a positive integer')
    dst_crs = pyproj.CRS(dst_crs)

    et_raster = hdf.to_rasterio(
        field, filename = '', driver = 'MEM', nodata = 32766., scale_and_offset = True)
    try:
        src_crs = et_raster.crs
        # First, resample the data to the coarser resolution; the output
        #   shape is derived from the source instead of being hard-coded
        rows = max(1, et_raster.height // factor)
        cols = max(1, et_raster.width // factor)
        arr = et_raster.read(out_shape = (rows, cols), resampling = Resampling.average)
        # Pixels grow by the actual ratio of input to output size, which also
        #   holds when the input size is not an exact multiple of "factor"
        coarse_transform = et_raster.transform * et_raster.transform.scale(
            et_raster.width / cols, et_raster.height / rows)
    finally:
        # The full-resolution, in-memory copy is no longer needed
        et_raster.close()
    arr = np.where(np.abs(arr) >= 32700, np.nan, arr)

    # We have to re-create the raster dataset, now at the coarser resolution
    et_raster_1km = rasterio.open(
        '', 'w+', driver = 'MEM', height = rows, width = cols,
        count = 1, dtype = np.float32, crs = src_crs,
        transform = coarse_transform)
    try:
        # Cast explicitly so the array matches the dataset's declared dtype
        et_raster_1km.write(arr[0].astype(np.float32), 1)

        # Second, project the data onto the target grid
        new_transform, width, height = calculate_default_transform(
            et_raster_1km.crs, dst_crs, cols, rows, *et_raster_1km.bounds)
        et_raster_ease2 = rasterio.open(
            '', 'w+', driver = 'MEM', height = height, width = width,
            count = 1, dtype = np.float32, crs = dst_crs, transform = new_transform)
        try:
            reproject(
                source = rasterio.band(et_raster_1km, 1),
                destination = rasterio.band(et_raster_ease2, 1),
                resampling = Resampling.bilinear,
                src_nodata = np.nan, # Necessary so that missing data is interpolated
                dst_nodata = np.nan)
        except Exception:
            # Don't leak the output dataset if the warp fails
            et_raster_ease2.close()
            raise
    finally:
        # The intermediate, coarsened dataset is only needed for the warp
        et_raster_1km.close()
    return et_raster_ease2

In [None]:
# Reproject the ET field of the opened granule, then show band 1 of the
# result together with a colorbar
et_raster_ease2 = reproject_viirs(hdf, 'ET_500m')
img = et_raster_ease2.read(1)
pyplot.colorbar(pyplot.imshow(img, interpolation = 'nearest'))

## TODO: Adding ET data processing to Snakemake

In [None]:
# NOTE(review): this repeats the VNP16A2GF query made earlier in the notebook,
#   with the same arguments
results = earthaccess.search_data(
    bounding_box = tuple(bbox), temporal = TIME_PERIOD, short_name = 'VNP16A2GF')

## Initializing soil water content using satellite soil moisture data

DOI: https://dx.doi.org/10.5067/M20OXIZHY3RJ

In [None]:
# NOTE: We only need the first few days, to initialize soil water

smap_window = ('2023-01-01', '2023-01-10')
results = earthaccess.search_data(short_name = 'SPL3SMP_E', temporal = smap_window)

In [None]:
# Fetch the SMAP granules only when nothing has been saved to SMAP_DIR yet
smap_downloaded = glob.glob(f'{SMAP_DIR}/*')
if len(smap_downloaded) < 1:
    earthaccess.download(results, SMAP_DIR)

In [None]:
# TODO Getting the bounds of our VIIRS tile, for clipping other datasets

# Rectangle traced from the extent of the reprojected raster, corner by
# corner: lower-left, upper-left, upper-right, lower-right
bb = et_raster_ease2.bounds
bounds = Polygon((
    (bb.left, bb.bottom),
    (bb.left, bb.top),
    (bb.right, bb.top),
    (bb.right, bb.bottom),
))
bounds

In [None]:
# NOTE: For each daily file, the AM retrieval is used wherever it is valid and
#    the PM retrieval fills the remaining gaps; the daily composites are then
#    averaged, ignoring missing values, over all the downloaded files

# Sorted so that the files are processed in a reproducible order
smap_files = sorted(glob.glob(f'{SMAP_DIR}/*.h5'))
if not smap_files:
    raise FileNotFoundError(f'No SMAP *.h5 files found in {SMAP_DIR}')

stack = []
for filename in smap_files:
    # A dedicated name is used here so that the VIIRS granule bound to "hdf"
    #    in an earlier cell is not overwritten
    smap_hdf = py4eos.read_file(filename, 'SMAP')
    # The in-memory rasters are closed as soon as they have been clipped
    with smap_hdf.to_rasterio(
            'Soil_Moisture_Retrieval_Data_AM/soil_moisture_dca', '',
            driver = 'MEM', nodata = -9999) as sm_am:
        sm_am_clip, new_transform = rio_mask(sm_am, [bounds], crop = True)
    with smap_hdf.to_rasterio(
            'Soil_Moisture_Retrieval_Data_PM/soil_moisture_dca_pm', '',
            driver = 'MEM', nodata = -9999) as sm_pm:
        sm_pm_clip, new_transform = rio_mask(sm_pm, [bounds], crop = True)
    # Negative values, which include the -9999 NoData value, become NaN
    sm_am_clip[sm_am_clip < 0] = np.nan
    sm_pm_clip[sm_pm_clip < 0] = np.nan
    # Prefer the AM value; fall back to PM where AM is missing
    stack.append(np.where(np.isnan(sm_am_clip), sm_pm_clip, sm_am_clip))

sm = np.nanmean(np.stack(stack, axis = 0), axis = 0)

In [None]:
# Show the first (index 0) slice along the leading axis of the averaged soil
# moisture array, without smoothing
pyplot.imshow(sm[0], interpolation = 'nearest')

## Getting precipitation data from IMERG

In [None]:
# TODO Turn this into an exercise for participants?

# Search for GPM_3IMERGDF granules across the whole of TIME_PERIOD; no
# spatial filter is applied to this query
imerg_query = {'short_name': 'GPM_3IMERGDF', 'temporal': TIME_PERIOD}
results = earthaccess.search_data(**imerg_query)

In [None]:
# Download the IMERG granules a single time: an IMERG_DIR that already has
# content is taken to mean the download has been done
imerg_dir_is_empty = not glob.glob(f'{IMERG_DIR}/*')
if imerg_dir_is_empty:
    earthaccess.download(results, IMERG_DIR)

In [None]:
from tqdm import tqdm

# NOTE: Upper limit on how many daily files are processed. A value of 1 keeps
#    this cell fast by processing a single file; set it to None to process
#    every downloaded file
IMERG_MAX_FILES = 1

# Sorted so that both the selection of files and their order along the "time"
#    axis are reproducible
imerg_files = sorted(glob.glob(f'{IMERG_DIR}/*.nc4'))[:IMERG_MAX_FILES]

stack = []
for filename in tqdm(imerg_files):
    # The context manager closes the file once the data have been processed
    with xr.open_dataset(filename) as ds:
        # Reproject precipitation to EPSG:6933 at a 9000-unit resolution, clip
        #    it to the VIIRS tile bounds, then load the result into memory so
        #    that it does not depend on the (soon to be closed) file
        ds_ease2 = ds[['precipitation']]\
            .transpose('time', 'lat', 'lon')\
            .rio.write_crs(4326)\
            .rio.set_spatial_dims('lon', 'lat')\
            .rio.reproject(pyproj.CRS(6933), resolution = 9000)\
            .rio.clip([bounds])\
            .load()
    stack.append(ds_ease2)

In [None]:
# Join the per-file datasets collected in `stack` along the "time" dimension
ds_precip = xr.concat(stack, dim = 'time')

In [None]:
# Display the concatenated precipitation dataset
ds_precip

## TODO: Adding precip data processing to Snakemake

## Packaging derived data products