In [None]:
import datetime
import earthaccess
import numpy as np
import h5py
import xarray as xr
from matplotlib import pyplot

# Log in to NASA Earthdata through earthaccess; the returned handle is kept as `auth`
auth = earthaccess.login()

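$$
P = E + R + \Delta S
$$

where $P$ is precipitation, $E$ is evapotranspiration, $R$ is runoff, and $\Delta S$ is the change in water storage within the basin.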

![Components of a simple water budget for part of a watershed](./assets/water_budget.png)

[*Image courtesy of the USGS*](https://www.usgs.gov/media/images/components-a-simple-water-budget-part-a-watershed)

---

## Calculating basin-scale precipitation

Monthly precipitation comes from the GPM IMERG Final monthly product (`GPM_3IMERGM`, version 07): https://dx.doi.org/10.5067/GPM/IMERG/3B-MONTH/07

In [None]:
import geopandas

# NOTE(review): absolute, machine-specific path -- point this at your own copy of the shapefile
basin_shapefile = '/home/arthur.endsley/Downloads/TOPS/YellowstoneRiver_drainage_WSG84.shp'

# Load the drainage-basin polygon(s) into a GeoDataFrame and display it
basin = geopandas.read_file(basin_shapefile)
basin

In [None]:
# Quick-look map of the basin geometry to confirm the shapefile loaded as expected
basin.plot()

In [None]:
# Search for every GPM_3IMERGM granule that falls inside the study window
study_period = ('2018-01-01', '2022-12-31')
results = earthaccess.search_data(short_name = 'GPM_3IMERGM', temporal = study_period)

In [None]:
# Download the granules into the same directory the following cells read from
# ('data_raw/IMERG-Final'); the previous target, 'data/IMERG-Final', left those
# cells with nothing to open on a fresh run
earthaccess.download(results, 'data_raw/IMERG-Final')

In [None]:
# Open one granule read-only to pull out the coordinate arrays and inspect the
# shape and units of the precipitation variable
sample_granule = 'data_raw/IMERG-Final/3B-MO.MS.MRG.3IMERG.20180701-S000000-E235959.07.V07B.HDF5'
with h5py.File(sample_granule, 'r') as hdf:
    grid = hdf['Grid']
    # `[:]` copies the coordinates into memory so they outlive the file handle
    longitude, latitude = grid['lon'][:], grid['lat'][:]
    precip = grid['precipitation']
    print(longitude.shape)
    print(latitude.shape)
    print(precip.shape)
    print(precip.attrs['units'])

In [None]:
import glob
# Collect the downloaded granules in name order and show how many were found
file_list = sorted(glob.glob('data_raw/IMERG-Final/*.HDF5'))
len(file_list)

In [None]:
# Peek at the first granule's file name: the fifth dot-separated field starts with the
# 8-character date stamp that the conversion loop parses as YYYYMMDD
file_list[0].split('.')[4][0:8]

In [None]:
import os

# Convert each HDF5 granule into a single-time-step NetCDF file holding only precipitation.
# `to_netcdf` does not create missing directories, so make sure the target exists first.
output_dir = 'data/IMERG-Final_netcdf4'
os.makedirs(output_dir, exist_ok = True)

for filename in file_list:
    # Parse the date from the base name only, so that a dot anywhere in the directory
    # part of the path (e.g. './data_raw') cannot shift the field index
    granule_name = os.path.basename(filename)
    date = datetime.datetime.strptime(granule_name.split('.')[4][0:8], '%Y%m%d')
    # The context manager closes each HDF5 file once it is written out, instead of
    # leaving one open handle per granule
    with xr.open_dataset(filename, group = 'Grid') as granule:
        ds = granule.assign_coords({
            'time': [date], 'lon': longitude, 'lat': latitude
        })
        # Rotate the image data so that latitude is the first (vertical) axis
        ds = ds.transpose('time', 'lat', 'lon', ...)
        output_path = os.path.join(output_dir, f'IMERG-Final_{date.strftime("%Y%m%d")}.nc4')
        ds.get(['precipitation']).to_netcdf(output_path)

In [None]:
# Open all of the per-month NetCDF files matching the pattern as one multi-file dataset
ds = xr.open_mfdataset('data/IMERG-Final_netcdf4/IMERG-Final_*.nc4')
ds

In [None]:
# rioxarray is imported only for its side effect: it registers the `.rio` accessor on
# xarray objects. Without this import, `ds.rio` raises AttributeError on a fresh kernel.
import rioxarray  # noqa: F401
from pyproj import CRS

# Tell rioxarray which dimensions are x/y, then tag the grid as geographic WGS84 (EPSG:4326)
ds = ds.rio.set_spatial_dims('lon', 'lat')
ds = ds.rio.write_crs(CRS.from_epsg(4326))

In [None]:
# Mask the gridded dataset to the basin outline, passing the basin's own CRS so that
# rioxarray can reconcile it with the grid's CRS
ds_clip = ds.rio.clip(
    geometries = basin.geometry.values,
    crs = basin.crs,
    from_disk = True)
ds_clip['precipitation']

In [None]:
# Map the first entry of the clipped precipitation array as a visual check of the clip
ds_clip.precipitation[0].plot()

In [None]:
# Collapse both spatial dimensions by summation, leaving one value per remaining (time) step
ds_clip.precipitation.sum(['lon','lat'])

In [None]:
# Reduce the clipped grid to one number per time step, then pull it out as a NumPy array
# NOTE(review): this adds up per-cell values; confirm that a sum (rather than an
# area-weighted mean) is the intended basin-scale statistic
basin_precip = ds_clip['precipitation'].sum(dim = ['lon', 'lat'])
precip_series = basin_precip.values
pyplot.plot(precip_series)

In [None]:
import calendar

# Month lengths for a non-leap year; the leading 0 pads the list so that month
# numbers 1-12 can be used directly as indices
calendar.mdays

In [None]:
# Take month lengths from the time coordinate itself so that leap years are honoured:
# `calendar.mdays` always reports 28 days for February, which is wrong for February 2020
# inside the 2018-2022 study window
days_in_month = ds_clip['time'].dt.days_in_month.values
days_in_month

In [None]:
# IMERG monthly precipitation is a mean rate in mm/hr (see the `units` attribute printed
# when the sample granule was inspected), so a monthly total needs hours per month,
# not just days per month
HOURS_PER_DAY = 24
precip_total = precip_series * HOURS_PER_DAY * days_in_month

---

## Obtaining basin-scale runoff data

In [None]:
import xarray as xr

# NOTE(review): absolute, machine-specific path -- point this at your own copy of the file.
# NOTE(review): this rebinds `ds`, which held the IMERG precipitation dataset above.
hysets_path = '/home/arthur.endsley/Downloads/TOPS/HYSETS_2020_QC_stations.nc'
ds = xr.open_dataset(hysets_path)
ds

In [None]:
# Keep only the records whose time stamp falls within calendar year 2022
year_2022 = slice('2022-01-01', '2022-12-31')
ds_2022 = ds.sel(time = year_2022)
ds_2022

In [None]:
# Plot the `runoff_mean` variable for the 2022 subset
ds_2022['runoff_mean'].plot()

In [None]:
# Total `runoff_mean` within each calendar month of 2022, plotted against month number
ds_2022['runoff_mean'].groupby('time.month').sum().plot()