# M3.3 - Tracking Changes to Research Code

*Part of:* [**Open Science for Water Resources**](https://github.com/OpenClimateScience/M3-Open-Science-for-Water-Resources)

All we did was move the `import` statements to the top of the code block, where they belong.

In [1]:
import calendar
import datetime
import glob
import earthaccess
import numpy as np
import h5py
import xarray as xr
import geopandas
from matplotlib import pyplot
from pyproj import CRS

# Workflow: build a basin-average monthly precipitation time series from the
#   monthly IMERG-Final HDF5 files stored under data/IMERG-Final_monthly/

# Log in through earthaccess; only the (disabled) download step below uses it
auth = earthaccess.login()

# Basin boundary; its geometries are used to clip every precipitation grid
basin = geopandas.read_file('/home/arthur.endsley/Workspace/NTSG/projects/Y2024_TOPS_Training/data/YellowstoneRiver_drainage_WSG84.shp')

# One-time download of the monthly files; kept commented out so that
#   re-running this cell does not fetch the data again
# results = earthaccess.search_data(
#     short_name = 'GPM_3IMERGM',
#     temporal = ('2014-01-01', '2023-12-31'))
# earthaccess.download(results, 'data/IMERG-Final_monthly')

# Sort the file names so the datasets are concatenated in a stable order
#   (presumably chronological, given the date embedded in each name)
file_list = glob.glob('data/IMERG-Final_monthly/*.HDF5')
file_list.sort()

datasets = []
for i, filename in enumerate(file_list):
    # Only need to do this once, for the first file; the longitude and
    #   latitude arrays are re-used for every later file
    if i == 0:
        with h5py.File(filename, 'r') as hdf:
            longitude = hdf['Grid/lon'][:]
            latitude = hdf['Grid/lat'][:]

    # The date is parsed from the first 8 characters (YYYYMMDD) of the fifth
    #   dot-separated field of the file path
    date = datetime.datetime.strptime(filename.split('.')[4][0:8], '%Y%m%d')
    ds0 = xr.open_dataset(filename, group = 'Grid', decode_times = False).get(['precipitation'])
    # Define the missing coordinates
    # NOTE(review): these are named 'x' and 'y' but the spatial dimensions
    #   set below are 'lon' and 'lat' -- confirm which names are intended
    ds0 = ds0.assign_coords({
        'time': [date], 'x': longitude, 'y': latitude
    })

    # Define the coordinate reference system (CRS) and the spatial coordinates
    # NOTE(review): the ".rio" accessor is presumably registered by importing
    #   rioxarray, which is not among the imports visible here -- confirm
    ds0 = ds0.rio.write_crs(CRS.from_epsg(4326))
    ds0 = ds0.rio.set_spatial_dims('lon', 'lat')

    # Keep only the part of the grid that falls inside the basin
    ds_clip = ds0.rio.clip(basin.geometry.values)

    # Collect the clipped dataset; all months are merged after the loop
    datasets.append(ds_clip)

# Merge the datasets together along the "time" axis (i.e., build a time series)
ds = xr.concat(datasets, dim = 'time')

# Converting from [mm hour-1] to [mm month-1]; the month lengths are taken
#   from the time coordinate itself so that February has 29 days in leap
#   years (the fixed calendar.mdays table always reports 28)
days_in_month = ds['time'].dt.days_in_month.values
# Reshape to (time, 1, 1) so the month lengths broadcast across the two
#   spatial axes; this assumes "time" is the first axis of "precipitation"
ds['precip_monthly'] = ds.precipitation * 24 * days_in_month.reshape((days_in_month.size, 1, 1))

# Compute basin-wide monthly precipitation
precip_series = ds.precip_monthly.mean(['lon','lat']).values

## Decomposing a computational workflow

Functions represent the re-usable pieces of a workflow. Which parts of our workflow are general enough to re-use? We might write these as functions.

An example of a reusable function from the previous lesson (M2) is the Hargreaves equation for potential evapotranspiration (PET). Although it is only a rough approximation of PET, the function we wrote can be used anywhere.

## Version control for research code

### Initializing a `git` repository

### Tracking changes to research code

### Finalizing changes

---

## Updating research software

In [None]:
def potential_et(toa_radiation, temp_max, temp_min, temp_mean):
    '''
    Estimates potential evapotranspiration (PET) with the Hargreaves
    equation:

        PET = 0.0023 * R * sqrt(Tmax - Tmin) * (Tmean + 17.8)

    Here, R is the top-of-atmosphere (TOA) radiation; Tmax and Tmin are the
    maximum and minimum monthly air temperatures; and Tmean is the monthly
    mean air temperature. The expression is linear in R, so PET is returned
    in the same units as R.

    Note that the `toa_radiation` argument has the same name as the
    `toa_radiation()` function in this file; inside this function the name
    always refers to the argument.

    Parameters
    ----------
    toa_radiation : Number
        The top-of-atmosphere (TOA) radiation (mm day-1)
    temp_max : Number
        Maximum monthly air temperature (degrees C)
    temp_min : Number
        Minimum monthly air temperature (degrees C)
    temp_mean : Number
        Average monthly air temperature (degrees C)

    Returns
    -------
    Number
        The potential evapotranspiration (PET) in [mm day-1]
    '''
    # Spread between the warmest and coolest temperatures
    temp_range = temp_max - temp_min
    # Radiation term, scaled by the empirical Hargreaves coefficient
    scaled_radiation = 0.0023 * toa_radiation
    return scaled_radiation * np.sqrt(temp_range) * (temp_mean + 17.8)


def toa_radiation(latitude, doy):
    '''
    Top-of-atmosphere (TOA) radiation for a given latitude (L) and day of year
    (DOY) can be calculated as:

    R = ((24 * 60) / pi) * G * d * (w * sin(L) * sin(D) + cos(L) * cos(D) * sin(w))

    Where G is the solar constant, 0.0820 [MJ m-2 min-1] (the factor of
    24 * 60 converts from per-minute to per-day); d is the (inverse)
    relative earth-sun distance; w is the sunset hour angle; and D is the
    solar declination angle.

    For more information, consult the FAO documentation:

        https://www.fao.org/4/X0490E/x0490e07.htm#radiation

    Parameters
    ----------
    latitude : float
        The latitude on earth, in degrees, where southern latitudes
        are represented as negative numbers
    doy : int
        The day of the year (DOY), an integer on [1,366]; either a Python
        int or an object with an integer NumPy dtype (e.g., an array)

    Returns
    -------
    Number
        Top-of-atmosphere (TOA) radiation, in [MJ m-2 day-1]; NaN wherever
        the sunset hour angle is undefined (the argument to arccos() falls
        outside [-1, 1])

    Raises
    ------
    AssertionError
        If "doy" is not an integer or lies outside [1, 366]; note that
        assert statements are skipped when Python runs with the -O flag
    '''
    # Accept Python integers and anything exposing an integer NumPy dtype;
    #   getattr() keeps inputs without a "dtype" (e.g., a float) from failing
    #   with an AttributeError before the message below can be shown
    doy_dtype = getattr(doy, 'dtype', None)
    is_integer = isinstance(doy, int) or (
        doy_dtype is not None and issubclass(doy_dtype.type, np.integer))
    assert is_integer, 'The "doy" argument must be an integer'
    assert np.all(doy >= 1) and np.all(doy <= 366), 'The "doy" argument must be between 1 and 366, inclusive'

    solar_constant = 0.0820 # [MJ m-2 min-1]

    # Convert latitude from degrees to radians
    latitude_radians = np.deg2rad(latitude)
    # Position within the year, as an angle in radians
    day_angle = (doy * 2 * np.pi) / 365
    # Inverse Earth-Sun distance (relative), as a function of day-of-year (DOY)
    earth_sun_dist = 1 + 0.033 * np.cos(day_angle)
    # Solar declination, as a function of DOY
    declination = 0.409 * np.sin(day_angle - 1.39)

    # Sunset hour angle; we use np.where() below to guard against
    #   warnings where arccos() would return invalid values, which
    #   happens when the argument is outside [-1, 1]
    _hour_angle = -np.tan(latitude_radians) * np.tan(declination)
    _hour_angle = np.where(np.abs(_hour_angle) > 1, np.nan, _hour_angle)
    sunset_hour_angle = np.arccos(_hour_angle)

    # Incident radiation, depends only on the relative earth-sun distance
    inc_radiation = ((24 * 60) / np.pi) * solar_constant * earth_sun_dist
    return inc_radiation * (sunset_hour_angle * np.sin(latitude_radians) * np.sin(declination) +
            np.cos(latitude_radians) * np.cos(declination) * np.sin(sunset_hour_angle))