# Monthly LAI
We have 8-daily MODIS LAI files (MOD15A2H composites; the file dates are 8 days apart). For the perceptual model project, it probably makes sense to have something less impacted by day-to-day variability and short-term weather. We'll find the 8-daily files and create monthly-averaged LAI from these.

In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import _functions as pmf

In [2]:
# Specify where the config file can be found
config_file = '../0_config/config.txt'

In [3]:
# Get the required info from the config file
# - raw_path: root data folder; the LAI GeoTIFFs are looked up under <raw_path>/lai/raw below
# - last_n_years: length (in years) of the period the monthly averages are based on; passed to
#   filter_lai_files_by_date() further down
# NOTE(review): the date arithmetic in filter_lai_files_by_date() needs an integer number of years;
#   presumably read_from_config() already returns a number and not a string - TODO confirm
raw_path = pmf.read_from_config(config_file,'raw_path')
last_n_years = pmf.read_from_config(config_file,'last_n_years')

### 1. Find LAI files

In [4]:
import glob

In [5]:
src_path = Path(raw_path) / 'lai' / 'raw'

In [6]:
lai_files = sorted( glob.glob(str(src_path / '*.tif')) )

### 2. Create the output directory
In this attempt we'll base the monthly LAI values on the last 10 years, so we'll hardcode an appropriate folder name

In [8]:
des_path = Path(raw_path) / 'lai' / 'monthly_average_2013_2023'

In [9]:
des_path.mkdir(exist_ok=True, parents=True)

### 3. Create monthly average files
Averaging code adapted from: https://www.hydroshare.org/resource/1361509511e44adfba814f6950c6e742/

#### 3.1 Functions

In [10]:
from dateutil.relativedelta import relativedelta
import numpy as np
from osgeo import gdal, osr
import os
import pandas as pd
import scipy.stats as sc

In [11]:
def filter_lai_files_by_date(files, last_n_years=None, last_n_months=None, last_n_days=None,
                                    years=None, months=None, days=None):

    '''Filters a list of LAI file names by date.

       The date is read from the first 8 characters of each file's base name, which must be
       formatted as 'yyyymmdd' (e.g. 'yyyymmdd_*.tif').

       files:           list of file names/paths; does not need to be sorted
       last_n_years,
       last_n_months,
       last_n_days:     keep only files dated within this many years/months/days of the most
                        recent file in the list (both ends inclusive). Specify at most one of
                        these; if none is given, no time window is applied.
       years, months,
       days:            lists of calendar years/months/days-of-month to keep. An empty or
                        missing list keeps all.

       Returns the files that pass both the time window and the calendar filters, in input
       order. If more than one of last_n_years/last_n_months/last_n_days is given, a warning is
       printed and None is returned.'''

    # Check inputs: the three time windows are mutually exclusive
    n_windows = sum(bool(n) for n in (last_n_years, last_n_months, last_n_days))
    if n_windows > 1:
        print('WARNING: filter_lai_files_by_date(): specify only one of last_n_years, last_n_months, last_n_days')
        return

    # Nothing to filter (also avoids indexing into an empty DatetimeIndex below)
    files = list(files)
    if not files:
        return []

    # Create a DatetimeIndex from filenames
    dti = pd.to_datetime([os.path.basename(file)[0:8] for file in files], format='%Y%m%d')

    # Find the most recent date; use max() rather than the last entry so that the result
    # does not depend on the caller having sorted the file list
    last_date = dti.max()

    # Select the last n entries; without a requested window we keep the full period
    if last_n_years:    start_date = last_date - pd.DateOffset(years = last_n_years)
    elif last_n_months: start_date = last_date - pd.DateOffset(months = last_n_months)
    elif last_n_days:   start_date = last_date - pd.DateOffset(days = last_n_days)
    else:               start_date = dti.min()
    keep = dti >= start_date

    # Apply the calendar filters, but only those the user actually requested
    if years:  keep = keep & dti.year.isin(years)
    if months: keep = keep & dti.month.isin(months)
    if days:   keep = keep & dti.day.isin(days)

    # Return the filtered list
    return [file for file, keep_file in zip(files, keep) if keep_file]

In [12]:
def get_geotif_data_as_array(file, band=1):

    '''Opens a GeoTIFF with GDAL and returns the values of one raster band as an array.
       file: path of the file to open
       band: band number handed to GetRasterBand() (default: 1)'''

    # Keep the dataset in a local name so it stays open while the band is being read
    dataset = gdal.Open(file)
    return dataset.GetRasterBand(band).ReadAsArray()

In [13]:
def enforce_data_range(data,min,max,replace_with='limit'):

    '''Replaces all values in data that fall outside [min, max].
       The array is modified in place and also returned.
       replace_with='limit' (default): values below min become min, values above max become max
       replace_with=<other value>:     values below min or above max become that value'''

    # Decide what goes in on either side of the valid range
    if replace_with == 'limit':
        low_fill, high_fill = min, max
    else:
        low_fill = high_fill = replace_with

    # Lower side first, then upper side
    data[data < min] = low_fill
    data[data > max] = high_fill

    return data

In [14]:
def write_geotif_sameDomain(src_file,des_file,des_data):

    '''Writes a 2D array to a new single-band, DEFLATE-compressed Float32 GeoTIFF that uses the
       same geotransform, projection and band scale/offset as an existing file.
       src_file: path of the existing GeoTIFF to copy the georeferencing and scaling from
       des_file: path of the GeoTIFF to create
       des_data: 2D array (rows, columns) with the values to write'''

    # load the source file to get the appropriate attributes
    src_ds = gdal.Open(src_file)
    src_band = src_ds.GetRasterBand(1)

    # get the geotransform
    des_transform = src_ds.GetGeoTransform()

    # Get the scale factor from the source metadata
    scale_factor = src_band.GetScale()
    offset = src_band.GetOffset()

    # get the data dimensions
    nrows, ncols = des_data.shape[0], des_data.shape[1]

    # make the file
    driver = gdal.GetDriverByName("GTiff")
    dst_ds = driver.Create(des_file,ncols,nrows,1,gdal.GDT_Float32, options = [ 'COMPRESS=DEFLATE' ])
    dst_band = dst_ds.GetRasterBand(1)
    dst_band.WriteArray( des_data )

    # Set the scale factor in the destination band.
    # GetScale()/GetOffset() return None if the source band has none defined, and None
    # cannot be passed on to SetScale()/SetOffset()
    if scale_factor is not None:
        dst_band.SetScale(scale_factor)
    if offset is not None:
        dst_band.SetOffset(offset)

    # Set the geotransform
    dst_ds.SetGeoTransform(des_transform)

    # Set the projection (skipped if the source file does not define one)
    wkt = src_ds.GetProjection()
    if wkt:
        srs = osr.SpatialReference()
        srs.ImportFromWkt(wkt)
        dst_ds.SetProjection( srs.ExportToWkt() )

    # close files: flush the output to disk and release the band references before the
    # datasets they belong to
    dst_ds.FlushCache()
    src_band = None
    dst_band = None
    dst_ds = None
    src_ds = None

    return

#### 3.2 Processing

In [15]:
gdal.UseExceptions()

In [16]:
# Define valid data range
# See docs, Table 4: https://lpdaac.usgs.gov/documents/926/MOD15_User_Guide_V61.pdf
modis_min = 0
modis_max = 100

#### DEV(?) - I'm reasonably certain this is dev code, but I'm not sure - check later

In [17]:
month = 1

In [18]:
print(f'Processing month {month:02d}')
month_files = filter_lai_files_by_date(lai_files, last_n_years=last_n_years, months=[month])

Processing month 01


In [19]:
month_files = [file for file in month_files if '20221016' not in file]

In [20]:
# Read every file selected for this month; each entry of data is the array of one file
data = []
for file in month_files:
    print(f'Processing{file}')
    data.append(get_geotif_data_as_array(file))

Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20140101_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20140109_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20140117_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20140125_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20150101_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20150109_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20150117_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20150125_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20160101_MOD_Grid_MOD15A2H_Lai_500m.tif
Processing/Users/wmk934/data/NorthAmerica_geospatial/lai/raw/20160109_MOD_Grid_MOD15A2H_Lai

In [21]:
stacked = np.dstack(data)

In [56]:
# Mask (i.e. exclude) every value strictly between 0 and 249, leaving 0 and values >= 249 unmasked
# NOTE(review): this is the opposite polarity of the mask used in the monthly processing loop
#   further down, which masks (stacked < modis_min) | (stacked > modis_max). Presumably a dev
#   experiment to look at the non-valid values - confirm before reusing this cell.
stacked_msk = np.ma.masked_array(stacked, mask=(stacked>0) & (stacked<249))

In [54]:
lai = np.ma.mean(stacked_msk, axis=2)

In [68]:
mean_all = np.nanmean(stacked, axis=2)

In [71]:
# Wherever the mean over all files is >= 249, copy that mean into mean_lai
# NOTE(review): mean_lai is not assigned anywhere in this DEV section (the masked mean above is
#   stored as `lai`); presumably it was left over from another cell run earlier in the session -
#   confirm before running this cell on its own.
mean_lai[mean_all>=249] = mean_all[mean_all>=249]

#### End DEV (?)

In [74]:
for month in range(1,13):

    # Get the files we have for this month, for the last n years
    print(f'Processing month {month:02d}')
    month_files = filter_lai_files_by_date(lai_files, last_n_years=last_n_years, months=[month])

    # Remove the one file we know is incomplete, 2022-10-16
    month_files = [file for file in month_files if '20221016' not in file]

    # Without any files there is nothing to stack or to copy the domain from, so skip the month
    if not month_files:
        print(f'WARNING: no LAI files found for month {month:02d}, skipping')
        continue

    # Load the data as numpy arrays, stack along a third axis, and find the mean of the valid values
    data = [get_geotif_data_as_array(file) for file in month_files] # Get data as uint8
    stacked = np.dstack(data) # Create a 3D stack: (rows, columns, files)
    stacked_msk = np.ma.masked_array(stacked, mask=(stacked<modis_min) | (stacked>modis_max)) # Retain valid values only
    mean_lai = np.ma.mean(stacked_msk, axis=2)

    # Define the no-data locations
    # Pixels without a single valid value in any of the files are still masked at this point.
    # The mask is a numpy concept that is not stored in the GeoTIFF, so we make these pixels
    # explicit NaNs before writing. This gives the same result as the np.nanmean() approach
    # in the legacy code at the bottom of this notebook.
    mean_lai = np.ma.filled(mean_lai, np.nan)

    # Define output file name and write to disk
    src_file = month_files[0] # We use this to copy over domain, projection, data scaling, etc
    des_file = str( des_path / f'2013_2023_{month:02d}_MOD_Grid_MOD15A2H_Lai_500m.tif' )
    write_geotif_sameDomain(src_file, des_file, mean_lai)

Processing month 01
Processing month 02
Processing month 03
Processing month 04
Processing month 05
Processing month 06
Processing month 07
Processing month 08
Processing month 09
Processing month 10
Processing month 11
Processing month 12


### Legacy code
The code below does work (or at least it should), but it is awfully slow for larger matrices.

In [None]:
for month in range(1,13):

    # Select this month's files within the last n years
    print(f'Processing month {month:02d}')
    month_files = filter_lai_files_by_date(lai_files, last_n_years=last_n_years, months=[month])

    # Drop the 2022-10-16 file, which is known to be incomplete
    month_files = [file for file in month_files if '20221016' not in file]

    # Read each file and prepare it for averaging:
    # - the GeoTIFF data comes in as uint8 and is converted to float32, because only a float
    #   array can hold np.nan
    # - everything outside [modis_min, modis_max] is then replaced by np.nan; MODIS uses 249-255
    #   as no-data values by default, and keeping those would mess with the averaging
    data = []
    for file in month_files:
        as_float = get_geotif_data_as_array(file).astype(np.float32)
        data.append(enforce_data_range(as_float, modis_min, modis_max, replace_with=np.nan))

    # Stack the files along a third axis and average over it, ignoring the NaNs.
    # This is the step that is awfully slow for larger matrices
    stacked = np.dstack(data)
    mean_lai = np.nanmean(stacked, axis = 2)

    # Write the result; the first source file provides domain, projection, data scaling, etc
    src_file = month_files[0]
    des_file = str( des_path / f'2013_2023_{month:02d}_MOD_Grid_MOD15A2H_Lai_500m.tif' )
    write_geotif_sameDomain(src_file, des_file, mean_lai)