# Download and process Combined MODIS LAI into a set of geotiffs at native resolution

In [1]:
import os
import sys
import shutil
import tarfile
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling                                                                                        

In [2]:
# Specify where the config file can be found.
# This is a relative path (one directory up, inside 0_config), so it presumably only
# resolves correctly when the notebook runs from its own folder - TODO confirm.
config_file = '../0_config/config.txt'

In [3]:
# Get the required info from the config file.
# The keys are listed in the same order as the variables they are unpacked into below;
# note that doc_url is read from 'lai_docs' and download_area from 'geospatial_area'.
config_keys = ('data_path', 'geospatial_temp_path', 'lai_path',
               'lai_url', 'lai_docs', 'geospatial_area')
(data_path, geospatial_temp_path, lai_path,
 lai_url, doc_url, download_area) = [cs.read_from_config(config_file, key)
                                     for key in config_keys]

### Setup

In [4]:
# Temporary for downloads
# NOTE: this replaces the data_path value read from the config file with a hard-coded
# local drive. Every folder built from data_path below ends up under this location;
# remove this cell to use the configured path again.
data_path = 'D:/CAMELS_spat/'

In [5]:
# Folders
# All LAI outputs share one root; downloads and processed rasters get their own
# sub-folders, while documentation goes directly into the root.
lai_root = Path(data_path) / geospatial_temp_path / lai_path
download_folder = lai_root / 'download'
raw_folder = lai_root / 'raw'
doc_folder = lai_root

In [6]:
# Create the working folders (including any missing parents); existing folders are left alone
for folder in (download_folder, raw_folder):
    folder.mkdir(parents=True, exist_ok=True)

In [7]:
# Convert subsetting area into a usable GDAL setting
# subset_area = [lon_min, lon_max, lat_min, lat_max]
# GDAL window = [ulx, uly, lrx, lry]; [upper left x, upper left y, lower right x, lower right y]
# Mapping:
#   ulx = lon_min = subset_area[0]
#   uly = lat_max = subset_area[3]
#   lrx = lon_max = subset_area[1]
#   lry = lat_min = subset_area[2]

# Strip each element so that a config value written as "a, b, c, d" does not leak
# whitespace into the window values (the values themselves are kept as strings)
subset_coor = [coor.strip() for coor in download_area.split(',')]

# Fail early with a clear message instead of an IndexError (too few values) or
# silently ignoring extra values (too many)
if len(subset_coor) != 4:
    raise ValueError(
        "Expected 'geospatial_area' to contain 4 comma-separated values "
        f"(lon_min,lon_max,lat_min,lat_max) but found {len(subset_coor)}: {download_area!r}"
    )

lon_min, lon_max, lat_min, lat_max = subset_coor
window = [lon_min, lat_max, lon_max, lat_min]

### Processing
Given the sheer size of the data compared to what we need (300MB per day, of which we want roughly 25%), it makes sense to do the whole thing in one big loop and limit disk space usage that way.

In [8]:
# Find all folders we wish to process (each contains a global map of satellite data, 8-day revisit period)
# The search starts at lai_url (read from the config file); product='MCD15A2H.061' presumably
# restricts the results to folders of that MODIS product - TODO confirm against cs.find_folders_on_webpage
folder_urls = cs.find_folders_on_webpage(lai_url, product='MCD15A2H.061')

In [30]:
# Process one online folder at a time: download its .hdf files, merge and subset them
# into a single geotiff in raw_folder, then delete the downloads again so that only one
# folder's worth of source files is on disk at any time.
for folder_url in folder_urls:

    # Find which individual files this online folder contains
    file_urls = cs.find_file_urls_in_webpage_folders([folder_url], extension='.hdf')

    # Guard against folders for which no .hdf files were found: indexing file_urls[0]
    # below would otherwise raise an IndexError and abort the whole run
    if len(file_urls) == 0:
        print(f'WARNING: no .hdf files found in {folder_url}. Skipping.')
        continue

    # Check if we already have processed the files contained in this web folder and skip if so
    # (the first file URL is used as the representative for the whole folder)
    if cs.check_modis_interrupt_status(file_urls[0], raw_folder):
        print(f'NOTE: {folder_url} has already been processed. Skipping.')
        continue

    # Download the files
    # Only the sub_folder returned for the last file is kept and used below, so this
    # relies on every file of one web folder landing in the same day folder
    for url in file_urls:
        sub_folder = cs.download_modis_into_day_folder(download_folder, url)

    # Merge daily files into a single geotiff of domain of interest
    cs.process_daily_modis_hdf_to_tif(sub_folder, raw_folder,
                                      subdataset_front='HDF4_EOS:EOS_GRID',
                                      subdataset_back='MOD_Grid_MOD15A2H:Lai_500m',
                                      to_CRS='EPSG:4326',
                                      subset_window=window)

    # Remove the download folder to save space
    shutil.rmtree(sub_folder)

NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.07.04/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.07.12/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.07.20/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.07.28/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.08.05/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.08.13/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.08.21/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.08.29/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.09.06/ has already been processed. Skipping.
NOTE: https://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.061/2002.09.14/ has already been p

In [31]:
# Get the legend
# doc_url comes from the 'lai_docs' entry in the config file; the document is stored in
# doc_folder, i.e. the LAI root folder next to the 'download' and 'raw' sub-folders
cs.download_url_into_folder(doc_url, doc_folder)

Successfully downloaded https://lpdaac.usgs.gov/documents/926/MOD15_User_Guide_V61.pdf
