To-dos  
1) Handle custom bin widths of histograms for categorical datasets  
2) Generate preprocessing scripts for other datasets  

In [1]:
from datetime import date
from datetime import timedelta
import os
import datetime
import pandas as pd
import ee
import geemap
import os
import eeDatabase_coreMethods as eedb_cor
import eeDatabase_collectionMethods as eedb_col
import ee

# Authentication call is left commented out; presumably it only needs to be
# uncommented when Earth Engine credentials have not been set up yet -- confirm.
# ee.Authenticate()
# Initialize the Earth Engine client library before any ee.* objects are built below.
ee.Initialize()

# Create a geemap map object.
# NOTE(review): `Map` is not referenced again in the cells shown here.
Map = geemap.Map()

## Define Parameters and Load datasets

In [85]:
# ------------------------------------- Define parameters -----------------------------------------------
# This cell holds every user-editable setting; the cells below only read these names.

# Define time period to export
start_date = datetime.datetime(2022, 1, 1)
end_date = datetime.datetime(2022, 6, 1)

# Define whether to initialize new image collection or append to existing image collection
process = 'append' # 'initialize' or 'append'

# Fail fast on a mistyped value: the later cells compare `process` against these
# exact strings and otherwise only print a hint without doing any work.
if process not in ('initialize', 'append'):
    raise ValueError(f"process must be 'initialize' or 'append', got {process!r}")


# -------------------------------- Define input Image Collection ----------------------------------------

# Define input dataset
# See dictionary below for list of input datasets
in_ic_name = 'MOD16_ET'

# Define variable from dataset
# See dictionary below for variables available for each dataset
var_name = 'ET'


# ------------------------------- Define input Feature Collection ---------------------------------------

# Define input path for Feature Collection
in_fc_path = 'projects/dri-apps/assets/blm-admin/BLM_Natl_Grazing_Allotment_Polygons_Simplified_clean'
in_fc = ee.FeatureCollection(in_fc_path)

# Specify ID property
in_fc_id = "ALLOT_ID"

# Optional: subset by geometry (uncomment both lines to restrict the export to a bounding polygon)
# geometry = ee.Geometry.Polygon([[[-108.4020, 38.7855], [-108.4020, 39.6080], [-109.1823, 39.6080], [-109.1823, 38.7855]]], None, False)
# in_fc = in_fc.filterBounds(geometry)

# Default: use the full Feature Collection, i.e. `in_fc` exactly as loaded above.

In [86]:
# Define input Image Collection paths using dataset dictionary.
# Each entry lists the source asset path(s), the variables that can be selected
# from the dataset, and whether the variable is 'continuous' or 'categorical'.
in_ic_dict = {'GridMET_Drought': {'in_ic_paths': ['GRIDMET/DROUGHT'],
                                  'var_names': ['long_term_drought_blend', 'short_term_drought_blend'],
                                  'var_type': 'categorical'},
             'RAP_Cover': {'in_ic_paths': ['projects/rap-data-365417/assets/vegetation-cover-v3'],
                           'var_names': ['AFG', 'BGR', 'LTR', 'PFG', 'SHR', 'TRE'],
                           'var_type': 'continuous'},
             'RAP_Production': {'in_ic_paths': ['projects/rap-data-365417/assets/npp-partitioned-v3'],
                                'var_names': ['afgAGB', 'pfgAGB', 'herbaceousAGB'],
                                'var_type': 'continuous'},
             'USDM': {'in_ic_paths': ['projects/climate-engine/usdm/weekly'],
                      'var_names': ['drought'],
                      'var_type': 'categorical'},
             'MOD11_LST': {'in_ic_paths': ['MODIS/061/MOD11A2'],
                           'var_names': ['LST_Day_1km'],
                           'var_type': 'continuous'},
             'MOD16_ET': {'in_ic_paths': ['MODIS/061/MOD16A2'],
                          'var_names': ['ET', 'PET'],
                          'var_type': 'continuous'}}

# Define land unit names: supported Feature Collection path -> (long name, short name)
_land_unit_names = {
    'projects/dri-apps/assets/blm-admin/BLM_Natl_Grazing_Allotment_Polygons_Simplified_clean':
        ('blm-natl-grazing-allotment-polygons', 'blm-allotments'),
    'projects/dri-apps/assets/blm-admin/BLM_Natl_Admu_FieldOffice_Polygons':
        ('blm-natl-fieldoffice-polygons', 'blm-fieldoffices'),
    'projects/dri-apps/assets/blm-admin/BLM_Natl_Admu_DistrictOffice_Polygons':
        ('blm-natl-districtoffice-polygons', 'blm-districtoffices'),
    'projects/dri-apps/assets/blm-admin/BLM_Natl_Admu_StateOffice_Polygons':
        ('blm-natl-stateoffice-polygons', 'blm-stateoffices'),
}

# Raise on an unsupported path instead of leaving the land unit names undefined
# (or, in a notebook session, silently stale from a previous run).
if in_fc_path not in _land_unit_names:
    raise ValueError(f"No land unit names are defined for in_fc_path = {in_fc_path!r}")
land_unit_long, land_unit_short = _land_unit_names[in_fc_path]

# Validate the dataset / variable selection before anything is exported
if in_ic_name not in in_ic_dict:
    raise ValueError(f"Unknown in_ic_name {in_ic_name!r}; expected one of {sorted(in_ic_dict)}")
_in_ic_info = in_ic_dict[in_ic_name]
if var_name not in _in_ic_info['var_names']:
    raise ValueError(f"Unknown var_name {var_name!r} for {in_ic_name}; expected one of {_in_ic_info['var_names']}")

# Pull out additional variables needed to run exports
in_ic_paths = _in_ic_info['in_ic_paths']
var_type = _in_ic_info['var_type']
# Output collection path: <land unit>-<dataset>-<variable>, with separators stripped from each part
out_path = f"projects/dri-apps/assets/blm-database/{land_unit_short.replace('-', '')}-{in_ic_name.replace('_', '').lower()}-{var_name.replace('_', '')}"

## Initialize the EE Image Collection and add ID band

In [81]:
# Create the output Image Collection asset and export the ID image ('0_id') into it.
# Runs only when `process` is 'initialize'.
if process == 'initialize':

    # Imported here so the cell stays self-contained; only needed for the CLI call below.
    import subprocess

    # Create dictionary of properties to attach to the exported ID image
    properties = {'system:index': '0_id', 'land_unit': land_unit_long, 'in_fc_path': in_fc_path, 'in_fc_id': in_fc_id,
                  'in_ic_paths': in_ic_paths[0], 'in_ic_name': in_ic_name, 'var_type': var_type, 'var_name': var_name}

    # Apply ID image function to input feature collection.
    # The result is read as a list: element 0 is used as an image, element 1 as a feature collection.
    out_list = eedb_cor.generate_id_img(in_fc = in_fc, in_fc_id = in_fc_id)
    out_i = ee.Image(out_list.get(0))
    out_fc = ee.FeatureCollection(out_list.get(1))

    # Generate empty Image Collection asset to append images.
    # Arguments are passed as a list (no shell), so `out_path` is never interpreted by a shell.
    # A failure is reported but does not stop the cell, matching the previous best-effort behaviour.
    create_cmd = ['earthengine', 'create', 'collection', out_path]
    try:
        create_status = subprocess.run(create_cmd).returncode
    except OSError as err:
        create_status = None
        print(f"Warning: could not run '{' '.join(create_cmd)}': {err}")
    if create_status not in (0, None):
        print(f"Warning: '{' '.join(create_cmd)}' exited with status {create_status}")

    # Export ID image to new Image Collection
    # NOTE(review): scale and the 20-unit buffer are hard-coded; confirm they match the values used by eedb_cor.export_img.
    task = ee.batch.Export.image.toAsset(
        image = out_i.set(properties),
        description = f"Initialize - {land_unit_short.replace('-', '')} {in_ic_name.replace('_', '').lower()} {var_name.replace('_', '')} - id",
        assetId = out_path + '/0_id',
        region = out_fc.geometry().buffer(20),
        scale = 22.264,
        maxPixels = 1e13)
    task.start()

else:
    print('Identify "initialize" as your process to run this cell.')

## Pre-process input image collection and loop over dates to produce and export database images

In [87]:
# Pre-process the selected dataset, then export one database image per date band.
# Runs only when `process` is 'append'.
if process == 'append':

    # ------------------------ Preprocess input Image Collection based on path --------------------------

    # Name of the eedb_col pre-processing function for each supported input path.
    # Looked up by name so only the function for the selected dataset has to exist.
    preprocess_names = {
        'GRIDMET/DROUGHT': 'preprocess_gm_drought',
        'projects/rap-data-365417/assets/vegetation-cover-v3': 'preprocess_rap',
        'projects/rap-data-365417/assets/npp-partitioned-v3': 'preprocess_rap',
        'projects/climate-engine/usdm/weekly': 'preprocess_usdm',
        'MODIS/061/MOD11A2': 'preprocess_modlst',
        'MODIS/061/MOD16A2': 'preprocess_modet',
    }

    # Raise on an unsupported path rather than continuing with an undefined
    # (or stale, from an earlier notebook run) `in_i` / `in_dates`.
    if len(in_ic_paths) != 1 or in_ic_paths[0] not in preprocess_names:
        raise ValueError(f"No pre-processing function is defined for in_ic_paths = {in_ic_paths!r}")

    # Same check for the variable type, so the loop never reuses a stale `out_fc` / `out_i`.
    if var_type not in ('continuous', 'categorical'):
        raise ValueError(f"var_type must be 'continuous' or 'categorical', got {var_type!r}")

    # Run the dataset-specific pre-processing function
    preprocess = getattr(eedb_col, preprocess_names[in_ic_paths[0]])
    in_i = preprocess(in_ic_paths, var_name, start_date, end_date)

    # Get list of date strings from image (each band name is used as a date string below)
    in_dates = in_i.bandNames().getInfo()
    print(in_dates)

    # ---------------------------- Iterate over in_dates with functions ---------------------------------

    for in_date in in_dates:

        print('Running ' + in_date)

        # Select date band for single date
        in_i_date = in_i.select([in_date])

        # Create dictionary of properties to attach to the exported image
        properties = {'system:index': in_date, 'land_unit': land_unit_long, 'in_fc_path': in_fc_path,
                      'in_fc_id': in_fc_id, 'in_ic_paths': in_ic_paths[0], 'in_ic_name': in_ic_name,
                      'var_type': var_type, 'var_name': var_name}

        if var_type == 'continuous':

            # Run function to get statistics for input feature collection (continuous variables)
            out_fc = eedb_cor.img_to_pts_continuous(in_i_date, in_fc)

            # Convert the resulting feature collection to an image
            out_i = eedb_cor.pts_to_img_continuous(in_fc = out_fc)

        else:  # 'categorical' (validated above)

            # Run function to get statistics for input feature collection (categorical variables)
            out_fc = eedb_cor.img_to_pts_categorical(in_i_date, in_fc)

            # Convert the resulting feature collection to an image
            out_i = eedb_cor.pts_to_img_categorical(in_fc = out_fc)

        # Create out region for export
        out_region = out_fc.geometry().buffer(20)

        # Export the image
        eedb_cor.export_img(out_i = out_i, out_region = out_region, out_path = out_path, out_id = in_date, properties = properties)

else:
    print('Identify "append" as your process to run this cell.')

['20220101', '20220109', '20220117', '20220125', '20220202', '20220210', '20220218', '20220226', '20220306', '20220314', '20220322', '20220330', '20220407', '20220415', '20220423', '20220501', '20220509', '20220517', '20220525']
Running 20220101
Running 20220109
Running 20220117
Running 20220125
Running 20220202
Running 20220210
Running 20220218
Running 20220226
Running 20220306
Running 20220314
Running 20220322
Running 20220330
Running 20220407
Running 20220415
Running 20220423
Running 20220501
Running 20220509
Running 20220517
Running 20220525
