To-dos
1) Update naming convention for outputs: LandUnit-Dataset-VariableName/YYYYMMDD|id
2) Add function metadata to core scripts
3) Fix categorical function issues (export errors and bad outputs)
4) Add checks for whether the image asset already exists
5) Handle custom histogram bin widths for categorical datasets
6) Generate preprocessing scripts for other datasets

In [2]:
from datetime import date
from datetime import timedelta
import os
import datetime
import pandas as pd
import ee
import geemap
import os
import eeDatabase_coreMethods as eedb_cor
import eeDatabase_collectionMethods as eedb_col
import ee

# Uncomment to run the Earth Engine authentication flow when no stored
# credentials are available on this machine.
# ee.Authenticate()
# Initialize the Earth Engine client library for this session.
ee.Initialize()

# Create a geemap Map object (it is not referenced again in the cells shown here).
Map = geemap.Map()

## Define Parameters and Load datasets

In [36]:
# ========================================= Run parameters ==========================================

# Time window to export; both bounds are handed to the pre-processing functions
start_date = datetime.datetime(year=2022, month=1, day=1)
end_date = datetime.datetime(year=2022, month=2, day=1)

# Processing mode: 'initialize' starts a new image collection, 'append' adds to an existing one
process = "append"


# ===================================== Input Image Collection ======================================

# Catalogue of supported datasets. Each entry lists the Earth Engine collection
# path(s), the variable names available for export, and the variable type
# ('continuous' or 'categorical') that decides which export cell applies.
in_ic_dict = {
    "GridMET_Drought": {
        "in_ic_paths": ["GRIDMET/DROUGHT"],
        "var_names": ["long_term_drought_blend", "short_term_drought_blend"],
        "var_type": "continuous",
    },
    "RAP_Cover": {
        "in_ic_paths": ["projects/rangeland-analysis-platform/vegetation-cover-v3"],
        "var_names": ["AFG", "BGR", "LTR", "PFG", "SHR", "TRE"],
        "var_type": "continuous",
    },
    "RAP_Production": {
        "in_ic_paths": ["projects/rangeland-analysis-platform/npp-partitioned-v3"],
        "var_names": ["afgAGB", "pfgAGB", "herbaceousAGB"],
        "var_type": "continuous",
    },
    "USDM": {
        "in_ic_paths": ["projects/climate-engine/usdm/weekly"],
        "var_names": ["usdm"],
        "var_type": "categorical",
    },
    "MOD11_LST": {
        "in_ic_paths": ["MODIS/061/MOD11A2"],
        "var_names": ["LST_Day_1km"],
        "var_type": "continuous",
    },
    "MOD16_ET": {
        "in_ic_paths": ["MODIS/061/MOD16A2"],
        "var_names": ["ET", "PET"],
        "var_type": "continuous",
    },
}

# Dataset selected for this run and the collection path(s) registered for it
in_ic = "GridMET_Drought"
in_ic_paths = in_ic_dict.get(in_ic).get("in_ic_paths")

# Variable to generate the database table for, plus the type registered for its dataset
var_name = "long_term_drought_blend"
var_type = in_ic_dict.get(in_ic).get("var_type")  # 'continuous' or 'categorical'


# ==================================== Input Feature Collection =====================================

# Asset holding the land-unit polygons, and the land-unit label written into
# the properties of the exported images
in_fc_path = "projects/dri-apps/assets/blm-admin/BLM_Natl_Grazing_Allotment_Polygons_Simplified_clean"
land_unit = "blm-natl-grazing-allotment-polygons"
in_fc = ee.FeatureCollection(in_fc_path)

# Option 1: restrict the Feature Collection to a bounding polygon
geometry = ee.Geometry.Polygon(
    [[[-108.4020, 38.7855],
      [-108.4020, 39.6080],
      [-109.1823, 39.6080],
      [-109.1823, 38.7855]]],
    None,
    False,
)
in_fc = in_fc.filterBounds(geometry)
# Name of the feature property used as the ID
in_fc_id = "ALLOT_ID"

# # Option 2: use the full Feature Collection
# in_fc = in_fc
# # Specify ID property
# in_fc_id = "ALLOT_ID"


# ====================================== Database output path =======================================
# Hyphenated form of the variable name, used in the asset path and in export descriptions
var_name_exp = "-".join(var_name.split("_"))
out_path = "projects/dri-apps/assets/blm-database/" + var_name_exp

In [13]:
# Print the number of features in in_fc; getInfo() fetches the computed value
# from Earth Engine to the client.
print(in_fc.size().getInfo())

21591


## Initialize the EE Image Collection and add ID band

In [37]:
# Create the output Image Collection and export the ID image into it.
# Runs only when `process` is set to 'initialize' (case-sensitive).
if process == 'initialize':

    # Properties passed to the ID image function. The input collection path is
    # taken from in_ic_paths[0], the same value the export cells use; the name
    # `in_ic_path` that was referenced here before is never defined and raised
    # a NameError as soon as this branch ran.
    properties = {'land-unit': land_unit, 'in-fc-path': in_fc_path, "in-fc-id": in_fc_id,
                  "in-ic-path": in_ic_paths[0], "var-type": var_type, "var-name": var_name}

    # Apply ID image function to input feature collection; the result is read
    # as a list with the ID image at index 0 and a feature collection at index 1
    out_list = eedb_cor.generate_id_i(in_fc, properties)
    out_i = ee.Image(out_list.get(0))
    out_fc = ee.FeatureCollection(out_list.get(1))

    # Generate empty Image Collection asset to append images
    # NOTE(review): the exit status of the CLI call is not checked, so the
    # export below is still submitted if the collection could not be created.
    os.system(f"earthengine create collection {out_path}")

    # Export ID image to new Image Collection, over the feature collection
    # footprint buffered by 20 (units as interpreted by ee.Geometry.buffer)
    task = ee.batch.Export.image.toAsset(
        image=out_i,
        description=f'Initialize - {var_name_exp} - id',
        assetId=out_path + '/id',
        region=out_fc.geometry().buffer(20),
        scale=22.264,
        maxPixels=1e13)
    task.start()

else:
    # The value must be lowercase to match the comparison above
    print('Identify "initialize" as your process to run this cell.')

Identify "Initialize" as your process to run this cell.


## Pre-process image collection for generating database

In [38]:
# Select the pre-processing function for the chosen input collection, run it
# once, and list the date bands of the resulting image.
#
# The paths compared here must stay in sync with in_ic_dict. The MODIS entries
# there use the 061 collections, which the previous 'MODIS/006/...' comparisons
# never matched; both versions are accepted so existing 006 selections still work.
# NOTE(review): confirm preprocess_modlst / preprocess_modet behave as expected
# on the 061 collections.
if in_ic_paths == ['GRIDMET/DROUGHT']:
    # GridMET drought data
    preprocess_fn = eedb_col.preprocess_gm_drought

elif in_ic_paths in (['projects/rangeland-analysis-platform/vegetation-cover-v3'],
                     ['projects/rangeland-analysis-platform/npp-partitioned-v3']):
    # Rangeland Analysis Platform cover and production data
    preprocess_fn = eedb_col.preprocess_rap

elif in_ic_paths == ['projects/climate-engine/usdm/weekly']:
    # US Drought Monitor data
    preprocess_fn = eedb_col.preprocess_usdm

elif in_ic_paths in (['MODIS/061/MOD11A2'], ['MODIS/006/MOD11A2']):
    # MODIS land surface temperature data
    preprocess_fn = eedb_col.preprocess_modlst

elif in_ic_paths in (['MODIS/061/MOD16A2'], ['MODIS/006/MOD16A2']):
    # MODIS evapotranspiration data
    preprocess_fn = eedb_col.preprocess_modet

else:
    # Fail loudly: without this, in_i and in_dates would be left undefined or
    # would keep the values from a previous run, and the export cells would
    # silently process the wrong dataset.
    raise ValueError(f"No pre-processing function is defined for input collection {in_ic_paths}")

# Run the selected function to pre-process the input collection
in_i = preprocess_fn(in_ic_paths, var_name, start_date, end_date)

# Get list of date strings from image (one band per date)
in_dates = in_i.bandNames().getInfo()
print(in_dates)

['20220105', '20220110', '20220115', '20220120', '20220125', '20220130']


## Loop over dates to produce database images (continuous variables)

In [40]:
# Export one database image per date band for continuous variables.
if var_type != 'continuous':
    print('Identify "continuous" as your var_type to run this cell.')

else:

    # Each entry of in_dates is the name of one band of the pre-processed image
    for in_date in in_dates:
        print(in_date)

        # Keep only the band for the current date
        in_i_date = in_i.select([in_date])

        # Compute the continuous-variable statistics for the input feature collection
        out_fc = eedb_cor.img_to_pts_continuous(in_i_date, in_fc)
        # Debug aid: print(out_fc.first().propertyNames().getInfo())

        # Properties handed to the conversion and export functions for this date
        properties = {
            'in-date': in_date,
            'land-unit': land_unit,
            'in-fc-path': in_fc_path,
            "in-fc-id": in_fc_id,
            "in-ic-path": in_ic_paths[0],
            "var-type": var_type,
            "var-name": var_name,
            'out-path': out_path,
            'var-name-exp': var_name_exp,
        }

        # Turn the per-feature results into an image, then export it
        out_i = eedb_cor.pts_to_img_continuous(in_fc=out_fc, properties=properties)
        eedb_cor.export_img(out_i=out_i, out_fc=out_fc, properties=properties)

20220105
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']
20220110
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']
20220115
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']
20220120
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']
20220125
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']
20220130
['var-type', 'var-name', 'land-unit', 'in-fc-path', 'in-ic-path', 'in-fc-id', 'system:index', 'system:bands', 'system:band_names']


## Loop over dates to produce database images (categorical variables)

In [8]:
# Export one database image per date band for categorical variables.
if var_type != 'categorical':
    print('Identify "categorical" as your var_type to run this cell.')

else:

    # Each entry of in_dates is the name of one band of the pre-processed image
    for in_date in in_dates:
        print(in_date)

        # Keep only the band for the current date
        in_i_date = in_i.select([in_date])

        # Compute the categorical-variable statistics for the input feature collection
        out_fc = eedb_cor.img_to_pts_categorical(in_i_date, in_fc)

        # Properties handed to the conversion and export functions for this date
        properties = {
            'in-date': in_date,
            'land-unit': land_unit,
            'in-fc-path': in_fc_path,
            "in-fc-id": in_fc_id,
            "in-ic-path": in_ic_paths[0],
            "var-type": var_type,
            "var-name": var_name,
            'out-path': out_path,
            'var-name-exp': var_name_exp,
        }

        # Turn the per-feature results into an image, then export it
        out_i = eedb_cor.pts_to_img_categorical(in_fc=out_fc, properties=properties)
        eedb_cor.export_img(out_i=out_i, out_fc=out_fc, properties=properties)

20220105
20220110
20220115
20220120
20220125
20220130
20220204
20220209
20220214
20220219
20220224
20220301
20220306
20220311
20220316
20220321
20220326
20220331
20220405
20220410
20220415
20220420
20220425
20220430
