### Crop Type Asset v2024b Generation Tool

This notebook was used to generate the initial v2024b crop type assets.  Outside of California, the v2024b assets are currently a direct copy of the v2024a assets.  Within California, the v2024a assets were modified to reclassify some "grape" (code 69) pixels as a custom "table grape" (code 78) class.

In [1]:
import time
import ee

In [2]:
# Initialize the Earth Engine client for the ee-cmorton project using the high-volume endpoint
ee.Initialize(
    project='ee-cmorton',
    opt_url='https://earthengine-highvolume.googleapis.com',
)

In [3]:
# Root asset folders for the legacy and cloud project layouts
legacy_project_folder = 'projects/earthengine-legacy/assets/projects/openet'
cloud_project_folder = 'projects/openet/assets'

# IAM policy passed to ee.data.setIamPolicy() to give all users the viewer role
public_policy = {
    'bindings': [
        {'role': 'roles/viewer', 'members': ['allUsers']},
    ],
}

def build_folders(folder_id, set_public=False):
    """Build the missing parent folders of an asset ID

    Each proper path prefix of folder_id is visited from shortest to longest
    and printed.  The full folder_id itself is never visited, so only the
    folders above it are created.

    Note, this is hard coded for building folders in projects/openet/assets:
    prefixes that do not contain "projects/openet/" and the
    "projects/openet/assets" root itself are skipped.

    Parameters
    ----------
    folder_id : str
        Slash separated asset ID whose parent folders should exist.
    set_public : bool, optional
        If True, apply public_policy to each folder that gets created.

    """
    root_id = 'projects/openet/assets'
    parts = folder_id.split('/')

    for depth in range(len(parts)):
        parent_id = '/'.join(parts[:depth])
        print(parent_id)

        # Skip everything at or above the projects/openet/assets level
        if 'projects/openet/' not in parent_id or parent_id == root_id:
            continue

        # Skip folders that already exist
        if ee.data.getInfo(parent_id):
            continue

        print(f'  Building folder: {parent_id}')
        ee.data.createAsset({'type': 'FOLDER'}, parent_id)
        if set_public:
            ee.data.setIamPolicy(parent_id, public_policy)
        
        
def build_image_collection(coll_id, set_public=False):
    """Build the image collection (and its parent folders) if it does not exist

    Parameters
    ----------
    coll_id : str
        Asset ID of the image collection.
    set_public : bool, optional
        If True, apply public_policy to a newly created collection.
        The parent folders are always built with set_public=False.

    """
    build_folders(coll_id, set_public=False)

    # Nothing else to do when the collection is already present
    if ee.data.getInfo(coll_id):
        return

    ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, coll_id)
    # Pause after creating the asset
    # (presumably so the new collection is available before the policy call)
    time.sleep(1)
    if set_public:
        ee.data.setIamPolicy(coll_id, public_policy)
        

def copy_image_list(src_coll_id, dst_coll_id, start_date=None, end_date=None, overwrite=False, verbose=True, src_filter=None):
    """Identify the source images that should be copied to the destination image collection

    Parameters
    ----------
    src_coll_id : str
        Source image collection asset ID.
    dst_coll_id : str
        Destination image collection asset ID.
    start_date, end_date : optional
        Date range passed to filterDate() on both collections.
        The date filter is only applied when both values are set;
        a start or end date on its own is ignored.
    overwrite : bool, optional
        If True, return every source image index and do not query the
        destination collection (so it does not need to exist yet).
        If False, return only the source indexes that are missing from
        the destination collection.
    verbose : bool, optional
        If True, print the number of source images found.
    src_filter : optional
        Additional filter passed to filter() on the source collection only.

    Returns
    -------
    list of str
        Sorted, de-duplicated "system:index" values.

    """
    use_dates = bool(start_date and end_date)

    src_coll = ee.ImageCollection(src_coll_id)
    if use_dates:
        src_coll = src_coll.filterDate(start_date, end_date)
    if src_filter:
        src_coll = src_coll.filter(src_filter)

    src_ids = src_coll.aggregate_array('system:index').getInfo()
    if verbose:
        print(f'  Src. assets: {len(src_ids)}')

    if overwrite:
        # The destination contents are irrelevant when overwriting,
        # so skip the extra getInfo() round trip to the server
        return sorted(set(src_ids))

    dst_coll = ee.ImageCollection(dst_coll_id)
    if use_dates:
        dst_coll = dst_coll.filterDate(start_date, end_date)
    dst_ids = dst_coll.aggregate_array('system:index').getInfo()

    image_ids = sorted(set(src_ids) - set(dst_ids))
    # image_ids = sorted(image_ids, key=lambda k: k.split('/')[-1].split('_')[-2], reverse=True)

    return image_ids
        

# pprint.pprint(copy_image_list(
#     'projects/earthengine-legacy/assets/projects/openet/mgrs/global_era5land/zone_mask',
#     'projects/openet/assets/mgrs/global/era5land/zone_mask',
#     overwrite=True,
# ))
        

### Updated code for generating v2024b assets based on Grape type and dominance assets

In [4]:
# Export the existing v2024a images to a new v2024b collection
src_coll_id = 'projects/openet/assets/crop_type/v2024a'
dst_coll_id = 'projects/openet/assets/crop_type/v2024b'

# Date range used to select the images to process
start_date = '1985-01-01'
end_date = '2025-01-01'

overwrite_flag = False

# Rasters from Pesticide Use Reports Analysis
# Wine grape flag: wine grape (1), other grape (0)
flag_raster = ee.Image("projects/openet/assets/crop_type/california/grapes/winegrape_flag")
#flag_raster = ee.Image("projects/ee-joserdgz/assets/rasters/winegrapeflag")

# Ratio of the dominant (flagged) grape type to total grapes
ratio_raster = ee.Image("projects/openet/assets/crop_type/california/grapes/grape_totalgrapes")
#ratio_raster = ee.Image("projects/ee-joserdgz/assets/rasters/grape_totalgrapes")

# Stop here if the source collection is missing
# The exception carries the reason so the failure is clear in the traceback
if not ee.data.getInfo(src_coll_id):
    raise Exception(f'Source collection does not exist: {src_coll_id}')
    

### Build the output collection if necessary

In [5]:
# if not ee.data.getInfo(dst_coll_id):
#     build_image_collection(dst_coll_id, set_public=True)


### Copy the existing v2024a assets over to the new v2024b collection

In [6]:
# # Copy the current assets over to the new collection
# print('\nCopying source images to destination collection')
# image_id_list = copy_image_list(src_coll_id, dst_coll_id, start_date, end_date, overwrite_flag)
# image_id_list = sorted(image_id_list, key=lambda k: k.split('/')[-1], reverse=False)
# print(f'  Copy assets: {len(image_id_list)}')

# # First copy all of the images over to the new collection as is
# for i, image_id in enumerate(image_id_list):     
#     if i % 100 == 0:
#         print(i, image_id)

#     try:
#         ee.data.copyAsset(f'{src_coll_id}/{image_id}', f'{dst_coll_id}/{image_id}', True)
#     except Exception as e:
#         print(e)
#         continue


### Export the target tiles and dates with the value fix

Note: this block always overwrites; any existing destination image is deleted before the modified image is exported.

In [10]:
# print('\nExporting modified California tiles')
# image_id_list = copy_image_list(src_coll_id, dst_coll_id, start_date, end_date, overwrite=True)
# image_id_list = sorted(image_id_list, key=lambda k: k.split('/')[-1].split('_')[-1], reverse=True)
# for i, image_id in enumerate(image_id_list):
#     if image_id.split('_')[0] not in ['10S', '11S']:
#         continue
#     print(image_id)

#     src_image_id = f'{src_coll_id}/{image_id}'
#     dst_image_id = f'{dst_coll_id}/{image_id}'

#     if ee.data.getInfo(dst_image_id):
#         ee.data.deleteAsset(dst_image_id)
    
#     input_img = ee.Image(src_image_id)
#     image_info = input_img.getInfo()
#     # pprint.pprint(image_info)

#     # Assume an existing CDL 69 pixel is not wine grapes 
#     # if the wine grape flag is 0 and the dominant ratio is greater than 0.65
#     output_img = input_img.where(
#         input_img.eq(69).And(ratio_raster.gt(0.65)).And(flag_raster.eq(0)), 
#         78
#     )

#     task = ee.batch.Export.image.toAsset(
#         output_img,
#         description=dst_image_id.replace('/', '_'),
#         assetId=dst_image_id,
#         dimensions=image_info['bands'][0]['dimensions'],
#         crs=image_info['bands'][0]['crs'],
#         crsTransform=image_info['bands'][0]['crs_transform'],
#         maxPixels=1000000000000,
#         pyramidingPolicy={'cropland': 'mode'},
#     )
#     task.start()
#     print('  export task started')

# print('\nDone')


### Update the properties to add the build_status and build_date

In [8]:
print('\nUpdate the image properties for all images')
# overwrite=True returns every source image ID in the date range
image_id_list = sorted(
    copy_image_list(src_coll_id, dst_coll_id, start_date, end_date, overwrite=True),
    key=lambda k: k.rsplit('/', 1)[-1],
)
print(f'  Copy assets: {len(image_id_list)}')

for i, image_id in enumerate(image_id_list):
    # Progress marker every 100 images
    if i % 100 == 0:
        print(i, image_id)
    # print(image_id)

    # Image IDs look like "10S_19850101": tile prefix, then a date starting with the year
    id_parts = image_id.split('_')
    mgrs = id_parts[0]
    year = int(id_parts[1][:4])

    # Only the 2024 images for the listed tiles are marked provisional
    status = 'provisional' if (year == 2024 and mgrs in ['10S', '10T', '11S']) else 'permanent'

    print(image_id)
    # try:
    #     ee.data.updateAsset(
    #         asset_id=f'{dst_coll_id}/{image_id}', 
    #         asset={'properties': {
    #             'build_status': status, 
    #             # TODO: Check if this can be computed and not hardcoded
    #             # 'build_date': '2025-07-29',
    #             # 'date_ingested': None,
    #         }}, 
    #         update_mask=[
    #             'properties.build_status', 
    #             # 'properties.build_date', 
    #             # 'properties.date_ingested', 
    #         ],
    #     )
    # except Exception as e:
    #     print(e)

print('\nDone')



Update the image properties for all images
  Src. assets: 1404
  Copy assets: 1404
0 10S_19850101
Asset 'projects/openet/assets/crop_type/v2024b/10S_19850101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19860101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19870101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19880101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19890101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19900101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19910101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets/crop_type/v2024b/10S_19920101' does not exist or doesn't allow this operation.
Asset 'projects/openet/assets

### Original code for generating v2024b assets based on simple county/polygon masks

In [9]:
# # Export the existing v2024a images to a new v2024b collection
# # For the California tiles that include the five target counties, swap all values of 69 with 78

# src_coll_id = 'projects/openet/assets/crop_type/v2024a'
# dst_coll_id = 'projects/openet/assets/crop_type/v2024b'

# start_date = '1980-01-01'
# end_date = '2024-01-01'
# #start_date = '2024-01-01'
# #end_date = '2025-01-01'

# overwrite_flag = True


# # Fresno, Kern, Riverside, San Bernardino, and Tulare Counties
# #ee.FeatureCollection("TIGER/2018/Counties")
# #.filterMetadata('STATEFP', 'equals', '06')
# #.filter(ee.Filter.inList('GEOID', ['06019', '06029', '06065', '06071', '06107']))
# #.map(lambda ftr: ftr.set({'MASK': 1}))

# # Manually defining the bounding extent for the replacement
# counties_mask = (
#     ee.FeatureCollection([
#         ee.Feature(ee.Geometry.Polygon([[
#           [-119.3, 34.8], [-118.8, 34.8], [-118.6, 35.5], [-118.9, 36.3],
#           [-119.1, 36.4], [-119.4, 36.3], [-119.9, 35.4], [-119.3, 34.8]
#         ]]), {'MASK': 1}),
#         ee.Feature(ee.Geometry.BBox(-116.5, 34.0, -115.5, 33.0), {'MASK': 1}),
#     ])
#     .map(lambda ftr: ftr.set({'MASK': 1}))
#     .reduceToImage(['MASK'], ee.Reducer.first())
#     .uint8()
# )

# # if not ee.data.getInfo(src_coll_id):
# #     print(' Source collection does not exist, skipping')
# #     raise Exception
# # if not ee.data.getInfo(dst_coll_id):
# #     build_image_collection(dst_coll_id, set_public=True)


# # # Copy the current assets over to the new collection
# # print('\nCopying source images to destination collection')
# # image_id_list = copy_image_list(src_coll_id, dst_coll_id, start_date, end_date, overwrite_flag)
# # image_id_list = sorted(image_id_list, key=lambda k: k.split('/')[-1], reverse=False)
# # print(f'  Copy assets: {len(image_id_list)}')

# # # First copy all of the images over to the new collection as is
# # for i, image_id in enumerate(image_id_list):     
# #     if i % 100 == 0:
# #         print(i, image_id)

# #     try:
# #         ee.data.copyAsset(f'{src_coll_id}/{image_id}', f'{dst_coll_id}/{image_id}', True)
# #     except Exception as e:
# #         print(e)
# #         continue


# # Then export the target tiles and dates with the value fix
# print('\nExporting modified California tiles')
# image_id_list = copy_image_list(src_coll_id, dst_coll_id, start_date, end_date, overwrite=True)
# image_id_list = sorted(image_id_list, key=lambda k: k.split('/')[-1], reverse=False)
# for i, image_id in enumerate(image_id_list):
#     if image_id.split('_')[0] not in ['11S']:
#         continue
#     print(image_id)

#     src_image_id = f'{src_coll_id}/{image_id}'
#     dst_image_id = f'{dst_coll_id}/{image_id}'

#     if ee.data.getInfo(dst_image_id):
#         ee.data.deleteAsset(dst_image_id)
    
#     input_img = ee.Image(src_image_id)
#     image_info = input_img.getInfo()
#     # pprint.pprint(image_info)

#     # Set the crop type to table grapes (78) for grape (69) pixels inside the target counties mask
#     output_img = input_img.where(input_img.eq(69).And(counties_mask), 78)

#     task = ee.batch.Export.image.toAsset(
#         output_img,
#         description=dst_image_id.replace('/', '_'),
#         assetId=dst_image_id,
#         dimensions=image_info['bands'][0]['dimensions'],
#         crs=image_info['bands'][0]['crs'],
#         crsTransform=image_info['bands'][0]['crs_transform'],
#         maxPixels=1000000000000,
#         pyramidingPolicy={'cropland': 'mode'},
#     )
#     task.start()
