<!-- # Processing the final dataset
 -->


In [1]:
import ee
import geemap
from gee_0_utils import *

# Project bootstrap. `initialize` and `ProjectConfig` are not defined in this
# file; they come in through the `gee_0_utils` star import
# (presumably `initialize` sets up the Earth Engine session — confirm there).
initialize()
config = ProjectConfig()

# Region of interest and asset root folder, read by every cell below.
roi, data_folder = config.roi, config.data_folder

In [None]:
def keep_rare_lu_types(unified_data):
    """Keep only pixels with a non-zero value in at least one selected land-use band.

    Args:
        unified_data: Image exposing ``select`` / ``updateMask`` (an
            ``ee.Image`` in practice) that contains the ``lulc_sum_<code>``
            bands listed below.

    Returns:
        ``unified_data`` with its mask updated by an "any band non-zero"
        reduction over the selected bands.
    """
    # Land-use class codes whose `lulc_sum_<code>` bands are inspected
    # (the "rare" classes, going by the function name).
    rare_codes = (20, 21, 35, 39, 40, 41, 46, 48, 9)
    rare_bands = [f"lulc_sum_{code}" for code in rare_codes]

    # Non-zero where at least one of the selected bands is non-zero.
    any_rare = unified_data.select(rare_bands).reduce(ee.Reducer.anyNonZero())

    return unified_data.updateMask(any_rare)


In [None]:

# Function to preprocess images and create the unified data image
def create_unified_data(method, year):
    """Stack all layers for one data source into a single masked image.

    Reads the module-level names ``data_folder``, ``aggregate_LU`` and
    ``GEDI_biomass``.

    NOTE(review): ``GEDI_biomass`` is not defined in the visible part of this
    file; presumably it is provided by the ``gee_0_utils`` star import.
    Confirm, otherwise this function raises ``NameError``.

    Args:
        method: Name of the sub-folder of ``data_folder`` holding the
            source-specific assets. ``"mapbiomas"`` adds the land-use image
            (and reprojects the nearest-mature-biomass layer); any other
            value adds the ``eu_mapbiomas_fires`` image instead.
        year: Used only when ``method == "mapbiomas"``. ``0`` selects the
            ``land_use_<...>_all`` asset, any other value selects
            ``land_use_<...>_<year>yr``.

    Returns:
        Tuple ``(unified_data, suffix)``: the multi-band image with the
        combined mask applied, and the label string used by callers to name
        the export.
    """
    # ---- Layers stored under the source-specific folder ------------------
    secondary = ee.Image(f"{data_folder}/{method}/secondary")
    sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover")
    distance_to_nearest_mature = ee.Image(f"{data_folder}/{method}/distance_to_nearest_mature")
    one_hectare_mask = ee.Image(f"{data_folder}/{method}/one_hectare_mask").selfMask()

    # ---- Layers shared by all sources ------------------------------------
    cwd = ee.Image(f"{data_folder}/raw/cwd_chave").int16()
    categorical = ee.Image(f"{data_folder}/categorical").select(["biome", "ecoreg", "protec", "indig"])
    topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")
    # Only these soil bands are kept; the others were dropped as multicollinear.
    soil = ee.Image(f"{data_folder}/soilgrids").select(['nitro', 'sand', 'phh2o'])
    climate = ee.Image(f"{data_folder}/yearly_terraclim")
    distance_to_border_mask = ee.Image(f"{data_folder}/distance_to_border_mask").byte()

    # ---- Age-consistency mask --------------------------------------------
    # Keep pixels whose age agrees with the young/old class of the two raw
    # tiles (per the original note, the IPCC classification: value 2 = young
    # secondary, value 3 = old secondary).
    age_class = ee.ImageCollection([
        ee.Image(f"{data_folder}/raw/00N_10N"),
        ee.Image(f"{data_folder}/raw/10N_20N"),
    ]).mosaic()
    age = secondary.select("age")
    young_match = age.lte(20).updateMask(age_class.eq(2)).unmask(0)
    old_match = age.gt(20).updateMask(age_class.eq(3)).unmask(0)
    ages_mask = young_match.add(old_match)

    # ---- Nearest mature biomass: mosaic of the 21 numbered assets --------
    tile_prefix = f"{data_folder}/{method}/nearest_mature_biomass_"
    biomass_tiles = [ee.Image(f"{tile_prefix}{tile_number}") for tile_number in range(1, 22)]
    nearest_mature = ee.ImageCollection(biomass_tiles).mosaic().rename("nearest_mature_biomass")
    if method == "mapbiomas":
        nearest_mature = nearest_mature.reproject(scale=1000, crs='EPSG:4326').rename("nearest_mature_biomass")

    # ---- Combine masks and stack bands -----------------------------------
    combined_mask = (
        nearest_mature.mask()
        .And(distance_to_border_mask)
        .And(one_hectare_mask)
        .And(ages_mask)
        .And(GEDI_biomass.mask())
    )

    unified_data = secondary.addBands([
        GEDI_biomass,
        nearest_mature,
        sur_cover,
        cwd,
        distance_to_nearest_mature,
        categorical,
        soil,
        topography,
        climate,
    ])

    if method == "mapbiomas":
        # Disabled alternative kept for reference: add the "ESA_fires" asset
        # as a band named "ESA" and use suffix "ESA_fire".
        aggregation_label = "aggregated" if aggregate_LU else "non_aggregated"
        period_label = "_all" if year == 0 else f"_{year}yr"
        suffix = aggregation_label + period_label

        land_use = ee.Image(f"{data_folder}/{method}/land_use_{suffix}")
        unified_data = unified_data.addBands([land_use])
        # Additionally require a valid last-land-use value.
        combined_mask = combined_mask.And(land_use.select("last_LU").mask())
    else:
        suffix = "eu"
        fire = ee.Image(f"{data_folder}/{method}/eu_mapbiomas_fires")
        unified_data = unified_data.addBands([fire])

    return unified_data.updateMask(combined_mask), suffix

# Main Function to run tile-wise exports
def export_tilewise(method, year):
    """Sample the unified image one grid tile at a time and export each tile as CSV.

    For every cell of a grid covering the module-level ``roi``, samples the
    image from ``create_unified_data`` at ``scale = 100`` and starts a Google
    Drive table export named ``<suffix>_<n>``, where ``n`` counts tiles from 1.

    Args:
        method: Forwarded to ``create_unified_data``.
        year: Forwarded to ``create_unified_data``.
    """
    unified_data, suffix = create_unified_data(method, year)

    # Columns to export: every band name except the table bookkeeping fields.
    excluded = {'.geo', 'system:index'}
    properties_to_export = [
        band for band in unified_data.bandNames().getInfo() if band not in excluded
    ]

    # Grid over the ROI; tile ids are fetched client-side so each tile can be
    # submitted as its own task.
    grid = roi.coveringGrid("EPSG:4326", 1000000)
    tile_ids = grid.aggregate_array('system:index').getInfo()

    for count, feature_id in enumerate(tile_ids, start=1):
        tile = grid.filter(ee.Filter.eq('system:index', feature_id))
        tile_samples = unified_data.sample(region=tile.geometry(), scale=100, geometries=False)

        # Export task to Google Drive
        export_task = ee.batch.Export.table.toDrive(
            collection=tile_samples,
            description=f'{suffix}_{count}',
            fileFormat='CSV',
            selectors=properties_to_export,
        )
        export_task.start()
        print(f'Started export task {count}')

# Main function to run a single biome-stratified export
def export_stratified(method, year):
    """Draw a stratified sample of the unified image and export it as one CSV.

    Samples the image from ``create_unified_data`` with ``stratifiedSample``
    (``numPoints = 15000``, class band ``'biome'``, no geometries) and starts
    a Google Drive table export whose description is the returned suffix.

    Args:
        method: Forwarded to ``create_unified_data``.
        year: Forwarded to ``create_unified_data``.
    """
    unified_data, suffix = create_unified_data(method, year)

    # Columns to export: every band name except the table bookkeeping fields.
    excluded = {'.geo', 'system:index'}
    band_names = unified_data.bandNames().getInfo()
    properties_to_export = [band for band in band_names if band not in excluded]

    stratified_samples = unified_data.stratifiedSample(
        numPoints=15000,
        classBand='biome',
        geometries=False,
    )
    # Disabled alternative: plain sampling over the whole ROI, i.e.
    # unified_data.sample(region = roi, scale = 30, geometries = False)

    # Export task to Google Drive
    ee.batch.Export.table.toDrive(
        collection=stratified_samples,
        description=f'{suffix}',
        fileFormat='CSV',
        selectors=properties_to_export,
    ).start()


# Export driver. Uncomment the runs that should be submitted.
# `aggregate_LU` is a module-level flag read inside create_unified_data for the
# "mapbiomas" method: True selects the "aggregated" land-use asset, False the
# "non_aggregated" one. The second argument is the `year` parameter
# (0 selects the "_all" land-use asset).

# --- Stratified exports (one CSV per call) ---
aggregate_LU = True
# export_stratified('eu', 0)
export_stratified('mapbiomas', 0)
# export_stratified('mapbiomas', 5)
# export_stratified('mapbiomas', 10)
# export_stratified('mapbiomas', 15)
# aggregate_LU = False
# export_stratified('mapbiomas', 0)
# export_stratified('mapbiomas', 5)
# export_stratified('mapbiomas', 10)
# export_stratified('mapbiomas', 15)

# --- Tile-wise exports (one CSV per grid tile), currently all disabled ---
# aggregate_LU = True
# export_tilewise('eu', 0)
# export_tilewise('mapbiomas', 0)
# export_tilewise('mapbiomas', 5)
# export_tilewise('mapbiomas', 10)
# export_tilewise('mapbiomas', 15)
# aggregate_LU = False
# export_tilewise('mapbiomas', 0)
# export_tilewise('mapbiomas', 5)
# export_tilewise('mapbiomas', 10)
# export_tilewise('mapbiomas', 15)

In [8]:
# Export a sample that pairs the secondary-forest age from the "mapbiomas"
# product with the age from the "eu" product on the pixels both products cover.
secondary_mapbiomas = ee.Image(f"{data_folder}/mapbiomas/secondary")
secondary_eu = ee.Image(f"{data_folder}/eu/secondary")

# Mask each product by the other's "age" band so both end up restricted to
# their common pixels.
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))
secondary_eu = secondary_eu.updateMask(secondary_mapbiomas.select("age"))
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))

# Restrict the categorical layers to biomes 1, 4 and 6.
categorical = ee.Image(f"{data_folder}/categorical")
biome_mask = categorical.select('biome').eq(1) \
               .Or(categorical.select('biome').eq(4)) \
               .Or(categorical.select('biome').eq(6))
categorical = categorical.updateMask(biome_mask)
cwd = ee.Image(f"{data_folder}/raw/cwd_chave")

# Fix: the "age_eu" band is now taken from the EU product. It was previously
# selected from `secondary_mapbiomas`, which made the exported "age_eu" column
# an exact copy of the mapbiomas "age" column.
unified_data = secondary_mapbiomas.addBands([
    categorical,
    cwd,
    secondary_eu.select("age").rename("age_eu"),
]).updateMask(secondary_mapbiomas.select("age"))

# Stratified sample by biome (numPoints = 10000) within the ROI.
unified_data_sampled = unified_data.stratifiedSample(
    numPoints = 10000, classBand = 'biome', region = roi
)

task = ee.batch.Export.table.toDrive(
    collection = unified_data_sampled, description = "mapbiomas_eu", fileFormat = 'CSV'
)
task.start()

In [None]:
# TODO: extract data for land that is currently agricultural, in order to
# predict its regrowth potential:
#
# 1. Get the full land-use history of every plot that is currently agricultural land.
# 2. Get the nearest mature biomass for each of these plots.
# 3. Get the same predictors and run the model on these pixels.