<!-- # Processing the final dataset
 -->


In [2]:
import ee
import geemap
from gee_0_utils import *

# Project set-up. `initialize` and `ProjectConfig` come from gee_0_utils
# (presumably they start the Earth Engine session and hold project-wide
# settings -- confirm in that module).
initialize()
config = ProjectConfig()

# Region of interest and asset folder prefix used by the cells below.
roi, data_folder = config.roi, config.data_folder

In [3]:
def keep_rare_lu_types(unified_data):
    """Mask the image to pixels where any listed `lulc_sum_*` band is non-zero.

    Args:
        unified_data: image (used here through the ee.Image API) that
            contains every `lulc_sum_*` band listed below.

    Returns:
        `unified_data` with its mask updated by the any-non-zero reduction
        of the listed bands.
    """
    # Land-use bands of interest (the "rare" types, going by the function
    # name -- the meaning of the numeric codes is not defined in this file).
    rare_lu_bands = [
        'lulc_sum_20', 'lulc_sum_21', 'lulc_sum_35',
        'lulc_sum_39', 'lulc_sum_40', 'lulc_sum_41',
        'lulc_sum_46', 'lulc_sum_48', 'lulc_sum_9',
    ]

    # Collapse the selected bands into a single band that is non-zero
    # wherever at least one of them is non-zero.
    rare_lu_stack = unified_data.select(rare_lu_bands)
    any_rare_lu = rare_lu_stack.reduce(ee.Reducer.anyNonZero())

    return unified_data.updateMask(any_rare_lu)

# To reduce spatial autocorrelation, keep at most one sample per grid cell:
def sample_by_grid(unified_data):
    """Draw one sample of `unified_data` per cell of a grid over its footprint.

    Args:
        unified_data: image (used here through the ee.Image API) that
            contains a 'biome' band.

    Returns:
        The result of mapping the per-cell sampler over the covering grid
        with `dropNulls=True`, i.e. one sampled feature per grid cell that
        yielded a sample.
    """
    footprint = unified_data.geometry()

    # Grid built in the footprint's own projection with a spacing of 1000
    # (presumably metres -- confirm against the projection's units).
    cells = footprint.coveringGrid(footprint.projection(), 1000)

    def sample_cell(cell):
        # Stratify on 'biome' inside this cell and keep only the first
        # feature of the resulting collection.
        # NOTE(review): the sampling scale (50000) is much coarser than the
        # grid spacing (1000) -- confirm this is intended.
        return unified_data.stratifiedSample(
            numPoints=1, classBand='biome', region=cell.geometry(), scale=50000
        ).first()

    # Cells for which the sampler returns null are dropped.
    return cells.map(sample_cell, dropNulls=True)



In [4]:
# Scratch build of the unified image for a single method.
method = "mapbiomas"

# --- Masks -----------------------------------------------------------------
distance_to_border_mask = ee.Image(f"{data_folder}/distance_to_border_mask").byte()
one_hectare_mask = ee.Image(f"{data_folder}/{method}/one_hectare_mask").selfMask()

# --- Method-specific layers ------------------------------------------------
secondary = ee.Image(f"{data_folder}/{method}/secondary")
nearest_mature = ee.Image(
    f"{data_folder}/{method}/nearest_mature_biomass_image"
).rename("nearest_mature_biomass")
distance_to_nearest_mature = ee.Image(f"{data_folder}/{method}/distance_to_nearest_mature")
sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover")

# --- Method-independent layers ---------------------------------------------
cwd = ee.Image(f"{data_folder}/raw/cwd_chave").int16()
categorical = ee.Image(f"{data_folder}/categorical").select(
    ["biome", "ecoreg", "protec", "indig"]
)
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")
# Only these soil bands are kept; per the original author's note the others
# were dropped as multicollinear.
soil = ee.Image(f"{data_folder}/soilgrids").select(['nitro', 'sand', 'phh2o'])
climate = ee.Image(f"{data_folder}/yearly_terraclim")

# A pixel is kept only where nearest_mature is unmasked AND both masks allow it.
combined_mask = (
    nearest_mature.mask()
    .And(distance_to_border_mask)
    .And(one_hectare_mask)
)

# Bands are appended to `secondary` in this order.
predictor_layers = [
    nearest_mature, sur_cover, cwd, distance_to_nearest_mature,
    categorical, soil, topography, climate,
]
unified_data = secondary.addBands(predictor_layers).updateMask(combined_mask)


# map = geemap.Map()
# map.addLayer(unified_data, {}, 'unified_data')
# map


Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [4]:

# Function to preprocess images and create the unified data image
def create_unified_data(method, year):
    """Assemble the masked multi-band image for `method` and its export suffix.

    Reads the module-level `data_folder`, and for the "mapbiomas" method also
    the module-level flag `aggregate_LU`; both must be defined before the
    call.

    Args:
        method: asset sub-folder name. "mapbiomas" adds a land-use image;
            any other value adds the `num_fires` image instead.
        year: only used for "mapbiomas". 0 selects the land-use asset ending
            in "_all"; any other value selects the one ending in
            f"_{year}yr".

    Returns:
        Tuple `(image, suffix)`: the band stack with the combined mask
        applied, and the suffix string ("eu" for non-mapbiomas methods).
    """
    # Base image that all other bands are appended to.
    base = ee.Image(f"{data_folder}/{method}/secondary")

    # Method-specific layers.
    mature_biomass = ee.Image(
        f"{data_folder}/{method}/nearest_mature_biomass_image"
    ).rename("nearest_mature_biomass")
    mature_distance = ee.Image(f"{data_folder}/{method}/distance_to_nearest_mature")
    surrounding_cover = ee.Image(f"{data_folder}/{method}/sur_cover")

    # Method-independent layers.
    water_deficit = ee.Image(f"{data_folder}/raw/cwd_chave").int16()
    categories = ee.Image(f"{data_folder}/categorical").select(
        ["biome", "ecoreg", "protec", "indig"]
    )
    landforms = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")
    # Only these soil bands are kept; per the original author's note the
    # others were dropped as multicollinear.
    soil_props = ee.Image(f"{data_folder}/soilgrids").select(['nitro', 'sand', 'phh2o'])
    yearly_climate = ee.Image(f"{data_folder}/yearly_terraclim")

    # Masks combined with the validity mask of the mature-biomass layer.
    border_mask = ee.Image(f"{data_folder}/distance_to_border_mask").byte()
    hectare_mask = ee.Image(f"{data_folder}/{method}/one_hectare_mask").selfMask()
    valid = mature_biomass.mask().And(border_mask).And(hectare_mask)

    stack = base.addBands([
        mature_biomass, surrounding_cover, water_deficit, mature_distance,
        categories, soil_props, landforms, yearly_climate
    ])

    if method == "mapbiomas":
        # The suffix doubles as the tail of the land-use asset name.
        suffix = "aggregated" if aggregate_LU else "non_aggregated"
        suffix += f"_{year}yr" if year != 0 else "_all"

        lu_history = ee.Image(f"{data_folder}/{method}/land_use_{suffix}")
        stack = stack.addBands([lu_history])
        # Also require the "last_LU" band to be unmasked.
        valid = valid.And(lu_history.select("last_LU").mask())
    else:
        suffix = "eu"
        stack = stack.addBands([ee.Image(f"{data_folder}/{method}/num_fires")])

    return stack.updateMask(valid), suffix


# Main function: build the unified image, draw a stratified sample and export it to Drive
def run_exports(method, year):
    """Sample the unified image for `method`/`year` and start a CSV export.

    Args:
        method: forwarded to `create_unified_data`.
        year: forwarded to `create_unified_data`.

    Returns:
        None. As a side effect an Earth Engine batch export to Google Drive
        is started; the suffix returned by `create_unified_data` is used as
        the task description.
    """
    image, suffix = create_unified_data(method, year)

    # Stratify on the 'biome' band; no region or scale is passed, so the
    # Earth Engine defaults apply. Point geometries are not kept.
    samples = image.stratifiedSample(
        numPoints=100000, classBand='biome', geometries=False
    )

    # Columns to export: the image's band names, fetched client-side with
    # getInfo() (a blocking request), minus the two names below.
    excluded = ('.geo', 'system:index')
    columns = [name for name in image.bandNames().getInfo() if name not in excluded]

    export_task = ee.batch.Export.table.toDrive(
        collection=samples,
        description=f'{suffix}',
        fileFormat='CSV',
        selectors=columns,
    )
    export_task.start()


# Module-level flag read by create_unified_data for the "mapbiomas" method
# to choose between the "aggregated" and "non_aggregated" land-use assets.
aggregate_LU = False

# One export per `year` argument; 0 selects the "_all" land-use asset.
for export_year in (0, 5, 10, 15):
    run_exports('mapbiomas', export_year)

In [8]:
# Export a sample that pairs the "age" band of the mapbiomas product with the
# "age" band of the eu product, for comparison.
secondary_mapbiomas = ee.Image(f"{data_folder}/mapbiomas/secondary")
secondary_eu = ee.Image(f"{data_folder}/eu/secondary")

# Mask each product by the other's "age" band so that only pixels present in
# both remain.
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))
secondary_eu = secondary_eu.updateMask(secondary_mapbiomas.select("age"))
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))

# Restrict the categorical layers to biome codes 1, 4 and 6 (the meaning of
# the codes is not defined in this file).
categorical = ee.Image(f"{data_folder}/categorical")
biome_mask = (
    categorical.select('biome').eq(1)
    .Or(categorical.select('biome').eq(4))
    .Or(categorical.select('biome').eq(6))
)
categorical = categorical.updateMask(biome_mask)
cwd = ee.Image(f"{data_folder}/raw/cwd_chave")

# Fix: "age_eu" must come from the eu product. It was previously taken from
# secondary_mapbiomas, which exported the mapbiomas age twice and left
# secondary_eu unused.
unified_data = secondary_mapbiomas.addBands([
    categorical,
    cwd,
    secondary_eu.select("age").rename("age_eu"),
]).updateMask(secondary_mapbiomas.select("age"))

# Stratify on 'biome' within the project region of interest.
unified_data_sampled = unified_data.stratifiedSample(
    numPoints = 10000, classBand = 'biome', region = roi
)

task = ee.batch.Export.table.toDrive(
    collection = unified_data_sampled, description = "mapbiomas_eu", fileFormat = 'CSV'
)
task.start()

In [None]:
# to get the data from current agricultural land, to predict its regrowth potential:

# get all land use history for all plots that are currently agricultural land
# get the nearest mature biomass for each of these plots
# get the same predictors and run the model for these pixels.