<!-- # Processing the final dataset
 -->


# Export data as CSV

In [2]:
import ee
import geemap
from gee_0_utils import *

# `initialize` and `ProjectConfig` are not defined in this notebook; presumably
# they come from the star-import of gee_0_utils and set up the Earth Engine
# session and project settings (TODO confirm).
initialize()
config = ProjectConfig()
# Settings pulled from the project config and reused by the cells below.
roi = config.roi
data_folder = config.data_folder
last_year = config.last_year


def export_csv(unified_data, name, numPoints, classBand):
    """Draw a stratified sample from an image and export it to Google Drive as CSV.

    Args:
        unified_data: Image to sample. It must provide ``bandNames()`` and
            ``stratifiedSample()``, so an already-sampled feature collection
            cannot be passed here.
        name: Used as the ``description`` of the export task.
        numPoints: Forwarded to ``stratifiedSample`` as ``numPoints``.
        classBand: Forwarded to ``stratifiedSample`` as ``classBand``.

    Returns:
        The started export task, so the caller can inspect its status.
    """
    excluded = {'.geo', 'system:index'}
    # getInfo() brings the band names to the client so they can be filtered
    # in plain Python and handed to the export as CSV column selectors.
    band_names = unified_data.bandNames().getInfo()
    properties_to_export = [band for band in band_names if band not in excluded]

    unified_fc = unified_data.stratifiedSample(numPoints=numPoints, classBand=classBand)

    # Export task to Google Drive
    task = ee.batch.Export.table.toDrive(
        collection=unified_fc,
        description=name,
        fileFormat="CSV",
        selectors=properties_to_export,
    )
    task.start()

    # Hand the task back instead of discarding it; previously there was no way
    # for the caller to check whether the export succeeded.
    return task



## Data for all biomass and age comparisons

In [3]:
# Biome band of the categorical asset; used as the class band when sampling.
biome =  ee.Image(f"{data_folder}/categorical").select("biome")

# Ages
# Three forest-age products, each renamed so the source stays identifiable
# once they are stacked into one image.
mapbiomas = ee.Image(f"{data_folder}/mapbiomas_{last_year}").rename(["age_mapbiomas"])
tmf = ee.Image(f"{data_folder}/tmf_{last_year}").rename(["age_tmf"])
# NOTE(review): the Silva and Heinrich bands are hard-coded to 2020, while the
# other layers follow `last_year` — confirm that last_year is 2020.
silva = ee.Image("projects/ee-regrowth/assets/MB_secondary_forest_Silva_Junior_v2/sforestAge_brazil_V8").select("classification_2020").rename("age_silva")

# Response variables
GEDI_L2A = ee.Image(f"{data_folder}/GEDI_L2A_{last_year}")
GEDI_L4A = ee.Image(f"{data_folder}/GEDI_L4A_{last_year}")
ESA_CCI = (ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
           .select("AGB").rename(f"ESA_CCI_{last_year}"))
heinrich = (ee.Image("projects/ee-regrowth/assets/Heinrich_etal_2021_updates/sforestAGC_climate_only_v1_1")
            .select("classification_2020").rename("heinrich_biomass_2020"))
# NOTE(review): lang_height is built here but is not added to `comparisons`
# below — confirm whether it should be part of the stack.
lang_height = ee.Image('users/nlang/ETH_GlobalCanopyHeight_2020_10m_v1').clip(roi).rename('lang_height')

# Combine all images into a single multi-band image
comparisons = mapbiomas.addBands([tmf, silva, ESA_CCI, GEDI_L2A, GEDI_L4A, heinrich, biome])
# Keep only pixels where every band of the stack is non-zero.
mask = comparisons.reduce(ee.Reducer.allNonZero())
comparisons = comparisons.updateMask(mask)

# Sample and export image
# export_csv(comparisons, "comparisons_sampled", 10000, "biome")

## Mature forest biomass comparisons

In [4]:
# Mature-forest biomass stacked with the biome band so it can be sampled per biome.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass").addBands(biome)
mature_biomass_sampled = mature_biomass.stratifiedSample(numPoints = 10000, classBand = 'biome')
# NOTE(review): the call below does not match export_csv, which requires
# numPoints and classBand and calls bandNames() on its first argument (so it
# expects an image, not this already-sampled collection).
# export_csv(mature_biomass_sampled, "mature_biomass_sampled")

## Field Data

In [5]:
# Field plot data, from https://github.com/forc-db/GROA/tree/master/data
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")
# Biome polygons, reduced to the CD_Bioma property.
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')


def determine_biome(feature):
    """Tag a field plot with the CD_Bioma value of the first biome it intersects.

    Args:
        feature: Feature whose geometry is tested against the biome polygons.

    Returns:
        The same feature with an added 'biome' property.
    """
    plot_geometry = feature.geometry()
    intersecting_biomes = biomes.filterBounds(plot_geometry)
    biome_code = intersecting_biomes.first().get('CD_Bioma')
    return feature.set('biome', biome_code)


# Annotate every field plot with its biome.
field_biomass = field_data.map(determine_biome)

# export_csv(field_biomass, "field_biomass")

## Main Model Dataset

### Age, Biomass

In [6]:
# Fire and Land Use
# Forest-age image for `last_year`; unlike the comparison cell, the band is
# not renamed here.
age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# NOTE(review): identical to the ESA_CCI definition in the comparison cell;
# redefined here so this section does not depend on that cell having run.
ESA_CCI = (ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
           .select("AGB").rename(f"ESA_CCI_{last_year}"))

# Per-pixel count of years (config.range_1985_2020) with burned coverage > 0;
# masked pixels are filled with 0.
fire = (ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
    .gt(0)
    .reduce('sum').rename("num_fires")).unmask(0)

# 1 where the `last_year` land-cover classification equals class 6, else 0.
floodable_forests = (ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
        .select(f"classification_{last_year}").eq(6)).rename("floodable_forests")

# Land-use asset variants to process; only the uncommented suffixes are loaded.
suffixes = [
    # "aggregated_all",
    # "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr"
]

# One land-use image per suffix, in the same order as `suffixes`.
land_use_list = [
    ee.Image(f"{data_folder}/land_use_{suffix}")
    for suffix in suffixes
]

### Surrounding Landscape

In [7]:
# Landscape-context layers loaded from pre-computed assets.
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")
distance_to_forest_edge = ee.Image(f"{data_folder}/distance_to_forest_edge")
sur_cover = ee.Image(f"{data_folder}/sur_cover_500m")

# Mask that keeps only pixels whose distance_to_forest_edge value exceeds 1000
# (presumably metres — TODO confirm the asset's units).
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()
# Rebinds `mature_biomass`: the mature-biomass comparison cell bound the same
# name to this asset with the biome band added.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_exclude_edge = mature_biomass.updateMask(distance_gt_1000).rename("mature_biomass")
# nearest_mature_biomass = ee.Image(f"{data_folder}/nearest_mature_biomass")

### Environmental

In [8]:
# Categorical predictors; the "biome" band of this asset is used as the
# stratification class band elsewhere in the notebook.
categorical = ee.Image(f"{data_folder}/categorical")

topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography") # 90m resolution

# Soil predictors loaded from a pre-computed project asset.
soil = ee.Image(f"{data_folder}/soilgrids")

terraclim = ee.Image(f"{data_folder}/yearly_terraclim") # 10,000m resolution

### Export Sampled

In [81]:
def create_and_export_grid(region_name, cell_size=50000):
    """Sample one age pixel per grid cell of a biome and export the points as an asset.

    A grid is laid over the selected biome. Within each cell a stratified
    sample (up to 5 points per biome class, 30 m scale) is drawn from the
    age + biome image and only the first sampled point is kept. Cells that
    yield no sample are dropped before the export.

    Args:
        region_name: 'amazon' or 'atlantic' (case-insensitive).
        cell_size: Cell size passed to ``geemap.create_grid``. It is also
            integer-divided by 1000 to build the asset name, e.g. 50000
            gives "grid_50k_amaz".

    Returns:
        The started export task.

    Raises:
        ValueError: If ``region_name`` is not a supported region. Previously
            this case fell through both branches and crashed later on the
            unbound ``roi`` variable.
    """
    # CD_Bioma code and asset-name suffix for each supported region.
    regions = {
        'amazon': (1, 'amaz'),
        'atlantic': (4, 'atla'),
    }
    region_key = region_name.lower()
    if region_key not in regions:
        raise ValueError(
            f"Unknown region_name {region_name!r}; expected one of {sorted(regions)}"
        )
    biome_code, suffix = regions[region_key]
    export_name = f"grid_{cell_size//1000}k_{suffix}"

    # Local `roi`: the geometry of the selected biome only.
    roi = (
        ee.FeatureCollection(f"{data_folder}/raw/biomes_br")
        .filterMetadata('CD_Bioma', 'equals', biome_code)
        .geometry()
    )

    # Age stacked with the biome band so that biome can act as the class band.
    age_biome = age.addBands(categorical.select("biome"))

    # Grid of cells covering the biome.
    grid = geemap.create_grid(roi, cell_size, 'EPSG:4326')

    def sample_cell(cell):
        """Return one sampled point for the cell, or a flagged placeholder."""
        sampled_fc = age_biome.stratifiedSample(
            numPoints=5,
            classBand='biome',
            region=cell.geometry(),
            scale=30,
            geometries=True,
            dropNulls=True
        )
        # map() must return a feature for every cell, so empty cells get a
        # placeholder that is filtered out below.
        return ee.Feature(ee.Algorithms.If(
            sampled_fc.size().gt(0),
            sampled_fc.first(),
            ee.Feature(ee.Geometry.Point([0, 0])).set('is_null', True)
        ))

    age_samples = grid.map(sample_cell)

    # Filter out placeholder features before exporting
    age_samples = age_samples.filter(ee.Filter.notEquals('is_null', True))

    # Export the feature collection
    export_task = ee.batch.Export.table.toAsset(
        collection=age_samples,
        description=export_name,
        assetId=f"{data_folder}/{export_name}"
    )

    # Start the export
    export_task.start()
    return export_task

# Launch the export for the Amazon biome; with the default cell_size of 50000
# the asset name is "grid_50k_amaz".
create_and_export_grid('amazon')
# create_and_export_grid('atlantic')

In [None]:
def extract_values(feature):
    """Attach the pixel values of every predictor image at the feature's location.

    For each image, ``reduceRegion`` with ``ee.Reducer.first()`` is evaluated
    over the feature geometry at a scale of 30. The resulting values are
    stored as a list under a property named after the image variable.

    Args:
        feature: Feature whose geometry is used as the reduction region.

    Returns:
        The feature with one added list-valued property per image.
    """
    geom = feature.geometry()

    # Explicit name -> image mapping instead of a globals() lookup. The former
    # 'grid' entry is dropped: no module-level `grid` image exists (the only
    # `grid` is a feature collection local to create_and_export_grid), so the
    # lookup failed with KeyError.
    images = {
        'ESA_CCI': ESA_CCI,
        'fire': fire,
        'floodable_forests': floodable_forests,
        'quarters_ecoreg_biomass': quarters_ecoreg_biomass,
        'distance_to_forest_edge': distance_to_forest_edge,
        'sur_cover': sur_cover,
        'categorical': categorical,
        'topography': topography,
        'soil': soil,
        'terraclim': terraclim,
    }

    for name, img in images.items():
        value = img.reduceRegion(ee.Reducer.first(), geom, 30).values()
        feature = feature.set(name, value)

    return feature

# NOTE(review): this loads "grid_1k_amaz", whereas create_and_export_grid('amazon')
# with its default cell_size=50000 writes "grid_50k_amaz" — confirm which asset
# is intended.
selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_1k_amaz")
# Attach the predictor values to every sampled point.
selected_pixels = selected_pixels.map(extract_values)

In [19]:
# The next three lines repeat the definitions from the surrounding-landscape
# cell, so this cell can run without it.
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_exclude_edge = mature_biomass.updateMask(distance_gt_1000).rename("mature_biomass")
# Force the edge-excluded biomass onto an EPSG:4326 grid at scale 10000.
mature_biomass_resampled = mature_biomass_exclude_edge.reproject(crs='EPSG:4326', scale = 10000)

# Fill masked pixels with -1 so that zeroCrossing() marks the boundary between
# masked (-1) and valid pixels (assumes valid biomass values are positive —
# TODO confirm).
edge_detec = mature_biomass_resampled.unmask(-1).zeroCrossing()
# Distance to that boundary: the transform is taken in pixel units, square-rooted,
# then scaled by the pixel side length (sqrt of pixel area), with a constant
# 10000 added.
# NOTE(review): toInt16() tops out at 32767; if the pixel side really is
# ~10000 m, distances beyond about three pixels will not fit — confirm whether
# a wider integer type is needed.
distance_to_deep_forest = edge_detec.fastDistanceTransform(1024, 'pixels').sqrt() \
    .multiply(ee.Image.pixelArea().sqrt()).toInt16().add(10000).rename("dist_deep_forest")

In [None]:
def extract_nearest_pixel_values(feature):
    """Copy the resampled mature-biomass sample at a feature's location onto it.

    Args:
        feature: Feature whose geometry is used as the sampling region.

    Returns:
        The feature with the properties of the first sample added.
    """
    # Sample at the image's own nominal scale.
    native_scale = mature_biomass_resampled.projection().nominalScale()
    samples = mature_biomass_resampled.sample(
        region=feature.geometry(),
        scale=native_scale,
        geometries=True,
    )

    # Only the first sample is used; its properties are merged into the
    # original feature.
    first_sample = ee.Feature(samples.first())
    return feature.set(first_sample.toDictionary())

# Apply the function to each feature in the collection
# NOTE(review): `age` is created as an ee.Image in the Age/Biomass cell, and
# extract_nearest_pixel_values expects features — a feature collection (e.g.
# the sampled points) was probably intended here; confirm before running.
age_with_biomass = age.map(extract_nearest_pixel_values)


SyntaxError: invalid syntax (1737128616.py, line 1)

In [None]:
def create_buffer(feature):
    """Replace a feature's geometry with a buffer sized by its 'dist_deep_forest' value.

    Args:
        feature: Feature carrying a numeric 'dist_deep_forest' property.

    Returns:
        The feature with its geometry replaced by the buffered geometry.
    """
    radius = feature.getNumber('dist_deep_forest')
    return feature.setGeometry(feature.geometry().buffer(radius))

# Apply the function to each feature in the FeatureCollection
buffered_features = selected_pixels.map(create_buffer)

# Create a list of features so they can be addressed by position
buffered_features_list = buffered_features.toList(buffered_features.size())

# Assign IDs 1..n. ee.List.get() is zero-based, so the feature for ID `index`
# sits at position index - 1. Using the ID directly as the position skipped the
# first feature and ran past the end of the list on the last one.
# `add_sequential_id` is not defined in this notebook; presumably it comes from
# gee_0_utils (TODO confirm).
buffered_features_with_id = ee.FeatureCollection(
    ee.List.sequence(1, buffered_features.size()).map(
        lambda index: add_sequential_id(
            ee.Feature(buffered_features_list.get(ee.Number(index).subtract(1))),
            index,
        )
    )
)

In [9]:

# Loop through each land use image
# NOTE(review): `unified_data_sampled` is overwritten on every iteration and
# `suffix` is unused in the visible code — with more than one suffix only the
# last sample survives unless an export step follows.
for land_use, suffix in zip(land_use_list, suffixes):
    # Add bands and update the mask
    # Pixels are kept only where both the age band and the land-use "last_lu"
    # band are non-zero.
    unified_data = age.addBands([
        ESA_CCI, fire, floodable_forests, land_use, 
        quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
        categorical, topography, terraclim, soil
    ]).updateMask(age.And(land_use.select("last_lu")))

    # Stratify by the 'biome' band (selected from `categorical` elsewhere in
    # this notebook) and keep point geometries.
    unified_data_sampled = unified_data.stratifiedSample(numPoints = 10000, classBand = 'biome', geometries=True)
