<!-- # Processing the final dataset
 -->


# Export data as CSV

In [1]:
import ee
import geemap
from gee_0_utils import *

# Start the session and load the shared project settings.
# NOTE(review): initialize() and ProjectConfig arrive through the star import of
# gee_0_utils; what they do exactly is not visible in this notebook.
initialize()
config = ProjectConfig()
# Short aliases for the config attributes used by the cells below.
roi = config.roi
data_folder = config.data_folder
last_year = config.last_year


## Main Model Dataset

### Age, Biomass

In [2]:
# --- Age, biomass, fire history and floodable-forest mask ---

# Raster read from the project's "mapbiomas_<last_year>" asset.
age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# Earlier biomass source, kept for reference:
# ESA_CCI = (ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51").select("AGB").rename("biomass"))

# Biomass is read from a project asset and exposed as a single band named "biomass".
ESA_CCI = ee.Image(f"{data_folder}/raw/biomass").rename("biomass")

# Per-pixel count of the years in config.range_1985_2020 whose burned-coverage
# band is > 0; pixels left masked by the reduction are filled with 0.
fire = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
    .gt(0)
    .reduce('sum')
    .rename("num_fires")
    .unmask(0)
)

# 1 where the MapBiomas classification band for last_year equals class 6, else 0.
floodable_forests = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
    .eq(6)
    .rename("floodable_forests")
)


### Surrounding Landscape

In [3]:
# Pre-computed landscape-context rasters, loaded from Earth Engine assets.
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")
distance_to_forest_edge = ee.Image(f"{data_folder}/distance_to_forest_edge")
sur_cover = ee.Image(f"{data_folder}/sur_cover_500m")

# Keeps only the pixels whose distance value exceeds 1000 (selfMask hides the zeros).
# NOTE(review): not referenced again in the visible part of the notebook.
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()
# Mature-forest biomass assets; `nearest_mature` is an image here but is rebound
# to a FeatureCollection by later cells.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k")
nearest_mature = ee.Image(f"{data_folder}/nearest_mature")

### Environmental

In [None]:
# Project asset of categorical predictors; other cells select its "biome" band.
categorical = ee.Image(f"{data_folder}/categorical")

# Public landform dataset, exposed under the band name "topography".
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography") # 90m resolution

soil = ee.Image(f"{data_folder}/soilgrids")

# Other cells select the bands mean_srad, mean_soil, mean_vpd and mean_pr from this image.
terraclim = ee.Image(f"{data_folder}/yearly_terraclim") # 10,000m resolution

# CMIP6
# NOTE(review): the four CMIP6 images below are not referenced again in the
# visible part of the notebook.
historical = ee.Image(f"{data_folder}/CMIP6_historical")
ssp126 = ee.Image(f"{data_folder}/CMIP6_ssp126")
ssp245 = ee.Image(f"{data_folder}/CMIP6_ssp245")
ssp585 = ee.Image(f"{data_folder}/CMIP6_ssp585")

### Export CSV

- 1 secondary forest pixel per 10km2
- 1 pasture pixel per 10km2
- 1 centroid per 50km2 (for visualization for the full Amazon)

- Total secondary forest area per 10km2
- Total pasture area per 10km2

In [None]:

# Per-pixel coordinates, carried along as two bands named 'lon' and 'lat'.
latlon = ee.Image.pixelLonLat().rename(['lon', 'lat'])

# FINAL EXPORT
# Stack every predictor as extra bands on top of the biomass image.
# NOTE(review): `nearest_mature` must still be the image loaded earlier; the
# field-data and land-use cells rebind that name to a FeatureCollection.
unified_data = ESA_CCI.addBands([
    fire,
    quarters_ecoreg_biomass,
    distance_to_forest_edge,
    sur_cover,
    categorical,
    topography,
    soil,
    nearest_mature,
    terraclim.select(["mean_srad", "mean_soil", "mean_vpd", "mean_pr"]),
    latlon,
])

# 1. Get FeatureCollection size and calculate chunk size
# NOTE(review): `selected_pixels` is only assigned in later cells of this
# notebook, so this cell fails on a fresh kernel unless one of them ran first.
total_features = selected_pixels.size().getInfo()
# 2% per chunk, but never 0: int() truncates to 0 below 50 features, which
# would make every chunk empty.
chunk_size = max(1, int(total_features * 0.02))
# Ceiling division so that the last, possibly partial, chunk is exported too.
# (A fixed range(30) with 2% chunks would stop after 60% of the features.)
num_chunks = (total_features + chunk_size - 1) // chunk_size

# Columns written to every CSV: one per band of the sampled image. Fetched once
# here rather than once per chunk. Geometry and system:index are left out
# because they are not band names.
properties_to_export = unified_data.bandNames().getInfo()

# 2. Create chunk processing function
def process_chunk(chunk_index):
    """Sample `unified_data` at one slice of `selected_pixels` and export it to Drive.

    Args:
        chunk_index: zero-based slice index; the slice covers the features
            [chunk_index * chunk_size, (chunk_index + 1) * chunk_size).

    Starts an Earth Engine export task described as ``unified_<chunk_index>``;
    returns nothing.
    """
    start = chunk_index * chunk_size
    chunk = selected_pixels.toList(chunk_size, start)
    chunk_fc = ee.FeatureCollection(chunk)

    # Read every band at the features of THIS chunk. The previous version built
    # `chunk_fc` but then sampled the whole image with an undefined `numPoints`.
    # NOTE(review): the features need geometries for reduceRegions to return values.
    unified_fc = unified_data.reduceRegions(
        collection = chunk_fc,
        reducer = ee.Reducer.first(),
        scale = 30
    )

    # Export task to Google Drive
    task = ee.batch.Export.table.toDrive(
        collection = unified_fc,
        description = f'unified_{chunk_index}',
        fileFormat = "CSV",
        selectors = properties_to_export
    )
    task.start()

# 3. Launch one export task per chunk
for i in range(num_chunks):
    process_chunk(i)
    print(f"Starting chunk {i}")

# task = ee.batch.Export.table.toAsset(
#     collection = unified_fc,
#     description = 'unified_fc',
#     assetId = f"{data_folder}/unified_fc_reprojected"
# )
# task.start()


Starting chunk 0


KeyboardInterrupt: 

In [None]:
# Yearly MapBiomas land-use classification: one byte band per year, named "<year>".
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select([f"classification_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
)

# 1 where the 2020 class equals 15, 0 everywhere else (masked pixels included).
pastureland = lulc.select("2020").eq(15).unmask(0).rename("pastureland")

# 1 where the age raster is greater than 0, 0 everywhere else.
age_mask = ee.Image(f"{data_folder}/mapbiomas_{last_year}").gt(0).unmask(0)

# Rebinds `roi` (previously config.roi) to the geometry of the biome features
# whose CD_Bioma equals 1.
roi = (
    ee.FeatureCollection(f"{data_folder}/raw/biomes_br")
    .filterMetadata('CD_Bioma', 'equals', 1)
    .geometry()
)

# Grid of cells over the ROI; export_area_per_cell aggregates images per cell.
grid = geemap.create_grid(roi, 10000, 'EPSG:4326')

def export_area_per_cell(image, export_name):
    """Sum `image` inside every cell of the global `grid` and export the table as an asset.

    Args:
        image: ee.Image to aggregate; pixels are summed at a 30 m scale.
        export_name: used both as the task description and as the asset name
            under `data_folder`.

    The export task is started immediately; nothing is returned.
    NOTE(review): for the 0/1 masks passed in this notebook the sum is a pixel
    count, not an area in physical units.
    """
    cell_sums = image.reduceRegions(collection=grid, reducer=ee.Reducer.sum(), scale=30)

    ee.batch.Export.table.toAsset(
        collection=cell_sums,
        description=export_name,
        assetId=f"{data_folder}/{export_name}",
    ).start()

# Launch one per-cell aggregation export for each mask.
export_area_per_cell(image=pastureland, export_name="pasture_area")
export_area_per_cell(image=age_mask, export_name="secondary_area")

# map = geemap.Map()
# map.addLayer(pastureland, {}, "Pastureland")
# map


In [None]:
# Visual comparison: the previously exported pasture asset vs. the pastureland
# mask computed in the cell above.
tst = ee.Image(f"{data_folder}/pasture_area_old")

map = geemap.Map()
# Each layer needs its own name: two layers both called "Pastureland" cannot be
# told apart in the layer control, and geemap may replace the first with the second.
map.addLayer(tst, {}, "Pastureland (old asset)")
map.addLayer(pastureland, {}, "Pastureland")
map

## Field Data

In [None]:

# Field plots, loaded from the project's "field_biomass" table asset.
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")

# Attach to each plot the first `distance_to_deep_forest` value found at a
# 10000 scale; with a single-band image the value is stored under 'first',
# which is the property buffer_feature reads.
# NOTE(review): `distance_to_deep_forest` is not defined anywhere in the visible
# notebook (only `distance_to_forest_edge` is) — confirm where it comes from.
selected_pixels = distance_to_deep_forest.reduceRegions(
    collection = field_data,
    reducer = ee.Reducer.first(),
    scale = 10000
)

def buffer_feature(feature):
    """Return `feature` with its geometry buffered by its 'first' property plus 10000.

    Args:
        feature: an ee.Feature carrying a numeric property named 'first'.

    Returns:
        The same feature with its geometry replaced by the buffered geometry.
    """
    radius = feature.getNumber('first').add(10000)
    return feature.setGeometry(feature.geometry().buffer(radius))

# Enlarge every point so that it reaches the nearest pixel.
buffered_features = selected_pixels.map(buffer_feature)

# Take the first non-null mature_biomass_10k value inside each buffered region
# (i.e. the value of the nearest valid pixel), then collapse every region back
# to its centroid.
# NOTE(review): this rebinds `nearest_mature`, which an earlier cell loaded as an image.
nearest_mature = (
    mature_biomass_10k
    .reduceRegions(
        collection=buffered_features,
        reducer=ee.Reducer.firstNonNull(),
        scale=10000,
    )
    .map(lambda feature: feature.centroid())
)

# Predictor stack sampled at the field plots.
unified_img = ESA_CCI.addBands([
    fire,
    floodable_forests,
    quarters_ecoreg_biomass,
    distance_to_forest_edge,
    sur_cover,
    categorical,
    topography,
    terraclim,
    soil,
])

# First pixel value of every band at each centroid, read at a 30 scale.
unified_field = unified_img.reduceRegions(
    collection=nearest_mature,
    reducer=ee.Reducer.first(),
    scale=30,
)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(collection=unified_field, description='unified_field', fileFormat="CSV")
task.start()



In [5]:
# Field plots and biome polygons; only the CD_Bioma property of the biomes is kept.
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass") # from https://github.com/forc-db/GROA/tree/master/data
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')

# Tag every field plot with the biome that contains it
def determine_biome(feature):
    """Return `feature` with a 'biome' property copied from the biome it intersects.

    Args:
        feature: an ee.Feature; its geometry is used to filter the global `biomes`.

    Returns:
        The feature with 'biome' set to the CD_Bioma value of the first biome
        whose bounds match the feature's geometry.
    """
    overlapping = biomes.filterBounds(feature.geometry())
    biome_code = overlapping.first().get('CD_Bioma')
    return feature.set('biome', biome_code)

# Add the 'biome' property to every field plot.
field_biome = field_data.map(determine_biome)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(collection=field_biome, description="field_biome", fileFormat="CSV")
task.start()

In [18]:
# Yearly MapBiomas land-use classification: one byte band per year, named "<year>".
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select([f"classification_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
)

# Secondary-vegetation age band for 2020 from the public MapBiomas asset.
# NOTE(review): this rebinds `age`, which an earlier cell loaded from the project asset.
age = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_secondary_vegetation_age_v1")
    .select("secondary_vegetation_age_2020")
)

lulc_age = lulc.addBands(age)

# First value of every band at each field plot, read at a 10000 scale.
field_age_lulc = lulc_age.reduceRegions(
    collection=field_data,
    reducer=ee.Reducer.first(),
    scale=10000,
)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(collection=field_age_lulc, description='field_age_lulc', fileFormat="CSV")
task.start()


### Export Data for Modelling (with different land use aggregations)


In [16]:
# Loop through each land use image
# For every land-use aggregation: restrict sampling to pixels that have a
# "last_lu" value, build the predictor stack, and hand it to export_csv.
# NOTE(review): `land_use_list` and `suffixes` are assigned in the NEXT cell, and
# `distance_to_deep_forest` is not defined anywhere in the visible notebook, so
# this cell does not run on a fresh kernel as ordered here.
# NOTE(review): `export_csv` is not defined in the visible notebook; presumably it
# comes from the star import of gee_0_utils.
for land_use, suffix in zip(land_use_list, suffixes):

    pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

    # NOTE(review): stratifiedSample is called without geometries=True; per the
    # Earth Engine docs the sampled features then carry no geometry, and their
    # properties are named after the bands rather than 'first' (the property
    # buffer_feature reads) — confirm the steps below behave as intended.
    selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

    # Buffer each point to reach the nearest pixel
    buffered_features = selected_pixels.map(buffer_feature)

    # Extract the biomass value for each buffered region
    # This will get the value from nearest valid pixel
    # NOTE(review): the result is not used again inside this loop.
    nearest_mature = mature_biomass_10k.reduceRegions(
        collection=buffered_features,
        reducer=ee.Reducer.firstNonNull(),
        scale=10000
    )

    unified_img = age.addBands([
        ESA_CCI, fire, floodable_forests, land_use, 
        quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
        categorical, topography, terraclim, soil
    ])

    # NOTE(review): `unified_fc` is not used again inside this loop; only the
    # export_csv call below produces output.
    unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

    # Sample and export image
    export_csv(unified_img, suffix, 10000, "biome")

In [None]:

# Land-use aggregation variants to export ("non_aggregated_15yr" is currently disabled).
suffixes = ["aggregated_all", "non_aggregated_all", "non_aggregated_5yr"]

# One land-use image per variant, in the same order as `suffixes`.
land_use_list = [ee.Image(f"{data_folder}/land_use_{suffix}") for suffix in suffixes]

# The first variant is used for the quick visual check below.
land_use = land_use_list[0]


# pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

# selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

# # Buffer each point to reach the nearest pixel
# buffered_features = selected_pixels.map(buffer_feature)

# # Extract the biomass value for each buffered region
# # This will get the value from nearest valid pixel
# nearest_mature = mature_biomass_10k.reduceRegions(
#     collection=buffered_features,
#     reducer=ee.Reducer.firstNonNull(),
#     scale=10000
# )

# unified_img = age.addBands([
#     ESA_CCI, fire, floodable_forests, land_use, 
#     quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
#     categorical, topography, terraclim, soil
# ])

# unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

# Quick visual check of the "last_lu" band of the selected land-use image.
# NOTE(review): the name `map` shadows Python's built-in map() for the rest of the session.
map = geemap.Map()
map.addLayer(land_use.select("last_lu"), {}, "land_use")
map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [47]:
# Table asset holding the modelled results.
predictions = ee.FeatureCollection(f"{data_folder}/predictions")

# Convert the feature collection to an image: rasterise its 'percent_er' property.
predictions_image = (
    predictions
    .reduceToImage(properties=['percent_er'], reducer=ee.Reducer.first())
    .reproject(crs='EPSG:4326', scale=10000)  # or match your source CRS
    .rename("percent_er")
)

# Same conversion for the 'age' property of the exported unified table.
unified = (
    ee.FeatureCollection(f"{data_folder}/unified_fc")
    .reduceToImage(properties=['age'], reducer=ee.Reducer.first())
    .reproject(crs='EPSG:4326', scale=10000)  # or match your source CRS
    .rename("age")
)

grid_pasture = ee.FeatureCollection(f"{data_folder}/grid_1k_amazon_pastureland")

pasture_area = ee.Image(f"{data_folder}/pasture_area").rename("pasture_area")

# Display the pasture raster with the pasture grid drawn on top.
map = geemap.Map()
map.addLayer(pasture_area, {'min': 0, 'max': 1, 'palette': ['green', 'red']}, "pasture_area")
map.addLayer(grid_pasture, {}, "grid_pasture")
# map.addLayer(predictions_image, {'min': -0.6, 'max': 0, 'palette': ['red', 'green']}, "predictions")
# map.addLayer(unified, {'min': 0, 'max': 25, 'palette': ['red', 'green']}, "age")
map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

## Mature forest biomass comparisons

In [None]:
# Biome band of the categorical asset; the commented-out export below passes
# "biome" as its last argument.
biome =  ee.Image(f"{data_folder}/categorical").select("biome")
# Rebinds `mature_biomass` to the mature-biomass asset with the biome band appended.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass").addBands(biome)

# export_csv(mature_biomass, "mature_biomass", 10000, "biome")