<!-- # Processing the final dataset
 -->


# Export data as CSV

In [1]:
import ee
import geemap
from gee_0_utils import *

# Set up the session and project configuration. `initialize` and `ProjectConfig`
# are not defined in this notebook — presumably they come from the star import
# of gee_0_utils (TODO confirm).
initialize()
config = ProjectConfig()
# Notebook-wide shortcuts to configuration values used by the cells below.
roi = config.roi
data_folder = config.data_folder
last_year = config.last_year


def export_csv(unified_data, name, numPoints, classBand):
    """Draw a stratified sample from an image and export it to Google Drive as CSV.

    Args:
        unified_data: ee.Image to sample; its band names become the exported columns.
        name: Description given to the export task.
        numPoints: Forwarded to `stratifiedSample` as `numPoints`.
        classBand: Forwarded to `stratifiedSample` as `classBand` (the strata band).

    Returns:
        The started ee.batch.Task, so the caller can inspect or poll its status.
        (Previously nothing was returned and the task handle was lost.)
    """
    # Only band values are exported; '.geo' and 'system:index' are excluded
    # should they ever show up among the names.
    to_remove = {'.geo', 'system:index'}
    # getInfo() is a blocking client-server round-trip.
    all_properties = unified_data.bandNames().getInfo()
    properties_to_export = [p for p in all_properties if p not in to_remove]

    unified_fc = unified_data.stratifiedSample(numPoints = numPoints, classBand = classBand)

    # Export task to Google Drive
    task = ee.batch.Export.table.toDrive(
        collection = unified_fc,
        description = name,
        fileFormat = "CSV",
        selectors = properties_to_export
    )

    task.start()
    return task



Gtk-Message: 10:52:31.396: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.



Successfully saved authorization token.


## Mature forest biomass comparisons

In [None]:
# Single "biome" band selected from the categorical asset.
biome =  ee.Image(f"{data_folder}/categorical").select("biome")
# Mature-forest biomass image with the biome band appended.
# NOTE(review): `mature_biomass` is re-assigned (without the biome band) in the
# "Surrounding Landscape" cell further down.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass").addBands(biome)

# export_csv(mature_biomass, "mature_biomass", 10000, "biome")

## Main Model Dataset

### Age, Biomass

In [4]:
# --- Age and biomass ---------------------------------------------------------
age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# Alternative biomass source, kept for reference:
# ESA_CCI = (ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51").select("AGB").rename("biomass"))
ESA_CCI = ee.Image(f"{data_folder}/raw/biomass")

# --- Fire frequency ----------------------------------------------------------
_fire_years = list(config.range_1985_2020)
_burned_by_year = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in _fire_years])
    .byte()
    .rename([str(year) for year in _fire_years])
)
# One 0/1 band per year (burned coverage > 0), summed into a single band;
# pixels that end up masked are set to 0.
fire = _burned_by_year.gt(0).reduce('sum').rename("num_fires").unmask(0)

# --- Floodable forests -------------------------------------------------------
# 1 where the land-cover class of `last_year` equals 6, 0 elsewhere.
_lulc_last_year = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
)
floodable_forests = _lulc_last_year.eq(6).rename("floodable_forests")


### Surrounding Landscape

In [None]:
# Pre-computed landscape-context images loaded from assets.
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")
distance_to_forest_edge = ee.Image(f"{data_folder}/distance_to_forest_edge")
sur_cover = ee.Image(f"{data_folder}/sur_cover_500m")

# Keeps only pixels whose distance value exceeds 1000 (selfMask drops the zeros).
# NOTE(review): not referenced anywhere else in the visible notebook.
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()
# NOTE(review): re-binds `mature_biomass`; the earlier cell's version also carried a biome band.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k")



### Environmental

In [6]:
# Categorical predictors; the "biome" band used for stratified sampling is
# selected from this image in other cells.
categorical = ee.Image(f"{data_folder}/categorical")

# Public landform asset, renamed to a single "topography" band.
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography") # 90m resolution

soil = ee.Image(f"{data_folder}/soilgrids")

# Only a subset of these bands (mean_srad, mean_soil, mean_vpd, mean_pr) is
# selected for the chunked export; the other exports add the whole image.
terraclim = ee.Image(f"{data_folder}/yearly_terraclim") # 10,000m resolution

In [7]:
# CMIP6 images loaded from assets: the historical one plus three SSP scenarios.
# NOTE(review): none of these four images is referenced again in the visible notebook.
historical = ee.Image(f"{data_folder}/CMIP6_historical")
ssp126 = ee.Image(f"{data_folder}/CMIP6_ssp126")
ssp245 = ee.Image(f"{data_folder}/CMIP6_ssp245")
ssp585 = ee.Image(f"{data_folder}/CMIP6_ssp585")


In [22]:
# Annual land-use/land-cover bands, renamed from "classification_<year>" to "<year>".
_lulc_years = list(config.range_1985_2020)
_lulc_source = ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
lulc = (
    _lulc_source
    .select([f"classification_{year}" for year in _lulc_years])
    .byte()
    .rename([str(year) for year in _lulc_years])
)

# 1 where the 2020 class code equals 15, 0 elsewhere.
pastureland = lulc.select("2020").eq(15)

# pasture_area = pastureland.reduceResolution(reducer = ee.Reducer.mean(), bestEffort = True).rename("pasture_area")

# pasture_area = ee.Image(f"{data_folder}/pasture_area").rename("pasture_area")

# map = geemap.Map()
# # map.addLayer(pastureland, {'min':0, 'max':1, 'palette':['green', 'red']}, "pastureland")
# map.addLayer(pasture_area, {'min':0, 'max':1, 'palette':['green', 'red']}, "pasture_area")
# map

# export_image(pasture_area, "pasture_area", region = roi, scale = 1000)


### Export Sampled Full Data

In [11]:
def create_and_export_grid(region_name, target = None, cell_size = 50000, biome_code = 1):
    """Sample one pixel per grid cell inside a biome and export the points as an asset.

    A grid of `cell_size` cells is laid over the selected biome polygon; within
    each cell a single pixel of the "biome" band is drawn with
    `stratifiedSample`. Cells that yield no sample are dropped before export.

    Args:
        region_name: Label embedded in the export description / asset name.
        target: Accepted for backward compatibility but currently NOT applied:
            the `updateMask(target)` step is disabled, so every pixel with a
            biome value is eligible.
        cell_size: Cell size handed to `geemap.create_grid`; the export name
            reports it divided by 1000 with a "k" suffix.
        biome_code: `CD_Bioma` value selecting the biome polygon. The default
            (1) keeps the region that used to be hard-coded, whatever
            `region_name` said.

    Returns:
        The started ee.batch.Task for the asset export.
    """
    # Band to sample from (mask by `target` intentionally left disabled).
    pixels_to_sample = categorical.select("biome")#.updateMask(target)

    # Local name chosen so the notebook-level `roi` is not shadowed.
    # filter(ee.Filter.eq(...)) replaces the deprecated filterMetadata(..., 'equals', ...).
    region = (ee.FeatureCollection(f"{data_folder}/raw/biomes_br")
        .filter(ee.Filter.eq('CD_Bioma', biome_code))
        .geometry())
    export_name = f"grid_{cell_size//1000}k_{region_name}_all"

    # Grid of cells covering the biome polygon.
    grid = geemap.create_grid(region, cell_size, 'EPSG:4326')

    # Function to sample one point per valid cell
    def sample_cell(cell):
        sampled_fc = pixels_to_sample.stratifiedSample(
            numPoints = 1,
            classBand = 'biome',
            region = cell.geometry(),
            scale = 30,
            geometries = True,
            dropNulls = True
        )

        # Only return a real feature if the cell produced one; otherwise
        # return a flagged placeholder that is filtered out below.
        return ee.Feature(ee.Algorithms.If(
            sampled_fc.size().gt(0),
            sampled_fc.first(),
            ee.Feature(ee.Geometry.Point([0, 0])).set('is_null', True)
        ))

    samples = grid.map(sample_cell)

    # Filter out placeholder features before exporting
    samples = samples.filter(ee.Filter.notEquals('is_null', True))

    # Export the feature collection
    export_task = ee.batch.Export.table.toAsset(
        collection = samples,
        description = export_name,
        assetId = f"{data_folder}/{export_name}"
    )

    # Start the export
    export_task.start()
    return export_task

# Launches the grid export labelled 'amazon'. `pastureland` is passed as `target`,
# but the mask step inside the function is commented out, so it does not
# restrict which pixels are sampled.
create_and_export_grid('amazon', pastureland)
# create_and_export_grid('atlantic')

In [None]:
# 0 where mature_biomass_10k has no data, the biomass value everywhere else.
mask = mature_biomass_10k.unmask(0)

# Value drawn from the neighbourhood of valid (non-zero, unmasked) pixels.
# skipMasked=False is required: by default reduceNeighborhood masks its output
# wherever the *input* pixel is masked, i.e. exactly at the gaps to be filled.
# NOTE(review): Reducer.first() returns the first input in the neighbourhood,
# which is not necessarily the nearest valid pixel — confirm this is intended.
neighbour_biomass = mature_biomass_10k.updateMask(mask).reduceNeighborhood(
    reducer=ee.Reducer.first(),
    kernel=ee.Kernel.euclidean(radius=100, units='pixels'),
    skipMasked=False
)

# Image.where leaves masked input pixels untouched, so the replacement has to
# start from the unmasked copy (`mask`) rather than from mature_biomass_10k
# itself; otherwise no NA pixel is ever filled.
# Pixels with no valid neighbour inside the kernel keep the value 0.
filled_biomass = mask.where(
    mask.Not(),  # Target NA (and zero-valued) pixels
    neighbour_biomass
)

export_image(filled_biomass, "filled_biomass", region = roi, scale = 10000)

# filled_biomass = ee.Image(f"{data_folder}/filled_biomass")

# Named `biomass_map` rather than `map` so the Python builtin `map` is not shadowed.
biomass_map = geemap.Map()
biomass_map.addLayer(mature_biomass_10k, {'min':0, 'max':100, 'palette':['white', 'black']}, "mature_biomass_10k")
# biomass_map.addLayer(filled_biomass, {'min':0, 'max':100, 'palette':['white', 'black']}, "filled_biomass")
biomass_map.addLayer(mask, {}, 'mask')
biomass_map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [None]:
# selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_10k_amaz")
grid = ee.FeatureCollection(f"{data_folder}/grid_1k_amazon_pastureland")

# Boundary between pixels with data and pixels without: missing pixels become -1,
# and zeroCrossing marks where the sign changes.
edge_detec = mature_biomass_10k.unmask(-1).zeroCrossing()
# fastDistanceTransform yields squared pixel distances by default; sqrt gives pixels,
# multiplying by sqrt(pixel area) converts to ground units, then a 10000 offset is added.
distance_to_10k_forest = edge_detec.fastDistanceTransform(100, 'pixels').sqrt() \
    .multiply(ee.Image.pixelArea().sqrt()).toInt32().add(10000).rename("distance_to_10k_forest")

# FINAL EXPORT
unified_img = ESA_CCI.addBands([
    fire, 
    quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
    categorical, topography, soil,
    terraclim.select(["mean_srad", "mean_soil", "mean_vpd", "mean_pr"])
])

# 1. Get FeatureCollection size and calculate chunk size
total_features = grid.size().getInfo()
# Ceiling division into ten chunks. Truncating (int(total * 0.1)) silently
# dropped up to nine trailing features and produced zero-sized chunks for
# collections with fewer than ten features.
chunk_size = (total_features + 9) // 10


def buffer_feature(feature):
    """Return `feature` with its geometry buffered by its 'first' property plus 10000.

    The 'first' property is expected to have been written by a preceding
    `reduceRegions(..., ee.Reducer.first())` call.
    """
    radius = feature.getNumber('first').add(10000)
    return feature.setGeometry(feature.geometry().buffer(radius))

# 2. Create chunk processing function
def process_chunk(chunk_index):
    """Sample `unified_img` for one slice of `grid` and start its CSV export.

    The slice covers `chunk_size` features starting at
    `chunk_index * chunk_size`. Reads the notebook globals `grid`,
    `chunk_size`, `distance_to_10k_forest`, `mature_biomass_10k` and
    `unified_img`. Returns nothing; the Drive export is named
    'unified_<chunk_index>'.
    """
    offset = chunk_index * chunk_size
    chunk_points = ee.FeatureCollection(grid.toList(chunk_size, offset))

    # Attach the distance value to each point (stored in the 'first' property).
    points_with_distance = distance_to_10k_forest.reduceRegions(
        collection = chunk_points,
        reducer = ee.Reducer.first(),
        scale = 10000
    )

    # Grow each point by that distance so the region reaches a valid biomass pixel.
    buffered_regions = points_with_distance.map(buffer_feature)

    # First non-null biomass value inside each buffer, then collapse the
    # buffers back to their centroids.
    nearest_mature = mature_biomass_10k.reduceRegions(
        collection = buffered_regions,
        reducer = ee.Reducer.firstNonNull(),
        scale = 10000
    ).map(lambda region: region.centroid())

    unified_fc = unified_img.reduceRegions(nearest_mature, ee.Reducer.first(), 30)

    # Export to Google Drive as CSV.
    ee.batch.Export.table.toDrive(
        collection = unified_fc,
        description = f'unified_{chunk_index}',
        fileFormat = "CSV"
    ).start()

# Launch up to ten chunk exports, skipping any index whose start offset lies
# beyond the end of the collection.
for i in range(10):
    if i*chunk_size >= total_features:
        continue
    process_chunk(i)
    print(f"Starting chunk {i}")

# task = ee.batch.Export.table.toAsset(
#     collection = unified_fc,
#     description = 'unified_fc',
#     assetId = f"{data_folder}/unified_fc_reprojected"
# )
# task.start()


Starting chunk 0: None
Starting chunk 1: None
Starting chunk 2: None
Starting chunk 3: None
Starting chunk 4: None
Starting chunk 5: None
Starting chunk 6: None
Starting chunk 7: None
Starting chunk 8: None
Starting chunk 9: None


## Field Data

In [None]:

field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")

# The original code referenced `distance_to_deep_forest`, which is not defined
# anywhere in this notebook. `distance_to_10k_forest` (built in the chunked-export
# cell) is the distance image used for the same nearest-mature-forest lookup.
# NOTE(review): confirm the two names refer to the same layer.
selected_pixels = distance_to_10k_forest.reduceRegions(
    collection = field_data,
    reducer = ee.Reducer.first(),
    scale = 10000
)

# Buffer each point to reach the nearest pixel.
# `buffer_feature` is the function defined in the chunked-export cell; the
# identical second definition that used to live here was removed.
buffered_features = selected_pixels.map(buffer_feature)

# Extract the biomass value for each buffered region
# This will get the value from nearest valid pixel
nearest_mature = mature_biomass_10k.reduceRegions(
    collection=buffered_features,
    reducer=ee.Reducer.firstNonNull(),
    scale=10000
).map(lambda feature: feature.centroid())

# Predictor stack sampled at the field plots.
unified_img = ESA_CCI.addBands([fire, floodable_forests, 
    quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
    categorical, topography, terraclim, soil
])

unified_field = unified_img.reduceRegions(nearest_mature, ee.Reducer.first(), 30)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(
    collection = unified_field,
    description = 'unified_field',
    fileFormat = "CSV"
)
task.start()



In [5]:
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass") # from https://github.com/forc-db/GROA/tree/master/data
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')


def determine_biome(feature):
    """Tag a field plot with the `CD_Bioma` code of the biome it falls in.

    Takes the first biome polygon returned by `filterBounds` for the plot's
    geometry and stores its `CD_Bioma` value in the plot's 'biome' property.
    """
    matching_biomes = biomes.filterBounds(feature.geometry())
    return feature.set('biome', matching_biomes.first().get('CD_Bioma'))


# Field plots annotated with their biome code.
field_biome = field_data.map(determine_biome)

# Export the annotated plots to Google Drive as CSV.
task = ee.batch.Export.table.toDrive(
    collection = field_biome,
    description = "field_biome",
    fileFormat = "CSV"
)
task.start()

In [18]:
# Annual land-use/land-cover bands, renamed from "classification_<year>" to "<year>".
lulc = (ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
        .select([f"classification_{year}" for year in config.range_1985_2020])
        .byte()
        .rename([str(year) for year in config.range_1985_2020]))

# Bound to its own name: assigning this to `age` replaced the notebook-level
# `age` image (f"{data_folder}/mapbiomas_{last_year}") that the modelling export
# loop further down builds its predictor stack on.
secondary_vegetation_age = ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_secondary_vegetation_age_v1").select("secondary_vegetation_age_2020")

lulc_age = lulc.addBands(secondary_vegetation_age)

# NOTE(review): scale = 10000 is used here, whereas the other reduceRegions calls
# that sample these layers at point locations use 30 — confirm this is intended.
field_age_lulc = lulc_age.reduceRegions(
    collection = field_data,
    reducer = ee.Reducer.first(),
    scale = 10000
)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(
    collection = field_age_lulc,
    description = 'field_age_lulc',
    fileFormat = "CSV"
)
task.start()


### Export Data for Modelling (with different land use aggregations)


In [16]:
# Land-use aggregations to export. Defined here (same values as in the cell
# below) so the loop runs top-to-bottom on a fresh kernel instead of relying on
# names created by a later cell.
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr"
]

land_use_list = [
    ee.Image(f"{data_folder}/land_use_{suffix}")
    for suffix in suffixes
]

# Loop through each land use image
for land_use, suffix in zip(land_use_list, suffixes):

    # Predictor stack for this land-use aggregation.
    unified_img = age.addBands([
        ESA_CCI, fire, floodable_forests, land_use, 
        quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
        categorical, topography, terraclim, soil
    ])

    # Sample and export image.
    # export_csv draws its own stratified sample from `unified_img`. The
    # point-sampling / buffering statements that used to precede this call
    # referenced the undefined `distance_to_deep_forest` and their results were
    # never passed to the export, so they were removed (a commented-out copy
    # remains in the next cell).
    export_csv(unified_img, suffix, 10000, "biome")

In [None]:

# Names of the land-use aggregations; each maps to an asset "land_use_<suffix>".
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr"
]

# One image per suffix, in the same order as `suffixes`.
land_use_list = [
    ee.Image(f"{data_folder}/land_use_{suffix}")
    for suffix in suffixes
]

# First aggregation ("aggregated_all"), used for the map preview below.
land_use = land_use_list[0]


# pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

# selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

# # Buffer each point to reach the nearest pixel
# buffered_features = selected_pixels.map(buffer_feature)

# # Extract the biomass value for each buffered region
# # This will get the value from nearest valid pixel
# nearest_mature = mature_biomass_10k.reduceRegions(
#     collection=buffered_features,
#     reducer=ee.Reducer.firstNonNull(),
#     scale=10000
# )

# unified_img = age.addBands([
#     ESA_CCI, fire, floodable_forests, land_use, 
#     quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
#     categorical, topography, terraclim, soil
# ])

# unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

# Named `land_use_map` rather than `map` so the Python builtin `map` is not
# shadowed for the rest of the kernel session.
land_use_map = geemap.Map()
land_use_map.addLayer(land_use.select("last_lu"), {}, "land_use")
land_use_map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [47]:
def _property_to_image(collection, property_name, scale = 10000):
    """Rasterise one property of a FeatureCollection into a single-band image.

    The property is reduced with `ee.Reducer.first()`, reprojected to
    EPSG:4326 at `scale`, and the band is renamed to `property_name`.
    """
    return collection.reduceToImage(
            properties = [property_name],
            reducer = ee.Reducer.first()
        ).reproject(
            crs='EPSG:4326',  # or match your source CRS
            scale=scale
        ).rename(property_name)


predictions = ee.FeatureCollection(f"{data_folder}/predictions")

# Convert the feature collections to images (only used by the commented-out
# layers below).
predictions_image = _property_to_image(predictions, 'percent_er')
unified = _property_to_image(ee.FeatureCollection(f"{data_folder}/unified_fc"), 'age')

grid_pasture = ee.FeatureCollection(f"{data_folder}/grid_1k_amazon_pastureland")

pasture_area = ee.Image(f"{data_folder}/pasture_area").rename("pasture_area")

# Named `pasture_map` rather than `map` so the Python builtin `map` is not shadowed.
pasture_map = geemap.Map()
pasture_map.addLayer(pasture_area, {'min':0, 'max':1, 'palette':['green', 'red']}, "pasture_area")
pasture_map.addLayer(grid_pasture, { }, "grid_pasture")
# pasture_map.addLayer(predictions_image, {'min': -0.6, 'max': 0, 'palette': ['red', 'green']}, "predictions")
# pasture_map.addLayer(unified, {'min': 0, 'max': 25, 'palette': ['red', 'green']}, "age")
pasture_map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…