<!-- # Processing the final dataset
 -->


# Export data as CSV

In [6]:
import ee
import geemap
from gee_0_utils import *

# Helper imported from gee_0_utils; presumably authenticates/initialises the
# Earth Engine session — confirm in gee_0_utils.
initialize()
# Project-wide settings object, also provided by gee_0_utils.
config = ProjectConfig()
# Shortcuts used throughout the notebook: the region used for clipping, the
# asset folder prefix used to build asset paths, and the final year used in
# asset and band names.
roi = config.roi
data_folder = config.data_folder
last_year = config.last_year


def export_csv(unified_data, name, numPoints, classBand):
    """Stratified-sample a multi-band image and export the sample to Google Drive as CSV.

    Args:
        unified_data: image to sample; its band names become the CSV columns.
        name: description given to the export task.
        numPoints: forwarded to ``stratifiedSample`` as ``numPoints``.
        classBand: forwarded to ``stratifiedSample`` as ``classBand``.

    Returns:
        The export task, already started, so the caller can keep a handle on it
        (for example to poll its status). Earlier versions returned nothing, so
        existing callers that ignore the result are unaffected.
    """
    # Never export geometry / index columns, only band values.
    excluded = {'.geo', 'system:index'}
    # getInfo() is a blocking client-side request for the band names.
    band_names = unified_data.bandNames().getInfo()
    properties_to_export = [band for band in band_names if band not in excluded]

    sampled_fc = unified_data.stratifiedSample(numPoints = numPoints, classBand = classBand)

    # Export task to Google Drive
    task = ee.batch.Export.table.toDrive(
        collection = sampled_fc,
        description = name,
        fileFormat = "CSV",
        selectors = properties_to_export
    )

    task.start()
    return task



## Data for all biomass and age data comparisons

In [None]:
# Biome class of every pixel; used as the stratification band when sampling.
biome = ee.Image(f"{data_folder}/categorical").select("biome")

# Forest-age layers from three products, each renamed to an age_* band
mapbiomas = ee.Image(f"{data_folder}/mapbiomas_{last_year}").rename(["age_mapbiomas"])
tmf = ee.Image(f"{data_folder}/tmf_{last_year}").rename(["age_tmf"])
silva = (
    ee.Image("projects/ee-regrowth/assets/MB_secondary_forest_Silva_Junior_v2/sforestAge_brazil_V8")
    .select("classification_2020")
    .rename("age_silva")
)

# Response variables (biomass / height products)
GEDI_L2A = ee.Image(f"{data_folder}/GEDI_L2A_{last_year}")
GEDI_L4A = ee.Image(f"{data_folder}/GEDI_L4A_{last_year}")
ESA_CCI = (
    ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
    .select("AGB")
    .rename(f"ESA_CCI_{last_year}")
)
heinrich = (
    ee.Image("projects/ee-regrowth/assets/Heinrich_etal_2021_updates/sforestAGC_climate_only_v1_1")
    .select("classification_2020")
    .rename("heinrich_biomass_2020")
)
# NOTE(review): lang_height is loaded here but is not among the bands stacked
# below — confirm whether it was meant to be part of the comparison.
lang_height = (
    ee.Image('users/nlang/ETH_GlobalCanopyHeight_2020_10m_v1')
    .clip(roi)
    .rename('lang_height')
)

# Stack every layer onto the MapBiomas age band to get one multi-band image,
# then keep only the pixels where all bands are non-zero.
comparisons = mapbiomas.addBands([tmf, silva, ESA_CCI, GEDI_L2A, GEDI_L4A, heinrich, biome])
mask = comparisons.reduce(ee.Reducer.allNonZero())
comparisons = comparisons.updateMask(mask)

# Sample and export image
# export_csv(comparisons, "comparisons_sampled", 10000, "biome")

## Mature forest biomass comparisons

In [None]:
# Mature-forest biomass asset with the biome band appended, so that the
# (currently disabled) export below can stratify its sample by biome.
# Relies on `biome` from the comparisons cell above.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass").addBands(biome)

# export_csv(mature_biomass, "mature_biomass", 10000, "biome")

## Field Data

In [None]:
# Field plots with biomass measurements (source repository linked on the line below).
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass") # from https://github.com/forc-db/GROA/tree/master/data
# Biome polygons, reduced to the 'CD_Bioma' property (the biome code).
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')

def determine_biome(feature):
    """Tag a field plot with the code of the biome it is located in.

    Looks up the first biome polygon in the module-level `biomes` collection
    that intersects the plot geometry and stores its 'CD_Bioma' value on the
    plot as the 'biome' property.

    NOTE(review): a plot that intersects no biome polygon would make first()
    return nothing to read 'CD_Bioma' from — confirm all plots fall inside
    a biome.
    """
    overlapping_biomes = biomes.filterBounds(feature.geometry())
    biome_code = overlapping_biomes.first().get('CD_Bioma')
    return feature.set('biome', biome_code)

# Attach the biome code to every field plot.
field_biomass = field_data.map(determine_biome)

# NOTE(review): the disabled call below does not match export_csv as defined
# in this notebook, which requires numPoints and classBand and calls
# bandNames()/stratifiedSample() on its input; field_biomass is a
# FeatureCollection.
# export_csv(field_biomass, "field_biomass")

# # Export task to Google Drive
# task = ee.batch.Export.table.toDrive(
#     collection = field_biomass,
#     description = "field_biomass",
#     fileFormat = "CSV"
# )

# task.start()

## Main Model Dataset

### Age, Biomass

In [None]:
# Forest age layer for the final year
age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# ESA CCI above-ground biomass band for the final year
ESA_CCI = (
    ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
    .select("AGB")
    .rename(f"ESA_CCI_{last_year}")
)

# Fire: count, per pixel, the years in config.range_1985_2020 with a burned
# coverage value above zero; pixels without data count as 0 fires.
fire = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
    .gt(0)
    .reduce('sum')
    .rename("num_fires")
    .unmask(0)
)

# Land use: 1 where the final-year classification equals class 6, else 0
floodable_forests = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
    .eq(6)
    .rename("floodable_forests")
)


### Surrounding Landscape

In [11]:
# Pre-computed landscape layers
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")
distance_to_forest_edge = ee.Image(f"{data_folder}/distance_to_forest_edge")
sur_cover = ee.Image(f"{data_folder}/sur_cover_500m")

# Mature-forest biomass layers. This rebinds `mature_biomass` to the plain
# asset, without the biome band that an earlier cell appended.
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k")

# Pixels lying more than 1000 from the forest edge, everything else masked.
# NOTE(review): not referenced again in the visible notebook — confirm it is still needed.
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()

# Mark the boundary between valid and masked mature-biomass pixels: masked
# pixels are filled with -1 so the zero crossing falls on that boundary.
edge_detec = mature_biomass_10k.unmask(-1).zeroCrossing()

# Distance from every pixel to that boundary: the transform output is
# square-rooted and scaled by the square root of the pixel area.
# NOTE(review): the constant 10000 added at the end is presumably an
# allowance of one 10 km pixel — confirm.
distance_to_deep_forest = (
    edge_detec
    .fastDistanceTransform(100, 'pixels')
    .sqrt()
    .multiply(ee.Image.pixelArea().sqrt())
    .toInt32()
    .add(10000)
    .rename("dist_deep_forest")
)

### Environmental

In [12]:
# Environmental predictor layers, loaded as-is from the project asset folder
# or from a public catalogue asset.

# Multi-band image; its "biome" band is selected elsewhere in this notebook.
categorical = ee.Image(f"{data_folder}/categorical")

topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography") # 90m resolution

soil = ee.Image(f"{data_folder}/soilgrids")

terraclim = ee.Image(f"{data_folder}/yearly_terraclim") # 10,000m resolution

### Export Sampled Full Data

In [None]:
def create_and_export_grid(region_name, cell_size = 1000):
    """Sample one pixel per grid cell inside a biome and export the points as an asset.

    A regular grid of `cell_size` cells is laid over the chosen biome, one
    pixel is drawn from every cell that contains valid data, and the
    resulting points are exported to `{data_folder}/grid_<N>k_<region>`.

    Relies on the module-level `categorical`, `age` and `data_folder`.

    Args:
        region_name: 'amazon' or 'atlantic' (case-insensitive).
        cell_size: grid cell size passed to geemap.create_grid; the export
            name uses `cell_size // 1000` as its "<N>k" part.

    Returns:
        The export task, already started.

    Raises:
        ValueError: if `region_name` is not a supported region. Previously an
            unsupported name failed later with an UnboundLocalError.
    """
    # Supported regions: 'CD_Bioma' code of the biome and export-name suffix.
    regions = {
        'amazon': (1, 'amaz'),
        'atlantic': (4, 'atla'),
    }

    # Validate before building any Earth Engine objects so bad input fails fast.
    region_key = region_name.lower()
    if region_key not in regions:
        raise ValueError(
            f"Unknown region_name {region_name!r}; expected one of {sorted(regions)}"
        )
    biome_code, suffix = regions[region_key]
    export_name = f"grid_{cell_size//1000}k_{suffix}"

    # Candidate pixels: the biome band, restricted to where `age` is unmasked and non-zero.
    pixels_to_sample = categorical.select("biome").updateMask(age)

    region_geometry = (
        ee.FeatureCollection(f"{data_folder}/raw/biomes_br")
        .filterMetadata('CD_Bioma', 'equals', biome_code)
        .geometry()
    )

    # Regular grid covering the region
    grid = geemap.create_grid(region_geometry, cell_size, 'EPSG:4326')

    # Function to sample one point per valid cell
    def sample_cell(cell):
        sampled_fc = pixels_to_sample.stratifiedSample(
            numPoints=1,
            classBand='biome',
            region=cell.geometry(),
            scale=30,
            geometries=True,
            dropNulls=True
        )
        # Only return a feature if we found one
        return ee.Feature(ee.Algorithms.If(
            sampled_fc.size().gt(0),
            sampled_fc.first(),
            # Return a placeholder that we can filter out later
            ee.Feature(ee.Geometry.Point([0, 0])).set('is_null', True)
        ))

    samples = grid.map(sample_cell)

    # Filter out placeholder features before exporting
    samples = samples.filter(ee.Filter.notEquals('is_null', True))

    # Export the feature collection
    export_task = ee.batch.Export.table.toAsset(
        collection=samples,
        description=export_name,
        assetId=f"{data_folder}/{export_name}"
    )

    # Start the export
    export_task.start()
    return export_task

# create_and_export_grid('amazon')
# create_and_export_grid('atlantic')

In [None]:
# Load the previously exported sample points, then attach to each point the
# value of distance_to_deep_forest at its location. ee.Reducer.first() stores
# that value in a property called 'first', which buffer_feature reads.
selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_10k_amaz")
selected_pixels = distance_to_deep_forest.reduceRegions(
    collection=selected_pixels,
    reducer=ee.Reducer.first(),
    scale=10000
)

def buffer_feature(feature):
    """Replace a feature's geometry with a buffer sized from its 'first' property.

    The buffer distance is the feature's 'first' value plus 10000; the
    feature's properties are left as they are.
    """
    radius = feature.getNumber('first').add(10000)
    return feature.setGeometry(feature.geometry().buffer(radius))

# Grow every sample point into a buffer sized by buffer_feature
buffered_features = selected_pixels.map(buffer_feature)

# Take the first non-null mature-biomass value found inside each buffer,
# then collapse the buffers back to their centroids.
nearest_mature = (
    mature_biomass_10k
    .reduceRegions(
        collection=buffered_features,
        reducer=ee.Reducer.firstNonNull(),
        scale=10000
    )
    .map(lambda buffered: buffered.centroid())
)

# All predictor and response bands stacked onto the age band
unified_img = age.addBands([
    ESA_CCI, fire, floodable_forests,
    quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
    categorical, topography, terraclim, soil
])

# Read every band of the stacked image at each sample point
unified_fc = unified_img.reduceRegions(
    collection=nearest_mature,
    reducer=ee.Reducer.first(),
    scale=30
)

# The asset export is only prepared here; this statement does not start it.
task = ee.batch.Export.table.toAsset(
    collection=unified_fc,
    description='unified_fc',
    assetId=f"{data_folder}/unified_fc"
)

# to_remove = ['.geo', 'system:index']
# all_properties = unified_img.bandNames().getInfo()
# properties_to_export = [p for p in all_properties if p not in to_remove]

# # Export task to Google Drive
# task = ee.batch.Export.table.toDrive(
#     collection = unified_fc,
#     description = 'unified_fc',
#     fileFormat = "CSV",
#     selectors = properties_to_export
# )

# task.start()



### Export Data for Modelling (with different land use aggregations)


In [16]:
# Land-use aggregation variants to export. They are defined in this cell so
# the loop runs on a fresh kernel (Restart & Run All); before, it relied on
# names that are only assigned in a cell further down the notebook.
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr"
]

land_use_list = [
    ee.Image(f"{data_folder}/land_use_{suffix}")
    for suffix in suffixes
]

# Loop through each land use image
for land_use, suffix in zip(land_use_list, suffixes):

    pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

    selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

    # NOTE(review): selected_pixels, buffered_features, nearest_mature and
    # unified_fc are built in this loop but never passed to the export below,
    # which samples unified_img again. Also, selected_pixels is sampled without
    # geometries=True, and buffer_feature reads a 'first' property — confirm
    # whether the nearest-mature-biomass value was meant to be exported.

    # Buffer each point to reach the nearest pixel
    buffered_features = selected_pixels.map(buffer_feature)

    # Extract the biomass value for each buffered region
    # This will get the value from nearest valid pixel
    nearest_mature = mature_biomass_10k.reduceRegions(
        collection=buffered_features,
        reducer=ee.Reducer.firstNonNull(),
        scale=10000
    )

    unified_img = age.addBands([
        ESA_CCI, fire, floodable_forests, land_use, 
        quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
        categorical, topography, terraclim, soil
    ])

    unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

    # Sample and export image (one CSV per land-use variant, named by suffix)
    export_csv(unified_img, suffix, 10000, "biome")

In [None]:

# Names of the land-use aggregation variants; each one is the tail of an
# asset name `land_use_<suffix>` in the project asset folder.
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr"
]

# One image per variant, in the same order as `suffixes`.
land_use_list = [
    ee.Image(f"{data_folder}/land_use_{suffix}")
    for suffix in suffixes
]

# The first variant ("aggregated_all") is the one inspected on the map below.
land_use = land_use_list[0]


# pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

# selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

# # Buffer each point to reach the nearest pixel
# buffered_features = selected_pixels.map(buffer_feature)

# # Extract the biomass value for each buffered region
# # This will get the value from nearest valid pixel
# nearest_mature = mature_biomass_10k.reduceRegions(
#     collection=buffered_features,
#     reducer=ee.Reducer.firstNonNull(),
#     scale=10000
# )

# unified_img = age.addBands([
#     ESA_CCI, fire, floodable_forests, land_use, 
#     quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
#     categorical, topography, terraclim, soil
# ])

# unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

# Interactive map showing the "last_lu" band of the selected land-use image.
# The variable is not called `map`, so the Python builtin of that name stays
# usable for the rest of the kernel session.
land_use_map = geemap.Map()
land_use_map.addLayer(land_use.select("last_lu"), {}, "land_use")
land_use_map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [22]:
# points = ee.FeatureCollection(f"{data_folder}/unified_fc1").filter(ee.Filter.eq('ESA_CCI_2020', None))

# # Convert to a FeatureCollection of centroids
# centroid_fc = points.map(lambda feature: feature.centroid())

# map = geemap.Map()
# map.addLayer(centroid_fc, {}, 'centroid_fc')
# map.addLayer(unified_img, {}, 'unified_img')
# map