<!-- # Processing the final dataset
 -->


# Export data as CSV

In [1]:
import ee
import geemap
from gee_0_utils import *

# Helper star-imported from gee_0_utils (presumably sets up the Earth Engine
# session - confirm in gee_0_utils).
initialize()

# Shared project settings read by the cells below.
config = ProjectConfig()
roi, data_folder, last_year = config.roi, config.data_folder, config.last_year


## Main Model Dataset

### Age, Biomass

In [2]:
# Age, biomass, fire count and floodable-forest mask

# Years used to select and relabel the annual burned-coverage bands.
fire_years = config.range_1985_2020

age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# ESA CCI above-ground biomass for `last_year`, exposed as a single "biomass" band.
ESA_CCI = (
    ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
    .select("AGB")
    .rename("biomass")
)

# One band per year (named by the year) flagging pixels with burned coverage > 0 ...
burned_flags = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in fire_years])
    .byte()
    .rename([str(year) for year in fire_years])
    .gt(0)
)
# ... summed across years into a per-pixel count; masked pixels count as 0.
fire = burned_flags.reduce('sum').rename("num_fires").unmask(0)

# 1 where the `last_year` classification equals class code 6, 0 elsewhere.
lulc_last_year = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
)
floodable_forests = lulc_last_year.eq(6).rename("floodable_forests")


### Surrounding Landscape

In [3]:
# Landscape layers stored in the project's data folder.
sur_cover = ee.Image(f"{data_folder}/sur_cover")
nearest_mature = ee.Image(f"{data_folder}/nearest_mature")
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k")

# Layer stored under the amazon-forest-regrowth project assets.
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")

# Distance layer, plus a self-masked flag that keeps only pixels whose value
# is greater than 1000 (units are not stated in this notebook).
distance_deep_forest = ee.Image(f"{data_folder}/distance_deep_forest")
distance_gt_1000 = distance_deep_forest.gt(1000).selfMask()

### Environmental

In [4]:
# Predictor stacks stored in the project's data folder.
categorical = ee.Image(f"{data_folder}/categorical")
soil = ee.Image(f"{data_folder}/soilgrids")
terraclim = ee.Image(f"{data_folder}/terraclim_1958_2019")

# ALOS landforms (90m resolution), exposed as a single "topography" band.
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")


In [7]:

# CMIP6

# Scenarios
# Each name is the trailing "_<scenario>" part of the asset names stored under
# the CMIP6 folder, which is how load_scenario_image picks the assets to merge.
scenarios = ["historical", "ssp126", "ssp245", "ssp585"]

# Build one multi-band image per scenario from the assets in the CMIP6 folder
def load_scenario_image(scenario):
    """Merge every CMIP6 asset whose name ends in ``_<scenario>`` into one image.

    Args:
        scenario: Scenario suffix to match against the asset names,
            e.g. "historical" or "ssp126".

    Returns:
        The image produced by ``ee.ImageCollection(...).toBands()`` over the
        matching assets, each with its bands renamed to
        ``<asset name without the scenario suffix>_<original band name>``.
    """
    suffix = f"_{scenario}"
    listing = ee.data.listAssets({'parent': f"{data_folder}/CMIP6"})['assets']

    def with_variable_prefix(asset_id):
        # e.g. ".../precipitation_ssp126" -> "precipitation"
        variable = asset_id.split("/")[-1].replace(suffix, "")
        image = ee.Image(asset_id)
        prefixed_names = image.bandNames().map(
            lambda band: ee.String(variable).cat("_").cat(band)
        )
        return image.rename(prefixed_names)

    scenario_images = [
        with_variable_prefix(entry['name'])
        for entry in listing
        if entry['name'].endswith(suffix)
    ]

    # NOTE(review): per the Earth Engine docs, toBands() prefixes each band
    # with the id of the image it came from - confirm the resulting band names
    # are the ones downstream code expects.
    return ee.ImageCollection(scenario_images).toBands()

# Load each scenario, in the order given by the `scenarios` list
# (historical, ssp126, ssp245, ssp585).
historical, ssp126, ssp245, ssp585 = (
    load_scenario_image(scenario) for scenario in scenarios
)

# Show the historical image as the cell output.
historical

### Export CSV

- Total secondary forest area per 1 km² grid cell
- Total pasture area per 1 km² grid cell

In [47]:
# Annual land-use classification, one byte band per year named by the year.
lulc_years = config.range_1985_2020
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select([f"classification_{year}" for year in lulc_years])
    .byte()
    .rename([str(year) for year in lulc_years])
)

# 1 where the 2020 class code is 15, 0 elsewhere (masked pixels included).
is_class_15_in_2020 = lulc.select("2020").eq(15)
pastureland = is_class_15_in_2020.unmask(0).rename("pastureland")

# Average the high-resolution pixels onto the 1km grid and export the result
def reproject_export_image(image, name):
    """Mean-aggregate ``image`` to a 1000-scale EPSG:4326 grid and export it.

    Args:
        image: Image to aggregate (e.g. a 0/1 mask, so the mean is the
            fraction of covered pixels in each coarse cell).
        name: Band name given to the aggregated image; also passed to
            ``export_image`` as the export name.
    """
    # bestEffort lets the reduction proceed when a coarse pixel would cover
    # more than maxPixels input pixels.
    mean_aggregated = image.reduceResolution(
        reducer = ee.Reducer.mean(),
        maxPixels = 1024,
        bestEffort = True,
    )
    coarse = mean_aggregated.reproject(crs = 'EPSG:4326', scale = 1000)
    image_area = coarse.rename(name)

    export_image(image_area, name, region = roi, scale = 1000)


# reproject_export_image(pastureland, "pasture_area")
# reproject_export_image(age.gt(0).unmask(0), "secondary_area")

- One secondary forest pixel per 1 km² grid cell
- One pasture pixel per 1 km² grid cell

In [None]:
# create_grid(age.gt(0).unmask(0), "atlantic", cell_size = 1000, file_name = "secondary")
# create_grid(age.gt(0).unmask(0), "amazon", cell_size = 1000, file_name = "secondary")
# create_grid(pastureland, "amazon", cell_size = 1000, file_name = "pastureland")

# make one grid with all pixels that are NOT mature forests for nearest mature forest estimates
# non_mature_forests = mature_biomass_10k.eq(0).unmask(1).selfMask().rename("non_mature_forests")
# create_grid(non_mature_forests, "amazon", cell_size = 10000, file_name = "non_mature_forests")

Extracting values for every point across the entire Amazon at 1 km resolution in a single task is too heavy, so we split the export into chunks:

In [52]:
# Predictor layers appended to the biomass band, in export column order.
predictor_layers = [
    fire,
    quarters_ecoreg_biomass,
    nearest_mature,
    distance_deep_forest,
    sur_cover,
    categorical,
    topography,
    soil,
    # Pixel coordinates, exported as "lon" / "lat" columns.
    ee.Image.pixelLonLat().rename(['lon', 'lat']),
]

unified_data = ESA_CCI.addBands(predictor_layers)

def export_csv(name, unified_data, n_chunks = 10):
    """Sample the predictor stack at the grid points for ``name`` and export CSV chunks.

    The grid ``{data_folder}/grid_10k_amazon_{name}`` is split into at most
    ``n_chunks`` consecutive slices. Each slice is sampled with
    ``reduceRegions`` (first-value reducer, scale 30) and exported to Google
    Drive by its own task, named ``grid_10k_amazon_{name}_{chunk_index}``.

    Args:
        name: Grid / dataset name. "secondary" adds the age, floodable_forests,
            secondary_area and terraclim bands; "pastureland" adds pasture_area
            and five terraclim means; any other value exports ``unified_data``
            unchanged.
        unified_data: Image whose bands become the CSV columns.
        n_chunks: Maximum number of export tasks to launch (positive integer).
            Fewer tasks are started when the grid does not fill all of them.

    Raises:
        ValueError: If ``n_chunks`` is smaller than 1.
    """
    if n_chunks < 1:
        raise ValueError("n_chunks must be a positive integer")

    if (name == "secondary"):
        secondary_area = ee.Image(f"{data_folder}/secondary_area")
        unified_data = unified_data.addBands([age, floodable_forests,
                                               secondary_area, terraclim])
    elif (name == "pastureland"):
        pasture_area = ee.Image(f"{data_folder}/pasture_area")
        unified_data = unified_data.addBands([pasture_area,
                                              terraclim.select(["mean_srad", "mean_soil", "mean_vpd", "mean_pr", "mean_aet"])])

    # Export only the band columns (this leaves out '.geo' and 'system:index').
    to_remove = ['.geo', 'system:index']
    all_properties = unified_data.bandNames().getInfo()
    properties_to_export = [p for p in all_properties if p not in to_remove]

    grid = ee.FeatureCollection(f"{data_folder}/grid_10k_amazon_{name}")

    total_features = grid.size().getInfo()
    if total_features == 0:
        # Nothing to sample, so do not launch empty export tasks.
        return

    # Ceiling division: rounding down dropped the trailing remainder (up to
    # n_chunks - 1 features) and gave a chunk size of 0 - i.e. n_chunks empty
    # exports - whenever the grid had fewer features than n_chunks.
    chunk_size = -(-total_features // n_chunks)

    def process_chunk(chunk_index, start):
        # toList(count, offset) returns at most `chunk_size` features, so the
        # last chunk is simply shorter than the others.
        chunk = grid.toList(chunk_size, start)
        selected_pixels = ee.FeatureCollection(chunk)

        unified_fc = unified_data.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

        task = ee.batch.Export.table.toDrive(
            collection = unified_fc,
            description = f"grid_10k_amazon_{name}_{chunk_index}",
            fileFormat = "CSV",
            selectors = properties_to_export
        )
        task.start()

    for chunk_index, start in enumerate(range(0, total_features, chunk_size)):
        process_chunk(chunk_index, start)

# export_csv("pastureland", unified_data, n_chunks = 60)
# Launch the chunked Drive export for the "secondary" grid (starts export tasks).
export_csv("secondary", unified_data, n_chunks = 25)

## Field Data

In [None]:
# Sample the unified predictor stack at the field-plot features.
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")
unified_fc = unified_data.reduceRegions(
    collection = field_data,
    reducer = ee.Reducer.first(),
    scale = 30
)

# Drive export of the sampled table; the task is only defined here, not started.
export_settings = dict(
    collection = unified_fc,
    description = 'unified_field',
    fileFormat = "CSV"
)
task = ee.batch.Export.table.toDrive(**export_settings)
# task.start()

## Different land use aggregations


In [None]:
# Land-use aggregation variants; each one is exported as its own CSV.
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    "non_aggregated_10yr",
    "non_aggregated_5yr"
]
land_use_list = [ee.Image(f"{data_folder}/land_use_{suffix}") for suffix in suffixes]

# Every variant is sampled at the same grid points and filtered with the same
# exclusion list, so both are set up once before the loop.
selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_10k_amazon_secondary")
to_remove = ['.geo', 'system:index']

for suffix, land_use in zip(suffixes, land_use_list):
    # Predictor stack plus this variant's land-use bands.
    unified_data_land_use = unified_data.addBands(land_use)
    unified_fc = unified_data_land_use.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

    # Export only the band columns of this variant's stack.
    all_properties = unified_data_land_use.bandNames().getInfo()
    properties_to_export = [p for p in all_properties if p not in to_remove]

    task = ee.batch.Export.table.toDrive(
        collection = unified_fc,
        description = "unified_land_use_" + suffix,
        fileFormat = "CSV",
        selectors = properties_to_export
    )
    task.start()


In [None]:
# Buffered lookup of mature-forest biomass around each secondary-forest grid point.
#
# This cell uses its own names (`pixels_with_distance`, `nearest_mature_fc`,
# `unified_data_buffered`) instead of rebinding `nearest_mature` (an ee.Image
# in the landscape cell) and `unified_data` (the full predictor stack), so the
# earlier export cells still work when re-run after this one.
selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_10k_amazon_secondary")
# WITH BUFFER

mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k").rename("mature_biomass_10k")

# Attach the distance_deep_forest value at each point; buffer_feature reads it
# back from the 'first' property.
pixels_with_distance = distance_deep_forest.reduceRegions(
    collection = selected_pixels,
    reducer = ee.Reducer.first(),
    scale = 10000
)

def buffer_feature(feature):
    """Replace a feature's geometry with a buffer of its 'first' value plus 10000.

    NOTE(review): assumes every feature has a non-null 'first' property, i.e.
    that distance_deep_forest is unmasked at every grid point - confirm.
    """
    distance = feature.getNumber('first').add(10000)
    buffer = feature.geometry().buffer(distance)
    return feature.setGeometry(buffer)

# Buffer each point so the region can reach a mature-forest pixel
buffered_features = pixels_with_distance.map(buffer_feature)

# Extract a biomass value for each buffered region, then collapse the region
# back to its centroid.
# NOTE(review): firstNonNull yields the first non-null pixel found inside the
# buffer, which is not necessarily the pixel nearest to the point - confirm
# this is the intended "nearest" value.
nearest_mature_fc = mature_biomass_10k.reduceRegions(
    collection=buffered_features,
    reducer=ee.Reducer.firstNonNull(),
    scale=10000
).map(lambda feature: feature.centroid())


# Reduced stack for this export: biomass plus the age bands only.
unified_data_buffered = ESA_CCI.addBands(age)

unified_fc = unified_data_buffered.reduceRegions(nearest_mature_fc, ee.Reducer.first(), 30)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(
    collection = unified_fc,
    description = 'unified',
    fileFormat = "CSV"
)
task.start()

# task = ee.batch.Export.table.toAsset(
#     collection = unified_fc,
#     description = 'unified_fc',
#     assetId = f"{data_folder}/unified_fc"
# )
# task.start()