<!-- # Processing the final dataset
 -->


# Export data as CSV

In [2]:
import ee
import geemap
from gee_0_utils import *

# Helper from gee_0_utils; presumably sets up the Earth Engine session — confirm there.
initialize()

# Pull the shared project settings into notebook-level names used by every cell below.
config = ProjectConfig()
roi, data_folder, last_year = config.roi, config.data_folder, config.last_year


## Main Model Dataset

### Age, Biomass

In [3]:
# Image stored as mapbiomas_<last_year> in the project data folder
# (used as the secondary-forest age layer in the exports below).
age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# Biomass layer read from the project's raw folder. Public alternative kept for reference:
# ESA_CCI = (ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51").select("AGB").rename("biomass"))
ESA_CCI = ee.Image(f"{data_folder}/raw/biomass").rename("biomass")

# Yearly burned-coverage bands, renamed to plain year strings.
fire_years = list(config.range_1985_2020)
burned_coverage = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in fire_years])
    .byte()
    .rename([str(year) for year in fire_years])
)

# Count the years with a burned-coverage value above zero; masked pixels become 0 fires.
fire = burned_coverage.gt(0).reduce('sum').rename("num_fires").unmask(0)

# 1 where the last-year land-use class code equals 6, 0 for any other class.
lulc_last_year = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
)
floodable_forests = lulc_last_year.eq(6).rename("floodable_forests")


### Surrounding Landscape

In [4]:
# Layer referenced by its full, hard-coded asset path.
quarters_ecoreg_biomass = ee.Image("projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass")

# Layers stored in the project data folder, loaded by asset name.
(distance_to_forest_edge, sur_cover,
 mature_biomass, mature_biomass_10k, nearest_mature) = (
    ee.Image(f"{data_folder}/{asset_name}")
    for asset_name in (
        "distance_forest_edge",
        "sur_cover",
        "mature_biomass",
        "mature_biomass_10k",
        "nearest_mature",
    )
)

# Mask that keeps only pixels whose distance value is greater than 1000.
distance_gt_1000 = distance_to_forest_edge.gt(1000).selfMask()

### Environmental

In [30]:
# Layers stored in the project data folder.
categorical = ee.Image(f"{data_folder}/categorical")
soil = ee.Image(f"{data_folder}/soilgrids")
terraclim = ee.Image(f"{data_folder}/yearly_terraclim")  # 10,000m resolution

# Public landform classification, exposed under the band name "topography".
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")  # 90m resolution

# CMIP6: the historical run plus three SSP scenarios, one image each.
historical, ssp126, ssp245, ssp585 = (
    ee.Image(f"{data_folder}/CMIP6_{run}")
    for run in ("historical", "ssp126", "ssp245", "ssp585")
)

### Export CSV

- Total secondary forest area per 1km2
- Total pasture area per 1km2

In [31]:
# Yearly land-use classification with one band per year, each band named by its year.
lulc_years = list(config.range_1985_2020)
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select([f"classification_{year}" for year in lulc_years])
    .byte()
    .rename([str(year) for year in lulc_years])
)

# 1 where the 2020 class code is 15, 0 everywhere else (masked pixels are filled with 0).
pastureland = lulc.select("2020").eq(15).unmask(0).rename("pastureland")

# Aggregate the high-resolution pixels into the 1km grid
def reproject_export_image(image, name, scale = 1000, crs = 'EPSG:4326'):
    """Aggregate a high-resolution image onto a coarser grid and export it.

    Each output pixel holds the mean of the input pixels that fall inside it.
    For a 0/1 mask this mean is the fraction of the coarse cell that is
    covered by the mask.

    Args:
        image: ee.Image to aggregate.
        name: Band name given to the aggregated image; also passed to
            `export_image` as the export name.
        scale: Output pixel size handed to `reproject` and `export_image`.
            Defaults to 1000, the value that used to be hard-coded.
        crs: Output projection. Defaults to 'EPSG:4326'.

    Returns:
        None. The export is handed to `export_image` (from gee_0_utils) over
        the notebook-level `roi`.
    """
    image_area = image.reduceResolution(
        reducer = ee.Reducer.mean(),
        maxPixels = 1024,
        # When more than maxPixels input pixels fall into one output pixel,
        # bestEffort lets Earth Engine start from an already-reduced pyramid
        # level instead of raising an error.
        bestEffort = True
    ).reproject(
        crs = crs,
        scale = scale
    ).rename(name)

    export_image(image_area, name, region = roi, scale = scale)


# reproject_export_image(pastureland, "pasture_area")
# reproject_export_image(age.gt(0).unmask(0), "secondary_area")

- 1 secondary forest pixel per 1km2
- 1 pasture pixel per 1km2
- 1 centroid per 50km2 (for visualization for the full Amazon)

In [20]:
# create_grid(age_mask, "amazon", cell_size = 1000, file_name = "secondary")
# create_grid(pastureland, "amazon", cell_size = 1000, file_name = "pastureland")

# amazon = ee.Image(f"{data_folder}/categorical").select("biome").eq(1).rename("amazon")
# create_grid(amazon, "amazon", cell_size = 50000, file_name = "all")

Extracting all points for the entire Amazon at 1 km resolution in a single request is too heavy to process, so we export the data in chunks:

In [None]:
# Layers shared by every export; pixel coordinates are appended as "lon" / "lat" bands.
shared_layers = [
    fire,
    quarters_ecoreg_biomass,
    nearest_mature,
    distance_to_forest_edge,
    sur_cover,
    categorical,
    topography,
    soil,
    ee.Image.pixelLonLat().rename(['lon', 'lat']),
]

# The biomass image comes first, followed by the shared layers above.
unified_data = ESA_CCI.addBands(shared_layers)

def export_csv(name, unified_data, n_chunks = 10):
    """Sample `unified_data` at the 1 km grid points and export the table to Drive in chunks.

    The grid is read from `{data_folder}/grid_1k_amazon_{name}` and split into
    at most `n_chunks` consecutive slices. Each slice is sampled with
    `reduceRegions` and exported as its own CSV task named
    `grid_1k_amazon_{name}_{chunk_index}`.

    Args:
        name: Grid / dataset name. "secondary" adds the age, floodable-forest,
            secondary-area and terraclim bands; "pastureland" adds the
            pasture-area band and four terraclim bands; any other value
            exports `unified_data` unchanged.
        unified_data: ee.Image holding the bands to sample.
        n_chunks: Maximum number of export tasks to start (must be >= 1).

    Raises:
        ValueError: If `n_chunks` is smaller than 1.
    """
    if n_chunks < 1:
        raise ValueError(f"n_chunks must be at least 1, got {n_chunks}")

    if name == "secondary":
        secondary_area = ee.Image(f"{data_folder}/secondary_area")
        unified_data = unified_data.addBands([age, floodable_forests,
                                              secondary_area, terraclim])
    elif name == "pastureland":
        pasture_area = ee.Image(f"{data_folder}/pasture_area")
        unified_data = unified_data.addBands([pasture_area,
                                              terraclim.select(["mean_srad", "mean_soil", "mean_vpd", "mean_pr"])])

    # Export one column per band; '.geo' and 'system:index' are excluded from the selectors.
    to_remove = ['.geo', 'system:index']
    all_properties = unified_data.bandNames().getInfo()
    properties_to_export = [p for p in all_properties if p not in to_remove]

    grid = ee.FeatureCollection(f"{data_folder}/grid_1k_amazon_{name}")

    # Ceiling division: rounding down would silently drop the last
    # `total_features % n_chunks` grid points, and would give a chunk size of 0
    # (empty exports) whenever there are fewer features than chunks.
    total_features = grid.size().getInfo()
    chunk_size = -(-total_features // n_chunks)

    def process_chunk(chunk_index):
        """Sample one slice of the grid and start its Drive export task."""
        start = chunk_index * chunk_size
        chunk = grid.toList(chunk_size, start)
        selected_pixels = ee.FeatureCollection(chunk)

        unified_fc = unified_data.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

        task = ee.batch.Export.table.toDrive(
            collection = unified_fc,
            description = f"grid_1k_amazon_{name}_{chunk_index}",
            fileFormat = "CSV",
            selectors = properties_to_export
        )
        task.start()

    for i in range(n_chunks):
        # Trailing chunks that would start past the end of the grid are skipped.
        if i * chunk_size < total_features:
            process_chunk(i)
            print(f"Starting chunk {i}")

# export_csv("pastureland", unified_data, n_chunks = 60)
# export_csv("secondary", unified_data, n_chunks = 25)

Starting chunk 0
Starting chunk 1
Starting chunk 2
Starting chunk 3
Starting chunk 4
Starting chunk 5
Starting chunk 6
Starting chunk 7
Starting chunk 8
Starting chunk 9
Starting chunk 10
Starting chunk 11
Starting chunk 12
Starting chunk 13
Starting chunk 14
Starting chunk 15
Starting chunk 16
Starting chunk 17
Starting chunk 18
Starting chunk 19
Starting chunk 20
Starting chunk 21
Starting chunk 22
Starting chunk 23
Starting chunk 24
Starting chunk 25
Starting chunk 26
Starting chunk 27
Starting chunk 28
Starting chunk 29
Starting chunk 30
Starting chunk 31
Starting chunk 32
Starting chunk 33
Starting chunk 34
Starting chunk 35
Starting chunk 36
Starting chunk 37
Starting chunk 38
Starting chunk 39
Starting chunk 40
Starting chunk 41
Starting chunk 42
Starting chunk 43
Starting chunk 44
Starting chunk 45
Starting chunk 46
Starting chunk 47
Starting chunk 48
Starting chunk 49
Starting chunk 50
Starting chunk 51
Starting chunk 52
Starting chunk 53
Starting chunk 54
Starting chunk 55
St

We also export the pixels for the entire Amazon at 50 km resolution, for visualization.

In [None]:
# Centroid grid covering the whole Amazon at 50 km spacing.
selected_pixels = ee.FeatureCollection(f"{data_folder}/grid_50k_amazon_all")

# Build the 50 km image under its own name instead of overwriting `unified_data`.
# Overwriting made this cell non-idempotent: re-running it, or calling
# export_csv() afterwards, added the terraclim bands a second time, and
# addBands renames duplicates with a numeric suffix.
unified_data_50k = unified_data.addBands(terraclim.select(["mean_srad", "mean_soil", "mean_vpd", "mean_pr"]))

# Export one column per band; '.geo' and 'system:index' are excluded from the selectors.
to_remove = ['.geo', 'system:index']
all_properties = unified_data_50k.bandNames().getInfo()
properties_to_export = [p for p in all_properties if p not in to_remove]

# Take the first pixel value of every band at each grid feature (30 m sampling scale).
unified_fc = unified_data_50k.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

task = ee.batch.Export.table.toDrive(
            collection = unified_fc,
            description = "grid_50k_amazon_all",
            fileFormat = "CSV",
            selectors = properties_to_export
        )

# task.start()


# task = ee.batch.Export.table.toAsset(
#             collection = unified_fc,
#             description = "grid_50k_amazon_all",
#             assetId = "projects/amazon-forest-regrowth/assets/grid_50k_amazon_all_unified")
# task.start()


## Field Data

In [None]:
# Field plots at which the predictor layers are sampled.
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")

unified_img = ESA_CCI.addBands([fire, floodable_forests,
    quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
    categorical, topography, terraclim, soil
])

# Sample at the field plots. The previous call passed `nearest_mature`, which
# is an ee.Image when the notebook is run top to bottom and so is not a valid
# collection for reduceRegions; `field_data` was loaded above but never used.
unified_field = unified_img.reduceRegions(field_data, ee.Reducer.first(), 30)

# Export task to Google Drive
task = ee.batch.Export.table.toDrive(
    collection = unified_field,
    description = 'unified_field',
    fileFormat = "CSV"
)
# task.start()



In [None]:
# Field plots (from https://github.com/forc-db/GROA/tree/master/data) and the
# biome polygons, reduced to their 'CD_Bioma' attribute.
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')


def determine_biome(feature):
    """Return `feature` with a 'biome' property set to the 'CD_Bioma' value of
    the first biome polygon that intersects its geometry."""
    # NOTE(review): if no polygon intersects the plot, first() has nothing to
    # return — confirm every plot falls inside a biome polygon.
    intersecting_biomes = biomes.filterBounds(feature.geometry())
    biome_code = intersecting_biomes.first().get('CD_Bioma')
    return feature.set('biome', biome_code)


# Tag every field plot with its biome code.
field_biome = field_data.map(determine_biome)

# Drive export of the tagged plots (the task is prepared but not started here).
task = ee.batch.Export.table.toDrive(
    collection = field_biome,
    description = "field_biome",
    fileFormat = "CSV",
)

# task.start()

In [None]:
# NOTE(review): this cell re-binds the notebook-level names `lulc` and `age`;
# cells run after it see these versions.
classification_bands = [f"classification_{year}" for year in config.range_1985_2020]
year_band_names = [str(year) for year in config.range_1985_2020]

# Yearly land-use classification with one band per year, each band named by its year.
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(classification_bands)
    .byte()
    .rename(year_band_names)
)

# Secondary-vegetation age band for 2020 from the public MapBiomas asset.
age = ee.Image(
    "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_secondary_vegetation_age_v1"
).select("secondary_vegetation_age_2020")

lulc_age = lulc.addBands(age)

# Take the first pixel value of every band at each field plot.
# NOTE(review): sampled at a 10000 m scale while the other exports use 30 — confirm this is intended.
field_age_lulc = lulc_age.reduceRegions(
    reducer = ee.Reducer.first(),
    collection = field_data,
    scale = 10000,
)

# Drive export of the sampled plots (the task is prepared but not started here).
task = ee.batch.Export.table.toDrive(
    description = 'field_age_lulc',
    collection = field_age_lulc,
    fileFormat = "CSV",
)
# task.start()

### Export Data for Modelling (with different land use aggregations)


In [16]:
# Loop through each land use image and its matching file suffix.
# NOTE(review): `land_use_list` and `suffixes` are assigned in a later cell of
# this notebook, so on a fresh kernel this cell fails unless that cell runs first.
# NOTE(review): `distance_to_deep_forest` and `buffer_feature` are not defined in
# this notebook; presumably they come from `from gee_0_utils import *` — confirm.
for land_use, suffix in zip(land_use_list, suffixes):

    pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

    # Draw a stratified sample of pixels, using the "biome" band as the class band.
    selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

    # Buffer each point to reach the nearest pixel
    buffered_features = selected_pixels.map(buffer_feature)

    # Extract the biomass value for each buffered region:
    # firstNonNull keeps the first non-null value found inside each buffer.
    # NOTE(review): `nearest_mature` is assigned here but not read again inside this loop.
    nearest_mature = mature_biomass_10k.reduceRegions(
        collection=buffered_features,
        reducer=ee.Reducer.firstNonNull(),
        scale=10000
    )

    # Stack the age image with the predictor layers and this iteration's land-use image.
    unified_img = age.addBands([
        ESA_CCI, fire, floodable_forests, land_use, 
        quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
        categorical, topography, terraclim, soil
    ])

    # NOTE(review): `unified_fc` is assigned but not used afterwards in this loop.
    unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

    # Sample and export image
    # NOTE(review): this call passes four positional arguments, but the
    # `export_csv(name, unified_data, n_chunks = 10)` defined earlier in this
    # notebook accepts three — it looks written for a different `export_csv`
    # (possibly from gee_0_utils); confirm which one is intended.
    export_csv(unified_img, suffix, 10000, "biome")

In [None]:

# Land-use aggregation variants to export ("non_aggregated_15yr" is currently disabled).
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    # "non_aggregated_15yr",
    "non_aggregated_5yr",
]

# One land-use image per suffix, in the same order as `suffixes`.
land_use_list = [ee.Image(f"{data_folder}/land_use_{suffix}") for suffix in suffixes]

# The first variant is the one inspected on the map below.
land_use = land_use_list[0]


# pixels_to_sample = categorical.select("biome").addBands(distance_to_deep_forest).updateMask(land_use.select("last_lu")) # selecting only for the pixels with the desired land use history

# selected_pixels = pixels_to_sample.stratifiedSample(numPoints = 10000, classBand = "biome")

# # Buffer each point to reach the nearest pixel
# buffered_features = selected_pixels.map(buffer_feature)

# # Extract the biomass value for each buffered region
# # This will get the value from nearest valid pixel
# nearest_mature = mature_biomass_10k.reduceRegions(
#     collection=buffered_features,
#     reducer=ee.Reducer.firstNonNull(),
#     scale=10000
# )

# unified_img = age.addBands([
#     ESA_CCI, fire, floodable_forests, land_use, 
#     quarters_ecoreg_biomass, distance_to_forest_edge, sur_cover,
#     categorical, topography, terraclim, soil
# ])

# unified_fc = unified_img.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

# Named `land_use_map` rather than `map` so the Python builtin map() is not
# shadowed for the rest of the kernel session.
land_use_map = geemap.Map()
land_use_map.addLayer(land_use.select("last_lu"), {}, "land_use")
land_use_map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

## Mature forest biomass comparisons

In [None]:
# Biome band taken from the categorical asset.
biome = ee.Image(f"{data_folder}/categorical").select("biome")

# Mature-forest biomass image with the biome band appended.
# This re-binds the notebook-level name `mature_biomass`.
mature_biomass_asset = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass = mature_biomass_asset.addBands(biome)

# export_csv(mature_biomass, "mature_biomass", 10000, "biome")