<!-- # Processing the final dataset
 -->


# Export data as CSV

In [21]:
import ee
import geemap
from utils import *

# `initialize` and `ProjectConfig` are not defined in this notebook; they come
# from the star-import of `utils` (presumably `initialize` sets up the Earth
# Engine session -- confirm in utils).
initialize()

# Project-wide settings read as module-level names by the cells below.
config = ProjectConfig()
roi, data_folder, last_year = config.roi, config.data_folder, config.last_year


## Main Model Dataset

### Age, Biomass

In [22]:
# Fire and Land Use
# age = ee.Image(f"{data_folder}/mapbiomas_{last_year}")

# Secondary-vegetation age: the 2020 band of the MapBiomas asset, renamed "age".
age = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_secondary_vegetation_age_v1")
    .select("secondary_vegetation_age_2020")
    .rename("age")
)

# Biomass: the "AGB" band of the first ESA CCI image matching the date filter.
ESA_CCI = (
    ee.ImageCollection("projects/sat-io/open-datasets/ESA/ESA_CCI_AGB")
    .filterDate('2019-01-01', '2021-01-01')
    .first()
    .select("AGB")
    .rename("biomass")
)

# Per-pixel count of years (over config.range_1985_2020) whose burned-coverage
# band is > 0; pixels masked in the result are filled with 0.
fire = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
    .gt(0)
    .reduce('sum')
    .rename("num_fires")
    .unmask(0)
)

# Binary band: 1 where the `last_year` classification equals class 6.
floodable_forests = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select(f"classification_{last_year}")
    .eq(6)
    .rename("floodable_forests")
)

# Both area layers are renamed to the same band name, "area".
secondary_area = ee.Image(f"{data_folder}/secondary_area_1km").rename("area")
pasture_area = ee.Image(f"{data_folder}/pastureland_area_1km").rename("area")

# "edge" is 1 wherever the mapbiomas_2020 asset is masked or not > 0, else 0.
mapbiomas_2020 = (
    ee.Image("projects/amazon-forest-regrowth/assets/mapbiomas_2020")
    .gt(0)
    .unmask(0)
    .Not()
    .rename("edge")
)


### Surrounding Landscape

In [23]:
# Layers stored under the shared amazon-forest-regrowth project.
ecoreg = ee.Image("projects/amazon-forest-regrowth/assets/ecoreg")
quarters_ecoreg_biomass = ee.Image(
    "projects/amazon-forest-regrowth/assets/quarters_ecoreg_biomass"
)

# Layers stored under the configured data folder.
sur_cover = ee.Image(f"{data_folder}/sur_cover")
nearest_mature = ee.Image(f"{data_folder}/nearest_mature")
mature_biomass = ee.Image(f"{data_folder}/mature_biomass")
mature_biomass_10k = ee.Image(f"{data_folder}/mature_biomass_10k")

# Distance layer (band renamed "dist") and a self-masked image that keeps only
# the pixels whose distance value is greater than 1000.
distance_deep_forest = (
    ee.Image(f"{data_folder}/distance_deep_forest")
    .rename("dist")
)
distance_gt_1000 = distance_deep_forest.gt(1000).selfMask()

### Environmental

In [24]:
# Pre-computed project assets stored under the configured data folder.
categorical = ee.Image(f"{data_folder}/categorical")
soil = ee.Image(f"{data_folder}/soilgrids")
terraclim = ee.Image(f"{data_folder}/terraclim_1958_2019")

# Public landform classification, renamed to a single "topography" band.
topography = (
    ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms")
    .rename("topography")  # 90m resolution
)

### Export CSV


In [None]:
def reduce_reproject(image, reducer):
    """Aggregate ``image`` with ``reducer`` and reproject it onto the ``age`` grid.

    Args:
        image: Image to resample; ``reduceResolution`` is called on it.
        reducer: Reducer handed to ``reduceResolution`` (this notebook passes
            ``ee.Reducer.mean()`` and ``ee.Reducer.first()``).

    Returns:
        The reduced image, reprojected to the CRS and nominal scale of the
        module-level ``age`` image.
    """
    target_projection = age.projection()
    aggregated = image.reduceResolution(reducer=reducer)
    return aggregated.reproject(
        crs=target_projection,
        scale=target_projection.nominalScale(),
    )


# Continuous layers: resampled below with a mean reducer.
continuous_vars = [
    nearest_mature.rename("nearest_mature"),  # double
    soil,  # float
    terraclim.select([
        "mean_soil", "mean_vpd", "mean_temp", "mean_def",
        "mean_srad", "mean_pr", "mean_aet", "mean_pdsi",
    ]),  # float and int16
]

# Categorical layers: resampled below with a "first" reducer, so no averaging
# is applied to their values.
categorical_vars = [
    fire,  # int64
    ecoreg,  # int16
    categorical,  # int8
    topography,  # int8
]

# Shared predictor stack. Band order: layers used as-is, mean-resampled
# continuous layers, first-resampled categorical layers, then pixel coordinates.
unified_data = ee.Image.cat(
    [
        age,  # int8
        floodable_forests,
        distance_deep_forest,  # int16
        sur_cover,  # float
    ]
    + [reduce_reproject(layer, ee.Reducer.mean()) for layer in continuous_vars]
    + [reduce_reproject(layer, ee.Reducer.first()) for layer in categorical_vars]
    + [ee.Image.pixelLonLat().rename(['lon', 'lat'])]
)

# Pasture variant: shared stack plus the pasture area band.
unified_data_pasture = ee.Image.cat([
    unified_data,
    pasture_area.rename('pasture_area'),
])

# Secondary-forest variant: shared stack plus mean-resampled biomass, the
# secondary area band, the quarters/ecoregion biomass asset and the "edge" band.
unified_data_secondary = ee.Image.cat([
    unified_data,
    reduce_reproject(ESA_CCI.rename("biomass"), ee.Reducer.mean()),
    secondary_area,  # double
    quarters_ecoreg_biomass,
    mapbiomas_2020,
])

Extracting every point in a single request is too heavy for the entire Amazon at 1 km resolution, so we export the data in chunks:

In [26]:
def export_csv(name, image, grid_size, n_chunks=1, lu_name=None):
    """Sample ``image`` at the grid features and export the result to Drive as CSV.

    The grid is read from
    ``{data_folder}/grid_{grid_size}k_amazon_{name}_allpixels`` and split into at
    most ``n_chunks`` consecutive slices. One export task is started per
    non-empty slice, described as ``grid_{grid_size}k_{name}_{chunk_index}``
    (with ``_{lu_name}`` appended to ``name`` when ``lu_name`` is given).

    Args:
        name: Grid identifier used in the grid asset path and the task names.
        image: Image whose bands are sampled with a "first" reducer at scale 30.
        grid_size: Grid size token used in the asset path and the task names.
        n_chunks: Maximum number of export tasks to split the grid into.
        lu_name: Optional suffix appended to ``name`` in the task names only;
            the grid asset path always uses the bare ``name``.

    Raises:
        ValueError: If ``n_chunks`` is smaller than 1.

    Note:
        Two blocking ``getInfo()`` calls are made (band names and grid size)
        before any task is started. Only the image's band names are exported
        as CSV columns.
    """
    if n_chunks < 1:
        raise ValueError(f"n_chunks must be a positive integer, got {n_chunks!r}")

    # Columns to write: the image bands, minus Earth Engine bookkeeping fields.
    band_names = image.bandNames().getInfo()
    selectors = [band for band in band_names if band not in ('system:index', '.geo')]

    grid = ee.FeatureCollection(f"{data_folder}/grid_{grid_size}k_amazon_{name}_allpixels")
    total_features = grid.size().getInfo()

    # Ceiling division: flooring here (as int(total / n_chunks) does) leaves the
    # last `total % n_chunks` features outside every chunk, and yields a chunk
    # size of 0 -- i.e. n_chunks empty exports -- when total < n_chunks.
    chunk_size = -(-total_features // n_chunks)

    # The land-use suffix only affects task names, never the grid asset path.
    export_name = f"{name}_{lu_name}" if lu_name else name

    for chunk_index in range(n_chunks):
        start = chunk_index * chunk_size
        if start >= total_features:
            # Every feature is already covered; later chunks would be empty.
            break

        selected_pixels = ee.FeatureCollection(grid.toList(chunk_size, start))
        unified_fc = image.reduceRegions(selected_pixels, ee.Reducer.first(), 30)

        task = ee.batch.Export.table.toDrive(
            collection=unified_fc,
            description=f"grid_{grid_size}k_{export_name}_{chunk_index}",
            fileFormat="CSV",
            selectors=selectors,
        )
        task.start()

# Alternative exports, kept for reference:
# export_csv("pastureland", unified_data_pasture, 1, n_chunks = 60)
# export_csv("secondary", unified_data, 1, n_chunks = 30)

# Active export: the secondary-forest stack on the 10k grid, in 30 chunks.
export_csv(
    name="secondary",
    image=unified_data_secondary,
    grid_size=10,
    n_chunks=30,
)


In [None]:
# TMF layer for `last_year`, renamed to a single "tmf" band.
tmf = (
    ee.Image(f"{data_folder}/tmf_{last_year}_1")
    .rename("tmf")
)

# Same ESA CCI biomass selection as in the main dataset cell, rebuilt here.
ESA_CCI = (
    ee.ImageCollection("projects/sat-io/open-datasets/ESA/ESA_CCI_AGB")
    .filterDate('2019-01-01', '2021-01-01')
    .first()
    .select("AGB")
    .rename("biomass")
)

# ESA_CCI_resampled = ESA_CCI.reduceResolution(
#         reducer= ee.Reducer.mean(),
#     ).reproject(
#         crs=tmf.projection(),
#         scale=tmf.projection().nominalScale()
#     )

# Self-masked image of the pixels where tmf > 0 (only referenced by the
# disabled addBands call below).
tmf_mask = (
    tmf.gt(0)
    .selfMask()
    .rename("tmf_mask")
)

# Two-band image: "biomass" followed by "tmf".
tmf_ESA = ESA_CCI.addBands(tmf)  # .addBands(tmf_mask)

# export_csv("secondary", tmf_ESA, 1, n_chunks = 30)


## Different land use aggregations


In [None]:
# Land-use aggregation variants; each one names a `land_use_<suffix>` asset.
suffixes = [
    "aggregated_all",
    "non_aggregated_all",
    "non_aggregated_10yr",
    "non_aggregated_5yr",
]

for suffix in suffixes:
    # Shared predictor stack plus the land-use bands of this variant.
    combined_image = ee.Image.cat(
        [unified_data, ee.Image(f"{data_folder}/land_use_{suffix}")]
    )

    # Disabled export. It is kept inside the loop so that re-enabling it exports
    # every variant; at module level it would only see the last one.
    # export_csv("secondary", combined_image, 10, n_chunks=30, lu_name=suffix)


## Mature Forest

In [None]:
# Mask of the pixels whose "biome" band equals 1.
biomes = (
    ee.Image(f"{data_folder}/categorical")
    .select("biome")
)
biomes_mask = biomes.eq(1).rename("biome_mask")

# Yearly land-cover classification, one byte band per year named by the year.
lulc = (
    ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
    .select([f"classification_{year}" for year in config.range_1985_2020])
    .byte()
    .rename([str(year) for year in config.range_1985_2020])
)

# Pixels classified as class 3 in every selected year, restricted to biome 1.
mature_mask = (
    lulc.eq(3)
    .reduce(ee.Reducer.allNonZero())
    .selfMask()
    .updateMask(biomes_mask)
)

distance_forest_edge = ee.Image(f"{data_folder}/distance_forest_edge")

# ESA CCI "AGB" band for `last_year`, renamed to ESA_CCI_<last_year>.
biomass_raw = (
    ee.Image(f"projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_{last_year}_v51")
    .select("AGB")
    .rename(f"ESA_CCI_{last_year}")
)

# NOTE(review): `map` shadows the Python builtin of the same name. The name is
# kept because other cells may reference it; consider renaming it notebook-wide.
map = geemap.Map()
map.addLayer(
    distance_forest_edge.updateMask(mature_mask),
    {},
    "Distance to Forest Edge",
)
map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…