<!-- # Processing the final dataset
 -->


# Export data as CSV

In [3]:
import ee
import geemap
from gee_0_utils import *

# Run the project's `initialize()` helper and read project-wide settings.
# `initialize` and `ProjectConfig` are not defined in this notebook; presumably
# they come from the `gee_0_utils` star-import above — verify.
initialize()
config = ProjectConfig()
roi = config.roi  # region used by the grid/sampling cells further down
data_folder = config.data_folder  # prefix interpolated into asset IDs
last_year = config.last_year  # year interpolated into band/asset names

In [None]:
def keep_rare_lu_types(unified_data):
    """Mask `unified_data` to pixels with a non-zero value in any listed band.

    Only the ``lulc_sum_*`` bands named below are inspected; a pixel stays
    visible when at least one of them is non-zero. All bands of the input are
    returned, with the extra mask applied on top of the existing one.
    """
    rare_lu_bands = [
        'lulc_sum_20', 'lulc_sum_21', 'lulc_sum_35',
        'lulc_sum_39', 'lulc_sum_40', 'lulc_sum_41',
        'lulc_sum_46', 'lulc_sum_48', 'lulc_sum_9',
    ]

    # Collapse the selected bands into one band: non-zero if any band is.
    selected_bands = unified_data.select(rare_lu_bands)
    any_present = selected_bands.reduce(ee.Reducer.anyNonZero())

    return unified_data.updateMask(any_present)


# NOTE(review): `age` is not assigned in any cell above this line and the
# result of this expression is discarded — looks like a scratch line; confirm
# whether it can be removed.
age.pixelLonLat().float()

# 1 where the MapBiomas collection 9 classification for `last_year` equals 6,
# 0 elsewhere (presumably class 6 is the floodable-forest class — verify
# against the MapBiomas legend).
floodable_forests = (ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
        .select(f"classification_{last_year}").eq(6))


## Data for all biomass and age data comparisons

In [None]:
# Biome band; used below as the stratification class for sampling.
biome = ee.Image(f"{data_folder}/categorical").select("biome")

# ages
# NOTE(review): `mapbiomas` and `tmf` are loaded but never added to
# `comparisons` — confirm whether they belong in the comparison image.
mapbiomas = ee.Image(f"{data_folder}/mapbiomas_{last_year}").rename(["age_mapbiomas"])
tmf = ee.Image(f"{data_folder}/tmf_{last_year}").rename(["age_tmf"])
silva = (
    ee.Image("projects/ee-regrowth/assets/MB_secondary_forest_Silva_Junior_v2/sforestAge_brazil_V8")
    .select("classification_2020")
    .rename("age_silva")
)

# biomass
ESA_CCI = ee.Image(f"{data_folder}/ESA_CCI_{last_year}")
GEDI_L2A = ee.Image(f"{data_folder}/GEDI_L2A_{last_year}")
GEDI_L4A = ee.Image(f"{data_folder}/GEDI_L4A_{last_year}")
heinrich = (
    ee.Image("projects/ee-regrowth/assets/Heinrich_etal_2021_updates/sforestAGC_climate_only_v1_1")
    .select("classification_2020")
    .rename("heinrich_biomass_2020")
)

# Combine the images into a single multi-band image
comparisons = silva.addBands([ESA_CCI, GEDI_L2A, GEDI_L4A, heinrich, biome])

# Keep only pixels where every band is non-zero.
mask = comparisons.reduce(ee.Reducer.allNonZero())
masked_image = comparisons.updateMask(mask)

# Sample the *masked* image. Sampling the unmasked `comparisons` image would
# make the all-non-zero mask built above a no-op for the exported points.
comparisons_sampled = masked_image.stratifiedSample(numPoints=5000, classBand='biome')




In [None]:

# A pixel is kept only when every condition holds: GEDI biomass is unmasked,
# the border, one-hectare and age masks are non-zero, and the last land use
# is unmasked.
combined_mask = (
    GEDI_biomass.mask()
    .And(distance_to_border_mask)
    .And(one_hectare_mask)
    .And(ages_mask)
    .And(land_use.select("last_LU").mask())
)

# Stack the response and categorical bands onto the secondary-forest image.
unified_data = (
    secondary
    .addBands([GEDI_biomass, categorical])
    .updateMask(combined_mask)
)

# Stratify by biome; geometries are dropped to keep the table small.
unified_data_sampled = unified_data.stratifiedSample(
    numPoints=15000,
    classBand='biome',
    geometries=False,
)

# Prepare (but do not start) the Drive export
task = ee.batch.Export.table.toDrive(
    collection=unified_data_sampled,
    description='mapbiomas_GEDI',
    fileFormat='CSV',
    selectors=properties_to_export,
)
# task.start()




In [23]:
# Feature collection of field plots (presumably field-measured biomass,
# judging by the asset name — verify).
field_data = ee.FeatureCollection(f"{data_folder}/field_biomass")

# Load biome geometries, keeping only the 'CD_Bioma' property
biomes = ee.FeatureCollection(f"{data_folder}/raw/biomes_br").select('CD_Bioma')

# Tag a feature with the 'CD_Bioma' code of a biome polygon that overlaps it
def add_intersection_property(feature):
    """Return `feature` with a 'biome' property copied from an overlapping biome.

    `biomes` is filtered to the features matching the input geometry; the
    'CD_Bioma' value of the first match is stored under 'biome'.
    """
    plot_geometry = feature.geometry()
    overlapping_biomes = biomes.filterBounds(plot_geometry)
    biome_code = overlapping_biomes.first().get('CD_Bioma')

    return feature.set('biome', biome_code)

# Map over field_data so every feature gets the 'biome' property
cd_bioma_value = field_data.map(add_intersection_property)


# Export to Google Drive as CSV (no selectors: all properties are written)
task = ee.batch.Export.table.toDrive(
    collection=cd_bioma_value,
    description='Field_Biomass_with_Biome',
    fileFormat='CSV'
)

# Start the export task
task.start()


In [None]:

# Per-pixel count of years in `config.range_1985_2020` with burned coverage
# greater than zero; pixels masked in the result are filled with 0.
fire = (
    ee.Image("projects/mapbiomas-public/assets/brazil/fire/collection3/mapbiomas_fire_collection3_annual_burned_coverage_v1")
    .select([f"burned_coverage_{yr}" for yr in config.range_1985_2020])
    .byte()
    .rename([str(yr) for yr in config.range_1985_2020])
    .gt(0)
    .reduce('sum')
    .rename("num_fires")
    .unmask(0)
)

In [48]:


# NOTE(review): this cell reads `mapbiomas_age` and `method`, neither of which
# is assigned in this cell — it relies on other cells having run first; confirm.
categorical = ee.Image(f"{data_folder}/categorical").select(["biome", "ecoreg", "protec", "indig"])

# select the data points that match their classification as young or old by IPCC
filter1 = ee.Image(f"{data_folder}/raw/00N_10N")
filter2 = ee.Image(f"{data_folder}/raw/10N_20N")
filter_ages = ee.ImageCollection([filter1, filter2]).mosaic()
# Value 2 of the mosaicked filter is paired with ages <= 20 and value 3 with
# ages > 20 below.
filter_young_secondary = filter_ages.eq(2)
filter_old_secondary = filter_ages.eq(3)
# Each term is 1 where both the age test and the filter class hold, 0 elsewhere
# (masked pixels are filled with 0), so their sum is 1 where either pair holds.
young_secondary = mapbiomas_age.lte(20).updateMask(filter_young_secondary).unmask(0)
old_secondary = mapbiomas_age.gt(20).updateMask(filter_old_secondary).unmask(0)
ages_mask = young_secondary.add(old_secondary)

# Masks combined with `ages_mask` in the cell that builds `combined_mask`.
distance_to_border_mask = ee.Image(f"{data_folder}/distance_to_border_mask").byte()
one_hectare_mask = ee.Image(f"{data_folder}/mapbiomas/one_hectare_mask").selfMask()

land_use = ee.Image(f"{data_folder}/mapbiomas/land_use_no_fire")

# Predictor layers; the ones under `{method}` depend on the externally set `method`.
distance_to_nearest_mature = ee.Image(f"{data_folder}/{method}/distance_to_nearest_mature")
cwd = ee.Image(f"{data_folder}/raw/cwd_chave").int16()
sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover")
# NOTE(review): identical to the `categorical` assignment at the top of this cell.
categorical = ee.Image(f"{data_folder}/categorical").select(["biome", "ecoreg", "protec", "indig"])
topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")
soil = ee.Image(f"{data_folder}/soilgrids").select(['nitro', 'sand', 'phh2o'])  # Dropping multicollinear bands
climate = ee.Image(f"{data_folder}/yearly_terraclim").select('mean_pr',
 'mean_srad',
 'mean_temp',
 'mean_vpd',
 'mean_soil',
 'mean_aet',
 'mean_si')

nearest_mature = ee.Image(f"{data_folder}/{method}/nearest_mature_biomass_image_neighborhood").rename("nearest_mature_biomass")

amazon_quarter_regions = ee.Image("projects/amazon-forest-regrowth/assets/amazon_quarter_regions")

mean_biomass_quarter = ee.Image("projects/amazon-forest-regrowth/assets/mean_biomass_quarter")

# Disabled alternative: mosaic of numbered nearest-mature assets.
# NOTE(review): `base_name` is not defined in this cell.
# suffixes = range(1, 22)
# images = [ee.Image(f"{base_name}{suffix}") for suffix in suffixes]
# image_collection = ee.ImageCollection(images)
# nearest_mature = image_collection.mosaic().rename("nearest_mature_biomass").divide(2)

# if method == "mapbiomas":
#     nearest_mature = nearest_mature.reproject(scale = 1000, crs = 'EPSG:4326').rename("nearest_mature_biomass")


In [51]:

# A pixel is kept only when every term is true: biomass is unmasked; the
# border, one-hectare and age masks are non-zero; both nearest-mature layers
# are non-zero; the fire value is below 14; and the last land use is unmasked.
combined_mask = (
    biomass.mask()
    .And(distance_to_border_mask)
    .And(one_hectare_mask)
    .And(ages_mask)
    .And(distance_to_nearest_mature)
    .And(nearest_mature)
    .And(fire.lt(14))
    .And(land_use.select("last_LU").mask())
)

# Stack every response/predictor layer onto the age band, then apply the mask.
unified_data = (
    mapbiomas_age
    .addBands([
        silva_age, fire, heinrich_biomass_2020, biomass, categorical,
        nearest_mature, land_use, distance_to_nearest_mature, cwd, sur_cover,
        topography, soil, climate, amazon_quarter_regions, mean_biomass_quarter,
    ])
    .updateMask(combined_mask)
)

# Optional visual check (disabled):
# map = geemap.Map()
# map.addLayer(unified_data, {}, 'unified_data')
# map.addLayer(mapbiomas_age.updateMask(distance_to_border_mask), {'palette': ['blue']}, 'mapbiomas_age')
# # map.addLayer(distance_to_nearest_mature, {}, 'distance_to_nearest_mature')
# map.addLayer(fire.eq(0), {'palette': ['red']}, 'fire')
# map


In [65]:
# NOTE(review): no band is selected from this ESA CCI image, whereas the 2017
# cell selects "AGB" — confirm that keeping every band is intended here.
ESA_CCI = ee.Image("projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_2020_v51")
mapbiomas_age = ee.Image(f"{data_folder}/mapbiomas/secondary").select("age").rename("mapbiomas_age")

# Age + biomass + biome, restricted to pixels where the age band is unmasked.
# NOTE(review): `categorical` is assigned in another cell.
unified_data = mapbiomas_age.addBands([ESA_CCI, categorical.select("biome")]).updateMask(mapbiomas_age.mask())


In [52]:
# The band names of the image become the exported CSV columns; the two
# bookkeeping names below are filtered out if present.
to_remove = ['.geo', 'system:index']
all_properties = unified_data.bandNames().getInfo()
properties_to_export = [name for name in all_properties if name not in to_remove]

# Stratify on the `num_fires` band; geometries are not kept.
unified_data_sampled = unified_data.stratifiedSample(
    numPoints=10000,
    classBand='num_fires',
    geometries=False,
)

# Queue the table export to Google Drive and start it immediately.
task = ee.batch.Export.table.toDrive(
    collection=unified_data_sampled,
    description='mapbiomas_heinrich_lulc_regions',
    fileFormat='CSV',
    selectors=properties_to_export,
)

task.start()



In [37]:
# NOTE(review): `heinrich_biomass` is not assigned in this cell; it must
# already exist in the session when this cell runs.
heinrich_biomass_2017 = heinrich_biomass.select("classification_2017").rename("heinrich_biomass_2017")

# Silva Junior secondary-forest age for 2017, limited to pixels where the
# Heinrich 2017 band is unmasked and non-zero.
silva_age = (
    ee.Image("projects/ee-regrowth/assets/MB_secondary_forest_Silva_Junior_v2/sforestAge_brazil_V8")
    .select("classification_2017")
    .rename("silva_age")
    .updateMask(heinrich_biomass_2017)
)

# ESA CCI AGB for 2017, halved (presumably a biomass-to-carbon conversion so
# it is comparable with the Heinrich layer — verify).
ESA_CCI = (
    ee.Image("projects/sat-io/open-datasets/ESA/ESA_CCI_AGB/CCI_BIOMASS_100m_AGB_2017_v51")
    .select("AGB")
    .rename("ESA_CCI_2017")
    .divide(2)
)

unified_data = heinrich_biomass_2017.addBands([silva_age, ESA_CCI])

# Band names become the CSV columns; bookkeeping names are filtered out.
to_remove = ['.geo', 'system:index']
all_properties = unified_data.bandNames().getInfo()
properties_to_export = [p for p in all_properties if p not in to_remove]

unified_data_sampled = unified_data.stratifiedSample(
    numPoints=500,
    classBand='silva_age',
    geometries=False,
)

# Export to Google Drive as CSV. `selectors` is passed so the column list
# computed above is actually used, matching the other export cells.
task = ee.batch.Export.table.toDrive(
    collection=unified_data_sampled,
    description='heinrich_2017',
    fileFormat='CSV',
    selectors=properties_to_export,
)

# Start the export task
task.start()


In [None]:

# Function to preprocess images and create the unified data image
def create_unified_data(method, year):
    """Build the masked multi-band image for one dataset plus its export suffix.

    Args:
        method: Sub-folder of `data_folder` to read method-specific assets
            from. "mapbiomas" takes the land-use branch; any other value takes
            the branch that adds the `eu_mapbiomas_fires` band and the
            `mapbiomas_unburned_mask`.
        year: Only used in the "mapbiomas" branch, to build the land-use asset
            suffix ("_{year}yr", or "_all" when it is 0).

    Returns:
        Tuple ``(unified_data, suffix)``: the image with the combined mask
        applied, and the suffix string describing the configuration.

    Note:
        Reads the module-level names `data_folder`, `ages_mask` and (in the
        "mapbiomas" branch) `aggregate_LU`; they must be set before the call.
    """
    # --- Layers that do not depend on `method` ---
    cwd = ee.Image(f"{data_folder}/raw/cwd_chave").int16()
    categorical = ee.Image(f"{data_folder}/categorical").select(["biome", "ecoreg", "protec", "indig"])
    topography = ee.Image("CSP/ERGo/1_0/Global/ALOS_landforms").rename("topography")
    soil = ee.Image(f"{data_folder}/soilgrids").select(['nitro', 'sand', 'phh2o'])  # Dropping multicollinear bands
    climate = ee.Image(f"{data_folder}/yearly_terraclim")
    distance_to_border_mask = ee.Image(f"{data_folder}/distance_to_border_mask").byte()

    # --- Layers stored per method ---
    secondary = ee.Image(f"{data_folder}/{method}/secondary")
    ages = secondary.select("age")  # NOTE(review): not used below
    distance_to_nearest_mature = ee.Image(f"{data_folder}/{method}/distance_to_nearest_mature")
    sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover")
    one_hectare_mask = ee.Image(f"{data_folder}/{method}/one_hectare_mask").selfMask()

    # Nearest-mature biomass is read from assets numbered 1..21 and mosaicked.
    base_name = f"{data_folder}/{method}/nearest_mature_biomass_"
    numbered_images = [ee.Image(f"{base_name}{number}") for number in range(1, 22)]
    nearest_mature = ee.ImageCollection(numbered_images).mosaic().rename("nearest_mature_biomass")
    if method == "mapbiomas":
        nearest_mature = nearest_mature.reproject(scale = 1000, crs = 'EPSG:4326').rename("nearest_mature_biomass")

    # Base mask shared by both branches (disabled extra term: .And(GEDI_biomass.mask()))
    combined_mask = (
        nearest_mature.mask()
        .And(distance_to_border_mask)
        .And(one_hectare_mask)
        .And(ages_mask)
    )

    # Base band stack shared by both branches (disabled extra band: GEDI_biomass)
    unified_data = secondary.addBands([
        nearest_mature, sur_cover, cwd, distance_to_nearest_mature,
        categorical, soil, topography, climate,
    ])

    if method != "mapbiomas":
        suffix = "eu"
        fire = ee.Image(f"{data_folder}/{method}/eu_mapbiomas_fires")
        mapbiomas_unburned_mask = ee.Image(f"{data_folder}/{method}/mapbiomas_unburned_mask")
        combined_mask = combined_mask.And(mapbiomas_unburned_mask)

        unified_data = unified_data.addBands([fire])
    else:
        # Disabled alternative (ESA fire band):
        # fire = ee.Image(f"{data_folder}/{method}/ESA_fires")
        # unified_data = unified_data.addBands(fire.rename("ESA"))
        # suffix = "ESA_fire"

        aggregation_part = "aggregated" if aggregate_LU else "non_aggregated"
        period_part = f"_{year}yr" if year != 0 else "_all"
        suffix = aggregation_part + period_part

        # Disabled alternative (restrict to pixels that were class 3 in 1985):
        # lulc = (ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
        #             .select([f"classification_{year}" for year in config.range_1985_2020])
        #             .byte()
        #             .rename([str(year) for year in config.range_1985_2020]))
        # lulc = lulc.select("1985").eq(3).selfMask()
        land_use = ee.Image(f"{data_folder}/{method}/land_use_{suffix}")
        unified_data = unified_data.addBands([land_use])
        # Also require a valid last land use.
        combined_mask = combined_mask.And(land_use.select("last_LU").mask())

    return unified_data.updateMask(combined_mask), suffix

# Main Function to run tile-wise exports
def export_tilewise(method, year):
    """Sample the unified image tile by tile and start one CSV export per tile.

    Args:
        method: Forwarded to `create_unified_data`.
        year: Forwarded to `create_unified_data`.

    A covering grid is built over the module-level `roi`; for every grid cell
    the image is sampled at scale 100 and a Drive export named
    ``{suffix}_{n}`` (n counted from 1) is started. A progress line is printed
    for each started task.
    """
    unified_data, suffix = create_unified_data(method, year)

    # Band names become CSV columns; bookkeeping names are filtered out.
    excluded = ('.geo', 'system:index')
    columns = [band for band in unified_data.bandNames().getInfo() if band not in excluded]

    # Grid over the ROI; fetch the cell IDs client-side so we can loop on them.
    grid = roi.coveringGrid("EPSG:4326", 1000000)
    cell_ids = grid.aggregate_array('system:index').getInfo()

    for task_number, cell_id in enumerate(cell_ids, start=1):
        cell = grid.filter(ee.Filter.eq('system:index', cell_id))
        cell_sample = unified_data.sample(region=cell.geometry(), scale=100, geometries=False)

        export = ee.batch.Export.table.toDrive(
            collection=cell_sample,
            description=f'{suffix}_{task_number}',
            fileFormat='CSV',
            selectors=columns,
        )
        export.start()
        print(f'Started export task {task_number}')

# Entry point for a single biome-stratified export
def export_stratified(method, year):
    """Start one Drive CSV export of a biome-stratified sample of the unified image.

    Args:
        method: Forwarded to `create_unified_data`.
        year: Forwarded to `create_unified_data`.

    The export task is named after the suffix returned by
    `create_unified_data`.
    """
    unified_data, suffix = create_unified_data(method, year)

    # Band names become CSV columns; bookkeeping names are filtered out.
    excluded = ('.geo', 'system:index')
    columns = [band for band in unified_data.bandNames().getInfo() if band not in excluded]

    # Disabled alternative (plain sampling over the ROI):
    # unified_data_sampled = unified_data.sample(region = roi, scale = 30, geometries = False)
    stratified_points = unified_data.stratifiedSample(
        numPoints=15000,
        classBand='biome',
        geometries=False,
    )

    export = ee.batch.Export.table.toDrive(
        collection=stratified_points,
        description=f'{suffix}',
        fileFormat='CSV',
        selectors=columns,
    )
    export.start()

# Module-level switch read by create_unified_data() to choose between the
# "aggregated" and "non_aggregated" land-use asset suffix.
aggregate_LU = True
# Disabled stratified exports:
# export_stratified('eu', 0)
# export_stratified('mapbiomas', 0)
# export_stratified('mapbiomas', 5)
# export_stratified('mapbiomas', 10)
# export_stratified('mapbiomas', 15)
# aggregate_LU = False
# export_stratified('mapbiomas', 0)
# export_stratified('mapbiomas', 5)
# export_stratified('mapbiomas', 10)
# export_stratified('mapbiomas', 15)

# Tile-wise exports: only the 5- and 10-year aggregated runs are active; each
# call starts one export task per grid tile.
# export_tilewise('mapbiomas', 0)
export_tilewise('mapbiomas', 5)
export_tilewise('mapbiomas', 10)
# export_tilewise('mapbiomas', 15)
# aggregate_LU = False
# export_tilewise('mapbiomas', 0)
# export_tilewise('mapbiomas', 5)
# export_tilewise('mapbiomas', 10)
# export_tilewise('mapbiomas', 15)

Started export task 1
Started export task 2
Started export task 3
Started export task 4
Started export task 5
Started export task 6
Started export task 7
Started export task 8
Started export task 9
Started export task 10
Started export task 11
Started export task 12
Started export task 13
Started export task 14
Started export task 15
Started export task 16
Started export task 17
Started export task 18
Started export task 19
Started export task 20
Started export task 21
Started export task 22
Started export task 1
Started export task 2
Started export task 3
Started export task 4
Started export task 5
Started export task 6
Started export task 7
Started export task 8
Started export task 9
Started export task 10
Started export task 11
Started export task 12
Started export task 13
Started export task 14
Started export task 15
Started export task 16
Started export task 17
Started export task 18
Started export task 19
Started export task 20
Started export task 21
Started export task 22


In [8]:
secondary_mapbiomas = ee.Image(f"{data_folder}/mapbiomas/secondary")
secondary_eu = ee.Image(f"{data_folder}/eu/secondary")

# Restrict each product to pixels where the other one has a non-zero age, so
# both images end up covering the same set of pixels.
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))
secondary_eu = secondary_eu.updateMask(secondary_mapbiomas.select("age"))
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))

# Keep only biomes 1, 4 and 6.
categorical = ee.Image(f"{data_folder}/categorical")
biome_band = categorical.select('biome')
biome_mask = biome_band.eq(1).Or(biome_band.eq(4)).Or(biome_band.eq(6))
categorical = categorical.updateMask(biome_mask)
cwd = ee.Image(f"{data_folder}/raw/cwd_chave")

# The "age_eu" band must come from the EU product. Taking it from
# `secondary_mapbiomas` would export the MapBiomas age twice and make the
# comparison between the two products meaningless.
unified_data = (
    secondary_mapbiomas
    .addBands([
        categorical,
        cwd,
        secondary_eu.select("age").rename("age_eu"),
    ])
    .updateMask(secondary_mapbiomas.select("age"))
)

unified_data_sampled = unified_data.stratifiedSample(
    numPoints = 10000, classBand = 'biome', region = roi
)

task = ee.batch.Export.table.toDrive(
    collection = unified_data_sampled, description = "mapbiomas_eu", fileFormat = 'CSV'
)
task.start()

In [None]:
# to get the data from current agricultural land, to predict its regrowth potential:

# get all land use history for all plots that are currently agricultural land
# get the nearest mature biomass for each of these plots
# get the same predictors and run the model for these pixels.

canopy_height = ee.Image('users/nlang/ETH_GlobalCanopyHeight_2020_10m_v1').clip(roi).rename('canopy_height')

if generate_lulc_pred:
    lulc = (ee.Image("projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1")
            .select([f"classification_{year}" for year in config.range_1985_2020])
            .byte()
            .rename([str(year) for year in config.range_1985_2020]))

    age = remap_band(f"{last_year}", lulc)
