# Processing the final dataset



In [4]:
import ee
import geemap
from gee_0_utils import *

# Run the project's initialize() helper, then read the region of interest and
# the asset folder from the shared ProjectConfig.
initialize()
config = ProjectConfig()
roi, data_folder = config.roi, config.data_folder

In [5]:
# Run switches for this notebook.
export_distances = False
aggregate_LU = False

# The prefix names the exports and the land-use asset that is loaded,
# depending on whether land-use classes are aggregated.
prefix = "aggregated" if aggregate_LU else "non_aggregated"

In [None]:
# NOTE(review): `age`, `age_eu`, `biomass` and `age_agbd` are not defined in
# any cell visible here -- presumably they come from `gee_0_utils` or an
# earlier cell; confirm before running.

# Mask the two age layers against each other and give them explicit band names.
age_mapbiomas = age.updateMask(age_eu).rename("age_mapbiomas")
age_eu = age_eu.updateMask(age_mapbiomas).rename("age_eu")
age_mapbiomas = age_mapbiomas.updateMask(age_eu).rename("age_mapbiomas")

# Biomass restricted to the masked age pixels, cast to int16, band "agbd".
# NOTE(review): `biomass` is not used again in this cell; the stack below is
# built from `age_agbd` instead -- confirm which one is intended.
biomass = biomass.updateMask(age_mapbiomas).int16().rename("agbd")

categorical = ee.Image(f"{data_folder}/categorical")
# ee.Image has no `inList` method (that one belongs to ee.Filter), so build the
# biome mask with remap: codes 1, 4 and 6 become 1, every other value becomes 0.
biome_mask = categorical.select('biome').remap([1, 4, 6], [1, 1, 1], 0)
categorical = categorical.updateMask(biome_mask)
cwd = ee.Image(f"{data_folder}/raw/cwd_chave")

unified_data = age_agbd.addBands([categorical, cwd]).updateMask(age_mapbiomas)

# Stratified sample by the 'biome' band over the footprint of age_agbd.
unified_data_sampled = unified_data.stratifiedSample(
    numPoints = 10000, classBand = 'biome', region = age_agbd.geometry()
)

# Export the sample to Google Drive as CSV.
task = ee.batch.Export.table.toDrive(
    collection = unified_data_sampled, description = "mapbiomas_eu_atl", fileFormat = 'CSV'
)
task.start()


In [10]:

def export_csv(method, prefix, year = None):
    """Stack the predictor images for `method`, sample them and export a CSV.

    Loads the `secondary`, `nearest_mature_image` and `sur_cover` assets from
    `{data_folder}/{method}/`, plus the shared `yearly_mean_prec`, `yearly_SI`
    and `raw/cwd_chave` assets, combines them into one image and masks it by
    `nearest_mature`. When `method` is "mapbiomas", the matching
    `land_use_{prefix}` asset is added and the stack is further masked by its
    "last_LU" band. The stack is then sampled with `stratifiedSample` using
    the 'biome' band as the class band inside `roi`, and a Drive export task
    (CSV) named after the final prefix is started.

    Args:
        method: Name of the sub-folder of `data_folder` holding the
            method-specific assets (compared against "mapbiomas").
        prefix: Base name of the land-use asset and of the export task.
        year: Optional year count. When truthy and `method` is "mapbiomas",
            `_{year}yr` is appended to `prefix`. `None` (the default) and
            `0` both leave `prefix` unchanged.

    Returns:
        The unified `ee.Image` (the stacked image, not the sampled table).
    """
    secondary = ee.Image(f"{data_folder}/{method}/secondary")
    nearest_mature = ee.Image(f"{data_folder}/{method}/nearest_mature_image")
    yearly_mean_prec = ee.Image(f"{data_folder}/yearly_mean_prec")
    yearly_SI = ee.Image(f"{data_folder}/yearly_SI")
    cwd = ee.Image(f"{data_folder}/raw/cwd_chave")
    sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover").rename("sur_cover")

    unified_data = (
        secondary.addBands(
            [nearest_mature, sur_cover, cwd, yearly_mean_prec, yearly_SI]
        ).updateMask(nearest_mature)
    )

    if method == "mapbiomas":
        # The previous check was `year != 0`, which let the default
        # `year=None` through and produced a "_Noneyr" suffix (and therefore a
        # non-existent asset path). Only add the suffix for a real year count.
        if year:
            prefix += f"_{year}yr"

        land_use_years = ee.Image(f"{data_folder}/{method}/land_use_{prefix}")
        unified_data = unified_data.addBands(land_use_years).updateMask(
            land_use_years.select("last_LU")
        )

    # NOTE(review): the 'biome' class band is not added in this function, so it
    # is expected to be present in one of the loaded assets -- confirm.
    unified_data_sampled = unified_data.stratifiedSample(
        numPoints = 10000, classBand = 'biome', region = roi
    )

    task = ee.batch.Export.table.toDrive(
        collection = unified_data_sampled, description = prefix, fileFormat = 'CSV'
    )
    task.start()

    # Alternative export target (disabled):
    # task = ee.batch.Export.table.toAsset(
    #     collection = unified_data_sampled, description = f"{prefix}_toAsset", assetId=f"projects/amazon-forest-regrowth/assets/{prefix}"
    # )
    # task.start()

    return unified_data

# Build the unified image (and start its CSV export) for the chosen prefix.
# NOTE(review): export_csv is declared as (method, prefix, year=None), so the
# 0 below binds to `method`, not `year`. These calls look like they predate
# the `method` parameter -- confirm the intended method name and year.
unified_data = export_csv(0, prefix)
# export_csv(5, prefix)
# export_csv(10, prefix)
# export_csv(15, prefix)

In [18]:
def keep_rare_lu_types():
    """Sample only the pixels where at least one listed land-use band is non-zero.

    Reads the module-level `unified_data` image, masks it to pixels where any
    of the listed `lulc_sum_*` bands is non-zero, and builds a stratified
    sample (class band 'biome') over the footprint of `age_agbd`.

    NOTE(review): the sample and the export name are built but neither
    exported nor returned, so the function currently has no visible effect --
    it looks unfinished; confirm what should happen with them.
    """
    # Bands whose non-zero values mark the land-use types to keep.
    bands_to_check = ['lulc_sum_20', 'lulc_sum_21', 'lulc_sum_35',
                      'lulc_sum_39', 'lulc_sum_40', 'lulc_sum_41',
                      'lulc_sum_46', 'lulc_sum_48', 'lulc_sum_9']

    # 1 where at least one of the listed bands is non-zero, 0 elsewhere.
    mask = unified_data.select(bands_to_check).reduce(ee.Reducer.anyNonZero())

    # Use a separate local name for the masked image. Assigning to
    # `unified_data` here would make that name local to the function, and the
    # read on the line above would then raise UnboundLocalError.
    rare_lu_data = unified_data.updateMask(mask)

    unified_data_sampled = rare_lu_data.stratifiedSample(
        numPoints = 10000, classBand = 'biome', region = age_agbd.geometry()
    )

    export_name = f"{prefix}_neigh_reduced"

def sample_by_grid():
    """Take at most one sample per grid cell of the module-level `unified_data`.

    Intended to reduce some of the spatial autocorrelation in the sample. The
    footprint of `unified_data` is covered with a grid (scale 1000 in the
    footprint's projection), and one stratified sample (class band 'biome',
    scale 50000) is requested per cell. Cells that yield no sample are
    dropped. The resulting collection is only bound to a local name; nothing
    is returned or exported.
    """
    footprint = unified_data.geometry()
    cells = footprint.coveringGrid(footprint.projection(), 1000)

    def first_sample_in_cell(cell):
        # One point per class inside this cell; only the first feature is
        # kept (null when the cell produced no sample).
        return unified_data.stratifiedSample(
            numPoints = 1,
            classBand = 'biome',
            region = cell.geometry(),
            scale = 50000,
        ).first()

    # dropNulls discards the cells for which no sample came back.
    unified_data_sampled = cells.map(first_sample_in_cell, dropNulls = True)



Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [11]:
# Country boundaries (FAO GAUL, level 0) and the project's Brazil shapefile asset.
gaul = ee.FeatureCollection('FAO/GAUL_SIMPLIFIED_500m/2015/level0')
br_shp = ee.FeatureCollection('projects/amazon-forest-regrowth/assets/raw/brasil_shp')

# Rasterise the shapefile from its 'AREA_KM2' property, reproject it with the
# crs/transform stored in `proj`, and keep a 1/0 image of where that value is
# positive.
br_img = (
    br_shp.reduceToImage(properties=['AREA_KM2'], reducer=ee.Reducer.first())
    .rename('br_img')
    .reproject(proj['crs'], proj['transform'])
    .gt(0)
)

# Countries picked from GAUL by name (Brazil itself is not in this list).
south_american_countries = [
    'Argentina',
    'Bolivia',
    'Chile',
    'Colombia',
    'Ecuador',
    'French Guiana',
    'Guyana',
    'Paraguay',
    'Peru',
    'Suriname',
    'Uruguay',
    'Venezuela',
]
south_america = gaul.filter(ee.Filter.inList('ADM0_NAME', south_american_countries))

# Dissolve the selected countries into a single geometry.
south_america_merged = south_america.union().geometry()

# Edges of mature_mask: masked pixels are filled with -1 so that
# zeroCrossing() marks the boundary between filled and original pixels.
edge_detec = mature_mask.unmask(-1).zeroCrossing()

# Distance to the nearest edge: square root of the fast distance transform,
# scaled by the pixel side length (square root of the pixel area), then kept
# only where it exceeds 1000 (self-masked, so other pixels are dropped).
distance_to_edge = (
    edge_detec.fastDistanceTransform()
    .sqrt()
    .multiply(ee.Image.pixelArea().sqrt())
    .rename("distance_to_edge")
    .gt(1000)
    .selfMask()
)


# map = geemap.Map()
# map.addLayer(br_img, {}, "br_img")
# map.addLayer(brazil_buffer, {}, "brazil_buffer")
# # map.addLayer(south_america_merged, {}, "largest_polygon")
# map