# Processing the final dataset



In [2]:
import ee
import geemap
from gee_0_utils import *

# `initialize` and `ProjectConfig` come from the star import of gee_0_utils.
# NOTE(review): presumably `initialize()` authenticates/starts the Earth Engine
# session -- confirm in gee_0_utils.
initialize()

# Project-wide settings; the region of interest and the asset folder are
# pulled out as module-level names because the cells below use them directly.
config = ProjectConfig()
roi, data_folder = config.roi, config.data_folder

In [18]:
def keep_rare_lu_types(unified_data):
    """Mask ``unified_data`` to pixels where a listed ``lulc_sum_*`` band is non-zero.

    Args:
        unified_data: image containing the ``lulc_sum_*`` bands listed below
            (an ``ee.Image`` in the callers visible in this file).

    Returns:
        ``unified_data`` with its mask updated so that only pixels with at
        least one non-zero value among the listed bands remain.
    """
    rare_lu_bands = [
        'lulc_sum_20',
        'lulc_sum_21',
        'lulc_sum_35',
        'lulc_sum_39',
        'lulc_sum_40',
        'lulc_sum_41',
        'lulc_sum_46',
        'lulc_sum_48',
        'lulc_sum_9',
    ]

    # Collapse the selected bands into a single band that is non-zero wherever
    # any of them is non-zero, then use that band as the mask.
    rare_lu_subset = unified_data.select(rare_lu_bands)
    any_rare_lu = rare_lu_subset.reduce(ee.Reducer.anyNonZero())
    return unified_data.updateMask(any_rare_lu)

# Sample one point per grid cell to try to reduce spatial autocorrelation:
def sample_by_grid(unified_data):
    """Draw at most one stratified sample from each cell of a covering grid.

    A grid is laid over the geometry of ``unified_data``; for every grid cell
    a single-point stratified sample (class band ``'biome'``) is requested and
    its first feature is kept. Cells that yield no feature are dropped.

    Args:
        unified_data: image to sample; must contain a ``'biome'`` band.

    Returns:
        The mapped grid collection holding one sampled feature per cell that
        produced a sample.
    """
    footprint = unified_data.geometry()
    # The second argument is the grid scale passed to coveringGrid
    # (NOTE(review): presumably in units of the footprint's projection -- confirm).
    cells = footprint.coveringGrid(footprint.projection(), 1000)

    def first_sample_in(cell):
        """Return the first feature sampled inside ``cell`` (null when empty)."""
        samples = unified_data.stratifiedSample(
            numPoints=1,
            classBand='biome',
            region=cell.geometry(),
            scale=50000,
        )
        return samples.first()

    # dropNulls discards the cells for which no sample could be drawn.
    return cells.map(first_sample_in, dropNulls=True)



Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [10]:

def export_csv(method, year = None):
    """Stack the predictor images for ``method``, sample them and export a CSV.

    The secondary-forest image for ``method`` is combined with the
    nearest-mature, surrounding-cover, CWD, yearly precipitation and yearly SI
    images and masked by the nearest-mature image. For ``"mapbiomas"`` the
    matching land-use image is added as well and the stack is further masked
    by its ``last_LU`` band. A stratified sample (class band ``'biome'``,
    10000 points per class, region ``roi``) is exported to Google Drive.

    Reads the module-level names ``data_folder``, ``roi`` and (for
    ``"mapbiomas"`` only) ``aggregate_LU``.

    Args:
        method: sub-folder of ``data_folder`` holding the inputs
            (``"mapbiomas"`` and ``"eu"`` are the values used in this file).
        year: only used for ``"mapbiomas"``. A truthy value appends
            ``_{year}yr`` to the land-use asset name; ``None`` or ``0`` means
            no year suffix.

    Returns:
        The unsampled, masked image stack.
    """
    secondary = ee.Image(f"{data_folder}/{method}/secondary")
    nearest_mature = ee.Image(f"{data_folder}/{method}/nearest_mature_image")
    yearly_mean_prec = ee.Image(f"{data_folder}/yearly_mean_prec")
    yearly_SI = ee.Image(f"{data_folder}/yearly_SI")
    cwd = ee.Image(f"{data_folder}/raw/cwd_chave")
    sur_cover = ee.Image(f"{data_folder}/{method}/sur_cover").rename("sur_cover")

    unified_data = secondary.addBands(
        [nearest_mature, sur_cover, cwd, yearly_mean_prec, yearly_SI]
    ).updateMask(nearest_mature)

    # Default export name. Previously `suffix` was only assigned in the
    # mapbiomas branch, so any other method failed with UnboundLocalError
    # when the export task was created.
    suffix = method

    if method == "mapbiomas":
        suffix = "aggregated" if aggregate_LU else "non_aggregated"

        # Truthiness check: the default `year=None` must not produce a
        # "_Noneyr" suffix (the old `year != 0` test did).
        if year:
            suffix += f"_{year}yr"

        land_use_years = ee.Image(f"{data_folder}/{method}/land_use_{suffix}")
        unified_data = unified_data.addBands(land_use_years).updateMask(
            land_use_years.select("last_LU")
        )

    # unified_data = keep_rare_lu_types(unified_data)

    unified_data_sampled = unified_data.stratifiedSample(
        numPoints = 10000, classBand = 'biome', region = roi
    )

    # unified_data_sampled = sample_by_grid(unified_data)

    task = ee.batch.Export.table.toDrive(
        collection = unified_data_sampled, description = suffix, fileFormat = 'CSV'
    )
    task.start()

    # task = ee.batch.Export.table.toAsset(
    #     collection = unified_data_sampled, description = f"{suffix}_toAsset", \
    #     assetId = f"{data_folder}/{method}/{suffix}"
    # )
    # task.start()

    return unified_data


# Module-level switch read by export_csv when method == "mapbiomas":
# False selects the "non_aggregated" land-use asset, True the "aggregated" one.
aggregate_LU = False
# export_csv("mapbiomas", 0)
# Start the export for the "eu" inputs (the returned image is not kept).
export_csv("eu")

In [None]:
# Export a sample that holds the MapBiomas and EU secondary-forest ages side
# by side, restricted to pixels where both products are unmasked.
secondary_mapbiomas = ee.Image(f"{data_folder}/mapbiomas/secondary")
secondary_eu = ee.Image(f"{data_folder}/eu/secondary")

# Mask each product by the other's "age" band so both share one footprint.
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))
secondary_eu = secondary_eu.updateMask(secondary_mapbiomas.select("age"))
secondary_mapbiomas = secondary_mapbiomas.updateMask(secondary_eu.select("age"))

categorical = ee.Image(f"{data_folder}/categorical")
# Keep biomes 1, 4 and 6 only, using the same eq/Or form as the
# border-distance cell in this file.
# NOTE(review): the previous `.inList([1, 4, 6])` call was on an ee.Image;
# inList appears to be an ee.Filter constructor, not an Image method.
biome_mask = (
    categorical.select('biome').eq(1)
    .Or(categorical.select('biome').eq(4))
    .Or(categorical.select('biome').eq(6))
)
categorical = categorical.updateMask(biome_mask)
cwd = ee.Image(f"{data_folder}/raw/cwd_chave")

# "age_eu" must come from the EU product; it was previously selected from
# secondary_mapbiomas, which duplicated the MapBiomas age under both names.
unified_data = secondary_mapbiomas.addBands(
    [categorical, cwd, secondary_eu.select("age").rename("age_eu")]
).updateMask(secondary_mapbiomas.select("age"))

unified_data_sampled = unified_data.stratifiedSample(
    numPoints = 10000, classBand = 'biome', region = roi
)

task = ee.batch.Export.table.toDrive(
    collection = unified_data_sampled, description = "mapbiomas_eu", fileFormat = 'CSV'
)
task.start()


In [16]:
# Country outlines used to tell neighbouring land apart from everything else.
gaul = ee.FeatureCollection('FAO/GAUL_SIMPLIFIED_500m/2015/level0')

south_american_countries = [
    'Argentina',
    'Bolivia',
    'Chile',
    'Colombia',
    'Ecuador',
    'French Guiana',
    'Guyana',
    'Paraguay',
    'Peru',
    'Suriname',
    'Uruguay',
    'Venezuela',
]

# Constant-1 image clipped to the listed countries, on a 5000-scale EPSG:4326 grid.
south_america = gaul.filter(ee.Filter.inList('ADM0_NAME', south_american_countries))
sa = (
    ee.Image.constant(1)
    .clip(south_america)
    .reproject(crs='EPSG:4326', scale=5000)
)

categorical = ee.Image(f"{data_folder}/categorical")

# 1 where the biome band is 1, 4 or 6; 0 for any other biome value.
biome_mask = (
    categorical.select('biome').eq(1)
    .Or(categorical.select('biome').eq(4))
    .Or(categorical.select('biome').eq(6))
    .reproject(crs='EPSG:4326', scale=5000)
)

# Signed label image: start at -1, set 2 wherever `sa` is masked, set 1 inside
# the selected biomes, and force -1 wherever biome_mask is 0. Later `where`
# calls override earlier ones.
new_sa = (
    ee.Image(-1)
    .where(sa.mask().Not(), 2)
    .where(biome_mask.gt(0), 1)
    .where(biome_mask.eq(0), -1)
)

# Edges are taken at the zero crossings of the signed label image.
edge_detec = new_sa.zeroCrossing()

# NOTE(review): per the EE docs fastDistanceTransform returns squared pixel
# distances, so sqrt() * sqrt(pixelArea) approximates a distance in metres --
# confirm. The result is limited to the selected biomes.
distance_to_border = (
    edge_detec.fastDistanceTransform()
    .sqrt()
    .multiply(ee.Image.pixelArea().sqrt())
    .updateMask(biome_mask)
    .rename("distance_to_edge")
)

# True where the distance exceeds 10000 (presumably metres, i.e. 10 km).
distance_to_border_mask = distance_to_border.gt(10000)

# export_image(distance_to_border_mask, "distance_to_border_mask_2", scale = 5000)

In [17]:
# Visual check of the border-distance masks.
# distance_to_border_mask = ee.Image(f"{data_folder}/distance_to_border_mask")
# `distance_to_border_mask_2` is drawn below, but its only definition had been
# commented out, so a fresh top-to-bottom run failed with NameError.
# NOTE(review): assumes the asset was exported earlier (see the commented-out
# export_image call in this file) -- confirm it exists.
distance_to_border_mask_2 = ee.Image(f"{data_folder}/distance_to_border_mask_2")

# NOTE(review): `map` shadows the Python builtin; the name is kept here in
# case other cells refer to it.
map = geemap.Map()
# map.addLayer(biome_mask, {'min': 0, 'max':1, 'palette': ['white', 'black']}, 'biome_mask')
# map.addLayer(distance_to_border_mask, {'min': 0, 'max':1, 'palette': ['white', 'red']}, 'distance_to_border_mask')
map.addLayer(distance_to_border_mask_2, {'min': 0, 'max':1, 'palette': ['white', 'blue']}, 'distance_to_border_mask_2')
# map.addLayer(edge_detec, {}, 'edge_detec')
map.addLayer(distance_to_border_mask, {'min': 0, 'max':1, 'palette': ['white', 'blue']}, 'distance_to_border_mask')
# map.addLayerControl()
map


Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…