In [9]:
%pip install shapely

Note: you may need to restart the kernel to use updated packages.


In [None]:
import geopandas as gpd
import ee
import pandas as pd
import numpy as np
import time
import geemap
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm  
from shapely.geometry import mapping
import os


In [10]:
# Authenticate against Google Earth Engine, then initialise the client
# for the cloud project used by this notebook.
projeto_gee = 'cropyieldprediction-476612'
ee.Authenticate()
ee.Initialize(project=projeto_gee)

In [6]:
# Load the mask polygons that will be annotated with MapBiomas information.
caminho_mascaras = 'LEM_data/LEM_mascaras_v1_shp/LEM_mascaras_2020-05-01_to_2020-06-01_OVERLAP/01_mascaras.shp'
gdf = gpd.read_file(caminho_mascaras)

### Pega informações do MapBiomas

In [None]:


# Year whose `classification_<ano>` band is queried further down.
ano = 2016
print("ANO:", ano)

# MapBiomas land-use/land-cover image (one band per year, named
# `classification_<year>` as used by the reducer below).
mapbiomas_collection_id = 'projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1'
mapbiomas = ee.Image(mapbiomas_collection_id)

# Reproject the masks to EPSG:4326 (lon/lat) before turning them into
# Earth Engine geometries.
gdf = gdf.to_crs(epsg=4326)

def obter_frequencias_para_geometria(geometry):
    """
    Return the MapBiomas class-frequency histogram for one geometry.

    Only the ``classification_{ano}`` band is selected before reducing, so
    Earth Engine does not have to build a histogram for every other band of
    the image just to discard it afterwards.

    Args:
        geometry: Shapely geometry; it is converted to GeoJSON with
            ``shapely.geometry.mapping`` and wrapped in ``ee.Geometry``.
            Reads the module-level ``mapbiomas`` image and ``ano`` year.

    Returns:
        Whatever ``getInfo()`` yields for the band's frequency histogram.
        NOTE(review): presumably a dict mapping class code (as a string) to
        pixel count, since downstream code calls ``int(key)`` on the keys —
        confirm against the Earth Engine documentation.
    """
    banda = f'classification_{ano}'
    ee_geometry = ee.Geometry(mapping(geometry))

    # Select the single band of interest *before* reducing.
    histograma = mapbiomas.select(banda).reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=ee_geometry,
        scale=30,
        maxPixels=1e13
    ).get(banda)
    return histograma.getInfo()

def processar_geometria(row):
    """
    Best-effort wrapper around ``obter_frequencias_para_geometria`` for one row.

    Args:
        row: Object exposing ``Index`` and ``geometry`` attributes (e.g. a
            tuple produced by ``GeoDataFrame.itertuples()``).

    Returns:
        ``(row.Index, histogram)`` on success. If anything raises, the error
        is printed and ``(row.Index, None)`` is returned instead, so one
        failing geometry does not abort the whole batch.
    """
    try:
        frequencias_da_geometria = obter_frequencias_para_geometria(row.geometry)
    except Exception as e:
        print(f"Erro ao processar a geometria no índice {row.Index}: {e}")
        resultado_geometria = (row.Index, None)
    else:
        resultado_geometria = (row.Index, frequencias_da_geometria)
    return resultado_geometria

total_geometrias = len(gdf)
frequencias_por_indice = {}

# Fan the per-geometry Earth Engine requests out over a thread pool.
# processar_geometria never raises (it returns (index, None) on failure),
# so future.result() is safe to call directly.
with ThreadPoolExecutor(max_workers=16) as executor:
    futures = {executor.submit(processar_geometria, row): row.Index for row in gdf.itertuples()}
    with tqdm(total=total_geometrias, desc="Processando Geometrias") as pbar:
        for future in as_completed(futures):
            index, frequencia = future.result()
            frequencias_por_indice[index] = frequencia
            pbar.update(1)

# Results arrive in completion order and are keyed by index *label*.
# Place them by walking gdf.index so each histogram lands on its own row
# even when the index is not a 0..n-1 RangeIndex (e.g. after filtering).
frequencias = np.empty(len(gdf), dtype=object)
for posicao, rotulo in enumerate(gdf.index):
    frequencias[posicao] = frequencias_por_indice.get(rotulo)

resultado = gdf.copy()
resultado['frequencia'] = frequencias



ANO: 2016


Processando Geometrias: 100%|██████████| 2599/2599 [01:07<00:00, 38.37it/s]


In [8]:
# Persist the masks together with their MapBiomas histograms.
# NOTE(review): the 'frequencia' column holds dicts and the target is an ESRI
# Shapefile, whose text fields are length-limited; long histograms may come
# back truncated when the file is re-read — confirm, or consider another format.
caminho_info_mapbiomas = 'LEM_data/LEM_mascaras_v1_shp/LEM_mascaras_2020-05-01_to_2020-06-01_OVERLAP/info_mapbiomas.shp'
resultado.to_file(caminho_info_mapbiomas)

### Filtra as máscaras com base nas informações obtidas do MapBiomas

In [9]:
from ast import literal_eval  


# Class codes counted as agriculture when filtering the masks; they are
# compared against the integer-converted keys of each 'frequencia' histogram.
# NOTE(review): presumably the MapBiomas temporary-crop classes — verify each
# code against the MapBiomas Collection 9 legend.
lista_agro = [19, 39, 20, 40, 62, 41]

def safe_eval(value):
    """
    Parse a Python literal from ``value`` without ever raising.

    Args:
        value: Usually the string form of a histogram dict read back from
            the shapefile; any other input is handed to ``literal_eval`` as-is.

    Returns:
        The parsed literal, or ``None`` (after printing the offending value)
        when it cannot be evaluated.
    """
    try:
        return literal_eval(value)
    # Besides SyntaxError/ValueError, literal_eval can raise TypeError (e.g.
    # an unhashable dict key) and MemoryError/RecursionError on pathological
    # input; all of them mean "unparseable" here.
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        print(f"Erro ao avaliar: {value}")
        return None
    
def is_agro_dominant(row, agro_keys, threshold=0.90):
    """
    Tell whether agricultural classes exceed ``threshold`` of a histogram.

    Args:
        row: Mapping of class code (anything ``int()`` accepts, e.g. ``'19'``)
            to the amount observed for that class.
        agro_keys: Iterable of integer class codes counted as agriculture.
        threshold: Fraction of the total that the agricultural share must
            strictly exceed. Defaults to 0.90, the previously hard-coded value.

    Returns:
        ``True`` when the agricultural share is strictly greater than
        ``threshold``; ``False`` otherwise, including when the histogram is
        empty or sums to zero.
    """
    total_area = sum(row.values())
    if total_area <= 0:
        # Nothing observed: avoid dividing by zero.
        return False

    agro_codes = set(agro_keys)
    agro_area = sum(area for code, area in row.items() if int(code) in agro_codes)
    return (agro_area / total_area) > threshold

# Re-read the masks annotated with MapBiomas histograms.
caminho_info = 'LEM_data/LEM_mascaras_v1_shp/LEM_mascaras_2020-05-01_to_2020-06-01_OVERLAP/info_mapbiomas.shp'
gdf = gpd.read_file(caminho_info)

# Parse the stored histogram of every row; unparseable values become None
# and those rows are discarded.
gdf['frequencia'] = gdf['frequencia'].apply(safe_eval)
tem_frequencia = gdf['frequencia'].notna()
gdf = gdf[tem_frequencia]

# Keep only the masks dominated by the agricultural classes in lista_agro.
mascara_agro = gdf['frequencia'].apply(is_agro_dominant, args=(lista_agro,))
filtered_df = gdf[mascara_agro]

In [11]:
# Write the agriculture-dominant masks. The v2 output tree is not created
# anywhere else in this notebook, so make sure it exists before writing.
caminho_saida = 'LEM_data/LEM_mascaras_v2_shp/LEM_mascaras_2020-05-01_to_2020-06-01_OVERLAP/filtro_mapbiomas.shp'
os.makedirs(os.path.dirname(caminho_saida), exist_ok=True)
filtered_df.to_file(caminho_saida)