In [1]:
import os
import ee
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from analyze_sites import analyze_site
from typing import Tuple, Dict, Optional
from land_composition_analysis import get_dynamic_world_visualization

# Directory the notebook kernel is running in
NOTEBOOK_DIR = os.getcwd()

# Project root is taken to be two levels above the notebook directory
PROJECT_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, '../..'))

# Load environment variables
load_dotenv()

# Fail early with a readable message: os.path.join(PROJECT_ROOT, None) would
# otherwise raise an opaque TypeError, and an empty value would silently
# resolve to the project root directory itself.
_gee_credentials_rel = os.getenv('GEE_CREDENTIALS_PATH')
if not _gee_credentials_rel:
    raise RuntimeError(
        "GEE_CREDENTIALS_PATH is not set; define it in the environment or in .env"
    )

# Resolve the key file against PROJECT_ROOT. If the variable holds an absolute
# path, os.path.join discards PROJECT_ROOT and the absolute path is used as-is.
GEE_CREDENTIALS_PATH = os.path.join(PROJECT_ROOT, _gee_credentials_rel)
GEE_PROJECT_ID = os.getenv('GEE_PROJECT_ID')

# Initialize GEE.
# The third positional parameter of ee.ServiceAccountCredentials is `key_data`
# (raw key material), not a project id, so the project is passed to
# ee.Initialize instead. When GEE_PROJECT_ID is unset this passes project=None,
# which is ee.Initialize's default.
credentials = ee.ServiceAccountCredentials('', GEE_CREDENTIALS_PATH)
ee.Initialize(credentials, project=GEE_PROJECT_ID)

In [2]:
def analyze_deforestation_composition(
    lat: float,
    lon: float,
    before_date: datetime,
    after_date: datetime,
    area_of_interest: Optional[ee.Geometry] = None,
    buffer_radius_m: float = 30,
    scale: float = 10,
) -> Dict:
    """
    Analyze land composition change between two dates for potential deforestation.

    Fetches one image per date via ``get_dynamic_world_visualization``, sums the
    ``trees`` and ``shrub_and_scrub`` bands of each, and flags pixels whose
    summed value is > 0 in the "before" image and == 0 in the "after" image.

    Args:
        lat (float): Latitude of the point of interest
        lon (float): Longitude of the point of interest
        before_date (datetime): Date for the initial analysis
        after_date (datetime): Date for the final analysis
        area_of_interest (ee.Geometry, optional): Geometry to analyze. If None,
            a buffer of ``buffer_radius_m`` around the (lon, lat) point is used.
        buffer_radius_m (float): Buffer distance applied to the point when
            ``area_of_interest`` is None. Defaults to 30.
        scale (float): ``scale`` passed to every ``reduceRegion`` call.
            Defaults to 10.

    Returns:
        Dict: Dictionary with the keys
            ``before_date`` / ``after_date`` (``YYYY-MM-DD`` strings),
            ``lat`` / ``lon`` (as passed in),
            ``deforested_area`` (sum over the area of the flagged mask times
            ``ee.Image.pixelArea()``), and
            ``before_vegetation_composition`` (mean of each vegetation band
            of the "before" image over the area, keyed by band name).
    """
    # Get images for both dates (second element of each returned pair is unused)
    before_image, _ = get_dynamic_world_visualization(before_date, lat, lon)
    after_image, _ = get_dynamic_world_visualization(after_date, lat, lon)

    # Define vegetation classes of interest
    vegetation_classes = ['trees', 'shrub_and_scrub']

    # Per-pixel sum of the vegetation bands; the reduced band is named 'sum'
    before_vegetation = before_image.select(vegetation_classes).reduce(ee.Reducer.sum())
    after_vegetation = after_image.select(vegetation_classes).reduce(ee.Reducer.sum())

    # Identify areas that changed from vegetation to non-vegetation.
    # NOTE(review): if these bands hold class probabilities rather than 0/1
    # flags, the summed value is rarely exactly 0 and this mask may be almost
    # always empty -- confirm what get_dynamic_world_visualization returns.
    deforested = before_vegetation.gt(0).And(after_vegetation.eq(0))

    # If no area provided, create a small buffer around the point
    if area_of_interest is None:
        area_of_interest = ee.Geometry.Point([lon, lat]).buffer(buffer_radius_m)

    def _reduce_band(image, reducer, band):
        # Reduce one band over the area of interest and fetch the value
        # client-side; every call is a separate blocking getInfo() request.
        return image.reduceRegion(
            reducer=reducer,
            geometry=area_of_interest,
            scale=scale,
        ).get(band).getInfo()

    # Calculate statistics
    stats = {
        'before_date': before_date.strftime('%Y-%m-%d'),
        'after_date': after_date.strftime('%Y-%m-%d'),
        'lat': lat,
        'lon': lon,
        'deforested_area': _reduce_band(
            deforested.multiply(ee.Image.pixelArea()),
            ee.Reducer.sum(),
            'sum',
        ),
        'before_vegetation_composition': {
            class_name: _reduce_band(
                before_image.select(class_name),
                ee.Reducer.mean(),
                class_name,
            )
            for class_name in vegetation_classes
        },
    }

    return stats

def process_deforestation_csv(
    input_csv: str,
    output_csv: str,
    year: int,
    vegetation_threshold: float = 0.5,
) -> None:
    """
    Process deforestation CSV and add Dynamic World analysis.

    For every row of ``input_csv`` the function calls
    ``analyze_deforestation_composition`` at the row's ``centroid_lat`` /
    ``centroid_lon`` for 1 September of ``year - 1`` and 1 September of
    ``year``. Rows whose summed "before" vegetation composition reaches
    ``vegetation_threshold`` are kept, merged with the analysis result, and
    written to ``output_csv``. Nothing is written when no row qualifies.

    Args:
        input_csv (str): Path to input CSV; must contain the columns
            ``centroid_lat`` and ``centroid_lon``
        output_csv (str): Path to output CSV
        year (int): Year to analyze
        vegetation_threshold (float): Minimum summed "before" vegetation
            composition for a row to be kept. Defaults to 0.5.
    """
    # Read the original CSV
    df = pd.read_csv(input_csv)

    # The two dates are the same for every row, so build them once.
    # Dry-season dates are used (fewer clouds): September of the previous
    # year and September of the requested year.
    before_date = datetime(year - 1, 9, 1)
    after_date = datetime(year, 9, 1)

    # One merged record (input row + analysis) per row that passes the filter
    results = []

    for row_index, row in df.iterrows():
        try:
            analysis = analyze_deforestation_composition(
                lat=row['centroid_lat'],
                lon=row['centroid_lon'],
                before_date=before_date,
                after_date=after_date,
            )

            # Only include the row if there was significant vegetation before
            total_vegetation = sum(analysis['before_vegetation_composition'].values())

            if total_vegetation >= vegetation_threshold:
                # Analysis keys win on a name clash with the input columns
                results.append({
                    **row.to_dict(),
                    **analysis
                })

        except Exception as e:
            # Best-effort: report the failing row and carry on with the rest.
            # A composition value of None also lands here, because sum()
            # raises on it.
            print(f"Error processing row {row_index}: {e}")
            continue

    # Build the new DataFrame and save it
    if results:
        results_df = pd.DataFrame(results)
        results_df.to_csv(output_csv, index=False)
        print(f"Created deforestation analysis CSV: {output_csv} with {len(results)} records")
    else:
        print("No significant deforestation detected in vegetation areas")

In [5]:
import geopandas as gpd
import os

# Data locations are given relative to the notebook's working directory (the
# same "../../data" convention this cell already used for its output folder)
# rather than as a machine-specific absolute path.
data_dir = "../../data"

# Read the shapefile
shp_path = os.path.join(data_dir, "argentina", "Coleccion_12.0_Argentina_1976-2023.shp")
gdf = gpd.read_file(shp_path)

# Show the frame summary and the first rows.
# info() prints its report itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
print("Información del shapefile:")
gdf.info()
print("\nPrimeras filas:")
print(gdf.head())

print("CRS del shapefile:", gdf.crs)

# Inclusive range of years to export
first_year, last_year = 2010, 2023

# Filter to Córdoba and the year range
gdf['FECHA'] = gdf['FECHA'].astype(int)
cordoba_gdf = gdf[
    (gdf['PROVINCIA'] == 'CORDOBA') &
    (gdf['FECHA'] >= first_year) &
    (gdf['FECHA'] <= last_year)
]

# Shapefiles and CSVs go to the same per-year output directory
output_dir = os.path.join(data_dir, "cordoba_por_anio")
csv_output_dir = output_dir
os.makedirs(output_dir, exist_ok=True)

print("CRS original:", cordoba_gdf.crs)

# One pass per year: write the shapefile in the source CRS, then the CSV with
# WGS84 centroids and WKT geometries.
for year in range(first_year, last_year + 1):
    # Explicit copy so the column assignments below do not trigger
    # pandas' copy/view warnings
    year_gdf = cordoba_gdf[cordoba_gdf['FECHA'] == year].copy()

    if year_gdf.empty:
        continue

    # Save one shapefile per year (still in the original CRS, no extra columns)
    output_file = os.path.join(output_dir, f"cordoba_{year}.shp")
    year_gdf.to_file(output_file)
    print(f"Archivo creado: cordoba_{year}.shp con {len(year_gdf)} registros")

    # Compute centroids before reprojecting.
    # NOTE(review): this assumes the source CRS is projected (the original
    # note mentions EPSG:22184) -- confirm against the CRS printed above.
    centroids = year_gdf.geometry.centroid

    # Separate GeoDataFrame for the centroids; it keeps year_gdf's index
    centroids_gdf = gpd.GeoDataFrame(geometry=centroids, crs=year_gdf.crs)

    # Convert both the polygons and the centroids to WGS84
    year_gdf = year_gdf.to_crs('EPSG:4326')
    centroids_gdf = centroids_gdf.to_crs('EPSG:4326')

    # Extract centroid coordinates (y is latitude, x is longitude in EPSG:4326)
    year_gdf['centroid_lat'] = centroids_gdf.geometry.y
    year_gdf['centroid_lon'] = centroids_gdf.geometry.x

    # Sanity-check the coordinate range
    print(f"\nAño {year} - Rango de coordenadas:")
    print(f"Latitud: {year_gdf['centroid_lat'].min():.4f} a {year_gdf['centroid_lat'].max():.4f}")
    print(f"Longitud: {year_gdf['centroid_lon'].min():.4f} a {year_gdf['centroid_lon'].max():.4f}")

    # Store geometries as WKT text
    year_gdf['polygon_geometry_wgs84'] = year_gdf.geometry.to_wkt()
    year_gdf['centroid_geometry_wgs84'] = centroids_gdf.geometry.to_wkt()

    # Drop the geometry column before saving to CSV
    year_gdf = year_gdf.drop('geometry', axis=1)

    # Save to CSV
    csv_file = os.path.join(csv_output_dir, f"cordoba_{year}.csv")
    year_gdf.to_csv(csv_file, index=False)
    print(f"CSV creado: cordoba_{year}.csv con {len(year_gdf)} registros")

Información del shapefile:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 213779 entries, 0 to 213778
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype   
---  ------     --------------   -----   
 0   FECHA      213779 non-null  int64   
 1   PROVINCIA  213779 non-null  object  
 2   DEPARTAM   213779 non-null  object  
 3   CABECERA   213779 non-null  object  
 4   SUPERF_HA  213779 non-null  float64 
 5   geometry   213779 non-null  geometry
dtypes: float64(1), geometry(1), int64(1), object(3)
memory usage: 9.8+ MB
None

Primeras filas:
   FECHA PROVINCIA         DEPARTAM            CABECERA  SUPERF_HA  \
0   2022     CHACO  ALMIRANTE BROWN  PAMPA DEL INFIERNO   88.12359   
1   2022     CHACO  ALMIRANTE BROWN  PAMPA DEL INFIERNO   89.73054   
2   2022     CHACO  ALMIRANTE BROWN  PAMPA DEL INFIERNO   56.54260   
3   2022     CHACO    INDEPENDENCIA         CAMPO LARGO    4.36480   
4   2022     CHACO  ALMIRANTE BROWN  PAMPA DEL INFIERNO   49.66285   

 