In [None]:
import pathlib
import pyproj
import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from rasterio.features import rasterize
from shapely.geometry import box
from utils import get_credentials, download_composite

## Download of ROIs (regions of interest) from Monitoreo Desmonte

### Generate the ROIs from Monitoreo Desmonte shapefiles

In [None]:
# Build the square bounding box (ROI) centred on a point
def create_roi(centroid: gpd.GeoSeries, buffer_distance: float) -> gpd.GeoSeries:
    """
    Return the axis-aligned square centred on ``centroid``.

    Args:
    centroid (gpd.GeoSeries): Point-like object exposing ``x`` and ``y``.
        NOTE(review): the annotation says GeoSeries, but the visible caller
        passes ``row['geometry'].centroid`` (a single geometry) - confirm.
    buffer_distance (float): Half of the side length of the square, in the
        same units as the centroid coordinates.

    Returns:
    gpd.GeoSeries: The geometry produced by ``box`` spanning
        ``centroid -/+ buffer_distance`` on both axes (annotation kept from
        the original; ``box`` returns a single geometry).
    """
    center_x, center_y = centroid.x, centroid.y

    west, east = center_x - buffer_distance, center_x + buffer_distance
    south, north = center_y - buffer_distance, center_y + buffer_distance

    # box() takes (xmin, ymin, xmax, ymax)
    return box(west, south, east, north)

# Define a function to create a fishnet (set of ROIs) from a shapefile
def create_fishnet(shapefile_path: pathlib.Path, cell_size: float, output_path: str) -> None:
    """
    Build a reduced set of square ROIs covering the vectors of a shapefile.

    One candidate ROI of side ``cell_size`` is centred on the centroid of every
    vector. Candidates are then visited from the most to the least populated
    (number of vectors lying entirely within them) and a candidate is kept only
    if it contains at least one vector that no previously kept ROI contains.
    The kept ROIs are written to ``output_path`` with ids ``ROI_0000``,
    ``ROI_0001``, ... in the CRS of the input shapefile.

    Args:
    shapefile_path (pathlib.Path): The path to the shapefile.
    cell_size (float): Side length of each ROI, in the units of the shapefile
        CRS (assumed to be metric).
    output_path (str): The output path for the ROIs in GeoJSON format.

    Note:
    Membership is tested with ``within``, so a vector that does not fit
    entirely inside any candidate ROI never causes an ROI to be kept.
    NOTE(review): confirm this is intended for vectors larger than ``cell_size``.
    """

    # Open a shapefile with geopandas
    gdf = gpd.read_file(shapefile_path)

    # Get crs (Assuming that it is in metric coordinates)
    crs = gdf.crs

    # True division: floor division would shrink the ROI for odd or fractional sizes
    half_size = cell_size / 2

    # Create the candidate ROI around the centroid of each vector
    rois = [create_roi(geometry.centroid, half_size) for geometry in gdf.geometry]
    grid_gdf = gpd.GeoDataFrame({'geometry': rois}, crs=crs)

    # Index labels of the vectors lying within each candidate ROI. Computed once
    # and reused below for both the count and the selection.
    vectors_by_roi = []
    for roi in rois:
        inside = gdf.geometry.within(roi).to_numpy()
        vectors_by_roi.append(set(gdf.index[inside]))

    # Count the number of vectors within each ROI
    grid_gdf['vector_count'] = [len(members) for members in vectors_by_roi]

    # Sort the ROIs by the number of vectors
    grid_gdf = grid_gdf.sort_values(by='vector_count', ascending=False)

    # Vectors already covered by a kept ROI
    counted_vectors = set()
    # List to store the final ROIs
    final_rois = []
    # grid_gdf still carries its default positional labels after sorting, so
    # each label indexes directly into `rois` and `vectors_by_roi`.
    for roi_position in grid_gdf.index:
        # Vectors of this ROI that no kept ROI contains yet
        new_vectors = vectors_by_roi[roi_position] - counted_vectors

        # If there are new vectors, add the ROI to the final list
        if new_vectors:
            final_rois.append(rois[roi_position])
            counted_vectors |= new_vectors

    # Create a GeoDataFrame with the final ROIs
    final_rois_gdf = gpd.GeoDataFrame({'id': [f"ROI_{str(i).zfill(4)}" for i in range(len(final_rois))],
                                       'geometry': final_rois}, crs=crs)

    # Save the final ROIs to a GeoJSON file
    final_rois_gdf.to_file(output_path, driver='GeoJSON')

    # Report where the final ROIs were written
    print(f"Final ROIs from {shapefile_path.name} saved to {output_path}")

In [None]:
# Credentials lookup (currently disabled)
# credentials = get_credentials() 

# Input: folder holding the per-year deforestation shapefiles
DATA_FOLDER = pathlib.Path("./omdena/data/Cordoba per year (deforestation)")  # Replace with the real path to the shapefiles
list_shapefiles_path = [*DATA_FOLDER.glob("*.shp")]

# Output: <ROOT_FOLDER>/omdena/geojson receives one GeoJSON of final ROIs per shapefile
ROOT_FOLDER = pathlib.Path("/media/tidop/Datos_4TB1/databases")  # Replace with the real path to the output folder
OMDENA_FOLDER = ROOT_FOLDER.joinpath("omdena")
GEOJSON_FOLDER = OMDENA_FOLDER.joinpath("geojson")

OMDENA_FOLDER.mkdir(parents=True, exist_ok=True)
GEOJSON_FOLDER.mkdir(parents=True, exist_ok=True)

# ROI side length in CRS units, chosen for 512x512 pixel rasters
cell_size = 5120

# Generate the ROIs of every shapefile, one GeoJSON per input file
for shapefile_path in list_shapefiles_path:
    create_fishnet(
        shapefile_path,
        cell_size,
        f"{GEOJSON_FOLDER}/{shapefile_path.stem}.geojson",
    )

### Download the S2 image composite for all the ROIs

In [None]:
# Collect, in name order, the GeoJSON files that hold the final ROIs
GEOJSON_FILES = sorted(GEOJSON_FOLDER.glob("*.geojson"))

# Output folders: ground-truth rasters ("gt") and the two input images ("t1", "t2")
rasters_folder = OMDENA_FOLDER / "gt"
imagest1_folder = OMDENA_FOLDER / "t1"
imagest2_folder = OMDENA_FOLDER / "t2"

# Create them up front so the download loop can write straight into them
rasters_folder.mkdir(parents=True, exist_ok=True)
imagest1_folder.mkdir(parents=True, exist_ok=True)
imagest2_folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Running sample id (shared by the t1, t2 and gt files of one ROI) and the
# metadata rows collected for the CSV
counter = 0
data_records = []

# 167 ROIs (2017 - 2023)
for geojson_file in GEOJSON_FILES:
    # Find the shapefile these ROIs were generated from (same file stem)
    shapefile_match = next(
        (path for path in list_shapefiles_path if path.stem == geojson_file.stem),
        None,
    )
    if shapefile_match is None:
        # Without the source vectors there is no ground truth to rasterize
        print(f"Error: no shapefile matches {geojson_file.name}; skipping its ROIs")
        continue

    # Load the ROIs and the vectors they were derived from
    gdf = gpd.read_file(geojson_file)
    gt_gdf = gpd.read_file(shapefile_match)
    # Get its crs in metric coordinates
    crs = gdf.crs

    # Get the year from the filename (last '_'-separated token of the stem)
    year = int(geojson_file.stem.split('_')[-1])

    # One transformer per file: the CRS is the same for all of its ROIs.
    # Passing the CRS object avoids building an invalid 'epsg:None' string
    # when the CRS has no matching EPSG code.
    transfutm2go = pyproj.Transformer.from_crs(crs, 'epsg:4326', always_xy=True)

    # Iterate over the ROIs
    for index, row in gdf.iterrows():
        roi_geometry = row['geometry']

        # Id and output paths of the sample being produced
        sample_id = str(counter).zfill(4)
        t1_path = imagest1_folder / f"{sample_id}.tif"
        t2_path = imagest2_folder / f"{sample_id}.tif"
        gt_path = rasters_folder / f"{sample_id}.tif"

        # Get the centroid of the ROI
        centroid = roi_geometry.centroid
        # Keep only the parts of the vectors that fall inside the ROI
        clipped_vectors = gpd.clip(gt_gdf, roi_geometry)

        # Convert the centroid to geographic coordinates
        lon, lat = transfutm2go.transform(centroid.x, centroid.y)

        try:
            # NOTE(review): both date ranges start on January 12 ("-01-12") while
            # the metadata records start_month = 1; confirm "-01-01" was not intended.
            # For t1 (year before the labelled year)
            download_composite(lat, lon, t1_path, 512, 10, f"{year -1}-01-12", f"{year -1}-12-31", 0.6)
            # For t2 (labelled year)
            download_composite(lat, lon, t2_path, 512, 10, f"{year}-01-12", f"{year}-12-31", 0.6)

            # Rasterize the clipped vectors on a 512x512 grid of 10-unit pixels
            # anchored at the upper-left corner (min x, max y) of the ROI
            roi_bounds = roi_geometry.bounds
            lx = roi_bounds[0]
            ly = roi_bounds[3]
            transform = rasterio.Affine(10.0, 0.0, lx, 0.0, -10.0, ly)
            out_shape = (512, 512)
            shapes = ((geom, 1) for geom in clipped_vectors.geometry)
            binary_raster = rasterize(shapes, out_shape=out_shape,
                                      transform=transform, fill=0,
                                      all_touched=True,
                                      dtype=np.uint8)

            # Save the binary ground-truth raster
            with rasterio.open(
                gt_path,
                'w',
                driver='GTiff',
                height=binary_raster.shape[0],
                width=binary_raster.shape[1],
                count=1,
                dtype=binary_raster.dtype,
                crs=crs,
                transform=transform,
            ) as dst:
                dst.write(binary_raster, 1)

        except Exception as e:
            # Remove whatever this attempt wrote so no folder keeps an
            # unpaired file for an id that has no metadata row
            for partial_path in (t1_path, t2_path, gt_path):
                partial_path.unlink(missing_ok=True)
            print(f"Error: {e}")
            print(f"{index+1}/{len(gdf)}: Skipped")
            continue

        # Metadata row for the sample that was just written
        data_records.append({
            "year_t1": year - 1,
            "year_t2": year,
            "start_month_t1": 1,
            "end_month_t1": 12,
            "start_month_t2": 1,
            "end_month_t2": 12,
            "lat": lat,
            "lon": lon,
            "id": sample_id
        })
        print(f"{index+1}/{len(gdf)}: Processed")
        # Report the id that was saved, then move on to the next id
        print(f"Images {sample_id} for {year} saved")
        counter += 1

In [None]:
# Write one CSV row per downloaded sample (no index column)
metadata_csv_path = OMDENA_FOLDER / "data.csv"
df = pd.DataFrame(data=data_records)
df.to_csv(path_or_buf=metadata_csv_path, index=False)

In [None]:
# Plot the images to visualize the data
# Image folder
image_folder = OMDENA_FOLDER / "plots"
image_folder.mkdir(parents=True, exist_ok=True)

# T1, T2 and GT rasters
gt_files = sorted(rasters_folder.glob("*.tif"))
t1_files = sorted(imagest1_folder.glob("*.tif"))
t2_files = sorted(imagest2_folder.glob("*.tif"))

# Pair the rasters by sample id (file stem) rather than by position, so an
# unpaired file in one folder cannot shift every following triplet
t1_by_id = {path.stem: path for path in t1_files}
t2_by_id = {path.stem: path for path in t2_files}

# Open the metadata CSV; read ids as text so their zero padding is preserved
df = pd.read_csv(OMDENA_FOLDER / "data.csv", dtype={"id": str})
df["id"] = df["id"].str.zfill(4)


def _read_display_rgb(src):
    """
    Read bands 3, 2, 1 of an open raster as an (H, W, 3) array scaled for display.

    The values are cast to float before applying the notebook's ``* 6 / 10000``
    display scaling, so integer rasters cannot wrap around when multiplied, and
    the result is clipped to the [0, 1] range that ``imshow`` expects.
    """
    rgb = src.read([3, 2, 1]).transpose(1, 2, 0).astype(float)
    return np.clip(rgb * 6 / 10000, 0, 1)


for i, gt_file in enumerate(gt_files):
    sample_id = gt_file.stem

    # Get the information related to the ROI
    record = df[df["id"] == sample_id]
    t1_file = t1_by_id.get(sample_id)
    t2_file = t2_by_id.get(sample_id)
    if record.empty or t1_file is None or t2_file is None:
        print(f"[{i + 1}/{len(gt_files)}] Skipped {sample_id}: missing metadata or T1/T2 image")
        continue

    record = record.iloc[0]
    roi = record["id"]
    # The CSV stores the two dates as year_t1 / year_t2; year_t2 is the labelled year
    year = int(record["year_t2"])
    lat = float(record["lat"])
    lon = float(record["lon"])

    # Read everything that is needed while the files are open
    with rasterio.open(gt_file) as src1, rasterio.open(t1_file) as src2, rasterio.open(t2_file) as src3:
        gt_mask = src1.read(1)
        t1_rgb = _read_display_rgb(src2)
        t2_rgb = _read_display_rgb(src3)

    # Plot the images:
    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    axs[0].imshow(t1_rgb)
    axs[0].set_title(f"T1 Image - {year - 1}")
    axs[1].imshow(t2_rgb)
    axs[1].set_title(f"T2 Image - {year}")
    axs[2].imshow(gt_mask, cmap='gray')
    axs[2].set_title("Ground Truth")

    # Plot the contour of the ground truth over the T2 image
    axs[3].contour(gt_mask, colors='red', levels=[0.5])
    axs[3].imshow(t2_rgb)
    axs[3].set_title("GT Contour")

    for ax in axs:
        ax.axis('off')

    fig.suptitle(f"ROI_{roi} [Lat:{lat} , Lon {lon}]")
    fig.savefig(image_folder / f"{roi}.png", bbox_inches='tight', dpi=300)
    # Close this exact figure; plt.clf() after plt.close() would create a new empty one
    plt.close(fig)

    # Print the progress
    print(f"[{i + 1}/{len(gt_files)}] Images saved for {roi}")