Testing Time

In [None]:
import geopandas as gpd
from shapely.geometry import box
import rasterio
import numpy as np
import pandas as pd

# Raster paths
raster_path_2020 = "../../../../data/preprocessing/worldcover/worldcover_2020_clipped.tif"
raster_path_2021 = "../../../../data/preprocessing/worldcover/worldcover_2021_clipped.tif"

# Edge length of one grid cell, expressed in the units of the raster CRS.
# NOTE(review): reading this as "10 m" assumes a projected CRS in metres - confirm.
GRID_SIZE = 10

# Load raster to get extent, resolution, no-data value and CRS.
# Everything is read while the dataset is still open, so nothing below
# depends on attributes of a closed dataset handle.
with rasterio.open(raster_path_2020) as src:
    raster_bounds = src.bounds
    raster_transform = src.transform
    pixel_size = src.res[0]  # assuming square pixels
    nodata = src.nodata
    raster_crs = src.crs

# Generate grid polygons of GRID_SIZE x GRID_SIZE covering the raster extent
xmin, ymin, xmax, ymax = raster_bounds

# Cell origins are computed as start + i * step, which avoids the rounding
# drift that accumulates when a float is incremented repeatedly in a loop.
x_starts = np.arange(xmin, xmax, GRID_SIZE).tolist()
y_starts = np.arange(ymin, ymax, GRID_SIZE).tolist()

# Column-major order: all cells of the first x column, then the next column, ...
grid_polygons = [
    box(x_start, y_start, x_start + GRID_SIZE, y_start + GRID_SIZE)
    for x_start in x_starts
    for y_start in y_starts
]

# Create GeoDataFrame from grid polygons, tagged with the raster's CRS
grid_gdf = gpd.GeoDataFrame(geometry=grid_polygons, crs=raster_crs)


In [27]:
# Function to sample raster values at polygon centroids
def sample_raster_values(raster_path, geodf):
    """Sample the first band of a raster at the centroid of each geometry.

    Parameters
    ----------
    raster_path : str or path-like
        Raster file opened with ``rasterio.open``.
    geodf : geopandas.GeoDataFrame
        Frame whose ``geometry.centroid`` points are sampled, in row order.

    Returns
    -------
    list
        One entry per row of ``geodf``. Entries equal to the raster's own
        no-data value are replaced by ``np.nan``.
    """
    with rasterio.open(raster_path) as src:
        # Use the no-data value of the raster being sampled, not a
        # notebook-level variable read from a different file.
        raster_nodata = src.nodata

        # Collect every centroid first so the raster is sampled in one call
        centroids = geodf.geometry.centroid
        coords = [(point.x, point.y) for point in centroids]

        values = []
        for val in src.sample(coords):
            v = val[0]  # first band only
            # Replace no-data with NaN for clarity
            if raster_nodata is not None and v == raster_nodata:
                v = np.nan
            values.append(v)
    return values

# Attach one sampled column per WorldCover year to the grid
for column_name, year_raster in (
    ('value_2020', raster_path_2020),
    ('value_2021', raster_path_2021),
):
    grid_gdf[column_name] = sample_raster_values(year_raster, grid_gdf)


In [31]:
# Keep only the cells that have a sampled value for both years
has_both_years = grid_gdf[['value_2020', 'value_2021']].notna().all(axis=1)
grid_gdf_clean = grid_gdf.loc[has_both_years].copy()

print(f"Removed polygons with null values. Remaining polygons: {len(grid_gdf_clean)}")


Removed polygons with null values. Remaining polygons: 32154


In [None]:
# Optional: write the cleaned grid and its sampled values to a GeoPackage layer
output_gpkg = "../../../../data/analysis/worldcover/worldcover_time.gpkg"
grid_gdf_clean.to_file(output_gpkg, driver="GPKG", layer='grid')
print("Saved 10m grid with raster values.")


Saved 10m grid with raster values.
