In [None]:
## Sentinel-2 preprocessing 

In [None]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS


# The 12 Sentinel-2 L2A band rasters share one filename pattern; only the
# band identifier changes, so the paths are generated from the band list.
_S2_BAND_IDS = (
    "B01", "B02", "B03", "B04", "B05", "B06",
    "B07", "B08", "B8A", "B09", "B11", "B12",
)
file_paths = [
    "/path/to/your/data/2020-08-11-00:00_2020-08-11-23:59_Sentinel-2_L2A_"
    + band_id
    + "_(Raw).tiff"
    for band_id in _S2_BAND_IDS
]


def get_resolution(file_path):
    """
    Report the pixel size of a raster file.

    Parameters:
        file_path (str): Path to the raster file.

    Returns:
        tuple: (resolution_x, resolution_y) as exposed by the dataset's
        ``res`` attribute. The values are in the units of the raster's CRS,
        so they are meters only when that CRS is a metric projected one.
    """
    with rasterio.open(file_path) as dataset:
        pixel_size = dataset.res
    return pixel_size


def project_raster(input_file, output_file, target_crs, resampling=None):
    """
    Reproject a raster to a specified coordinate reference system (CRS).

    The output grid (transform, width, height) is the one suggested by
    ``calculate_default_transform`` for the source bounds; every band of the
    input is warped onto it and written to ``output_file``. All other
    metadata is copied from the source dataset.

    Parameters:
        input_file (str): Path to the input raster.
        output_file (str): Path to the output reprojected raster.
        target_crs (CRS): Target coordinate reference system.
        resampling (Resampling, optional): Resampling kernel used while
            warping. Defaults to ``Resampling.nearest``, which leaves the
            original pixel values unmodified.

    Raises:
        ValueError: If the input raster carries no CRS, in which case it
            cannot be reprojected.
    """
    # Resolved here rather than in the signature so the default is looked up
    # at call time.
    if resampling is None:
        resampling = Resampling.nearest

    with rasterio.open(input_file) as src:
        if src.crs is None:
            raise ValueError(
                f"Cannot reproject {input_file!r}: the raster has no CRS defined."
            )

        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )

        # Start from the source metadata and override only what the warp changes.
        kwargs = src.meta.copy()
        kwargs.update({
            "crs": target_crs,
            "transform": transform,
            "width": width,
            "height": height
        })

        with rasterio.open(output_file, "w", **kwargs) as dst:
            # Raster band indexes are 1-based.
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=target_crs,
                    resampling=resampling
                )


def _grid_length(extent, pixel_size):
    """
    Count the whole pixels of ``pixel_size`` that fit inside ``extent``.

    A ratio within floating-point noise of an integer is snapped to that
    integer first, so that e.g. 2.9999999999999996 yields 3 instead of being
    truncated to 2 and silently dropping a full row or column.
    """
    ratio = extent / pixel_size
    nearest = round(ratio)
    if abs(ratio - nearest) <= 1e-9 * max(1.0, abs(ratio)):
        return int(nearest)
    return int(ratio)


def resample_raster(input_file, output_file, target_res_x, target_res_y, resampling=None):
    """
    Resample a raster to a specified spatial resolution.

    The output grid is anchored at the top-left corner of the input bounds,
    keeps the input CRS, and contains only pixels that fit entirely inside
    the input extent. The output is always written as ``float32``.

    Parameters:
        input_file (str): Path to the input raster.
        output_file (str): Path to the output resampled raster.
        target_res_x (float): Desired pixel width, in the units of the input
            raster's CRS (meters for the UTM-projected rasters used here).
        target_res_y (float): Desired pixel height, in the same units.
        resampling (Resampling, optional): Resampling kernel. Defaults to
            ``Resampling.bilinear``.

    Raises:
        ValueError: If a target resolution is not positive, or if the input
            extent is smaller than one output pixel.
    """
    if target_res_x <= 0 or target_res_y <= 0:
        raise ValueError(
            f"Target resolution must be positive, got ({target_res_x}, {target_res_y})."
        )
    # Resolved here rather than in the signature so the default is looked up
    # at call time.
    if resampling is None:
        resampling = Resampling.bilinear

    with rasterio.open(input_file) as src:
        bounds = src.bounds
        # North-up grid: x grows to the right, y decreases downwards.
        transform = rasterio.Affine(
            target_res_x, 0.0, bounds.left, 0.0, -target_res_y, bounds.top
        )
        width = _grid_length(bounds.right - bounds.left, target_res_x)
        height = _grid_length(bounds.top - bounds.bottom, target_res_y)
        if width < 1 or height < 1:
            raise ValueError(
                f"Target resolution ({target_res_x}, {target_res_y}) is larger than "
                f"the extent of {input_file!r}; the output grid would be empty."
            )

        kwargs = src.meta.copy()
        kwargs.update({
            "transform": transform,
            "width": width,
            "height": height,
            "dtype": "float32"
        })

        with rasterio.open(output_file, "w", **kwargs) as dst:
            # Raster band indexes are 1-based.
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=src.crs,
                    resampling=resampling
                )


# Output names are derived by stripping only the final extension
# (rsplit on the last dot), so a dot elsewhere in the path (e.g. "./data" or
# a versioned directory) does not truncate the name.
projected_paths = [f"{path.rsplit('.', 1)[0]}_proj.tiff" for path in file_paths]

# 1. Report the native pixel size of every band.
for path in file_paths:
    res_x, res_y = get_resolution(path)
    print(f"Spatial resolution of {path.rsplit('/', 1)[-1]}: {res_x}m x {res_y}m")

# 2. Reproject every band to WGS 84 / UTM zone 23S (EPSG:32723).
for path, output_path in zip(file_paths, projected_paths):
    project_raster(path, output_path, CRS.from_epsg(32723))

# 3. Report the pixel size after reprojection.
for projected_path in projected_paths:
    res_x, res_y = get_resolution(projected_path)
    print(f"Projected resolution of {projected_path.rsplit('/', 1)[-1]}: {res_x}m x {res_y}m")

# 4. Bring every projected band onto a common 10 x 10 pixel size.
for projected_path in projected_paths:
    output_resampled_path = f"{projected_path.rsplit('.', 1)[0]}_10m.tiff"
    resample_raster(projected_path, output_resampled_path, 10, 10)

In [None]:
## OpenStreetMap preprocessing 

In [None]:
import osmnx as ox
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt


def download_road_network(place_name):
    """
    Fetch the drivable OpenStreetMap road network of a place, plot it, and
    return it as cleaned GeoDataFrames.

    Parameters:
        place_name (str): The name of the place to extract the network.

    Returns:
        tuple: (nodes GeoDataFrame, edges GeoDataFrame). Selected descriptive
        columns are removed from both tables, ``street_count`` and ``lanes``
        are coerced to numbers (missing lane counts become 1.0), and the
        edges have the ``key`` index level dropped and the remaining index
        moved into regular columns.
    """
    road_graph = ox.graph_from_place(place_name, network_type="drive")

    # Draw the network as thin blue lines on white, with the nodes hidden.
    line_colour = "#1f77b4"
    figure, axis = plt.subplots(figsize=(10, 10))
    ox.plot_graph(
        road_graph,
        ax=axis,
        bgcolor="#ffffff",
        node_color=line_colour,
        node_size=0,
        edge_color=line_colour,
        edge_linewidth=0.2,
    )
    plt.show()

    node_table, edge_table = ox.graph_to_gdfs(road_graph)

    # Columns not needed downstream; any that are absent are skipped.
    unused_node_columns = ["highway", "ref"]
    unused_edge_columns = [
        "maxspeed", "bridge", "ref", "access", "tunnel", "junction", "width",
        "area", "service", "osmid", "name", "highway", "reversed"
    ]
    node_table = node_table.drop(columns=unused_node_columns, errors="ignore")
    edge_table = edge_table.drop(columns=unused_edge_columns, errors="ignore")

    # Numeric coercion: unparseable values become NaN rather than raising.
    node_table["street_count"] = pd.to_numeric(
        node_table["street_count"], errors="coerce"
    )
    edge_table["oneway"] = edge_table["oneway"].astype(float, errors="ignore")
    lane_counts = pd.to_numeric(edge_table["lanes"], errors="coerce")
    edge_table["lanes"] = lane_counts.fillna(1.0)

    # Flatten the edge index so the remaining levels become ordinary columns.
    flat_edges = edge_table.droplevel("key").reset_index()

    return node_table, flat_edges


def extract_industrial_zones(place_name, output_path):
    """
    Extract selected land-use polygons from OpenStreetMap and save them.

    Besides industrial and commercial land use, the query also covers the
    other ``landuse`` values listed below (e.g. grass, cemetery, farmland),
    sports/recreation ``leisure`` areas, aerodromes and industrial buildings.
    Only Polygon and MultiPolygon geometries are kept; they are reprojected
    to UTM Zone 23S (EPSG:32723) before being written.

    Parameters:
        place_name (str): The place to extract geometries from.
        output_path (str): Path to save the resulting shapefile.
    """
    tags = {
        "landuse": [
            "industrial", "warehouse", "commercial", "retail", "factory",
            "construction", "grass", "cemetery", "railway", "recreation_ground",
            "farmland"
        ],
        "leisure": ["sports_centre", "recreation_ground"],
        "aeroway": "aerodrome",
        "building": "industrial",
    }

    # OSMnx 1.6 introduced features_from_place as the replacement for
    # geometries_from_place, and OSMnx 2.0 removed the old name. Prefer the
    # new function and fall back so that older installations keep working.
    fetch_features = getattr(ox, "features_from_place", None)
    if fetch_features is None:
        fetch_features = ox.geometries_from_place

    geometries = fetch_features(place_name, tags)

    # Keep only the geometry column and discard points and lines.
    gdf = gpd.GeoDataFrame(geometries[["geometry"]])
    gdf = gdf[gdf.geom_type.isin(["Polygon", "MultiPolygon"])]

    # Reproject to UTM Zone 23S (EPSG:32723)
    gdf_utm = gdf.to_crs(epsg=32723)
    gdf_utm.to_file(output_path)


place = "Rio de Janeiro, Brazil"

# Download the drivable road network, then write the node layer followed by
# the edge layer.
nodes, edges = download_road_network(place)
for _layer, _shapefile in (
    (nodes, "/path/to/your/data/osm_nodes.shp"),
    (edges, "/path/to/your/data/osm_edges.shp"),
):
    _layer.to_file(_shapefile)

# Extract the land-use polygons for the same place.
extract_industrial_zones(place, "/path/to/your/output/open_street_map.shp")

In [None]:
## GHSL and DEM preprocessing  

In [None]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS


def get_resolution(file_path):
    """
    Look up the pixel size of a raster file.

    Parameters:
        file_path (str): Path to the raster file.

    Returns:
        tuple: (resolution_x, resolution_y), taken from the dataset's ``res``
        attribute. The values are in the units of the raster's CRS, so they
        are meters only when that CRS is a metric projected one.
    """
    with rasterio.open(file_path) as raster:
        resolution = raster.res
    return resolution


def project_raster(input_file, output_file, target_crs, resampling=None):
    """
    Reproject a raster to a specified coordinate reference system (CRS).

    The output grid (transform, width, height) is the one suggested by
    ``calculate_default_transform`` for the source bounds; every band of the
    input is warped onto it and written to ``output_file``. All other
    metadata is copied from the source dataset.

    Parameters:
        input_file (str): Path to the input raster.
        output_file (str): Path to the output reprojected raster.
        target_crs (CRS): Target coordinate reference system.
        resampling (Resampling, optional): Resampling kernel used while
            warping. Defaults to ``Resampling.bilinear``, which interpolates
            between neighbouring pixels.

    Raises:
        ValueError: If the input raster carries no CRS, in which case it
            cannot be reprojected.
    """
    # Resolved here rather than in the signature so the default is looked up
    # at call time.
    if resampling is None:
        resampling = Resampling.bilinear

    with rasterio.open(input_file) as src:
        if src.crs is None:
            raise ValueError(
                f"Cannot reproject {input_file!r}: the raster has no CRS defined."
            )

        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )

        # Start from the source metadata and override only what the warp changes.
        kwargs = src.meta.copy()
        kwargs.update({
            "crs": target_crs,
            "transform": transform,
            "width": width,
            "height": height
        })

        with rasterio.open(output_file, "w", **kwargs) as dst:
            # Raster band indexes are 1-based.
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=target_crs,
                    resampling=resampling
                )


def _grid_length(extent, pixel_size):
    """
    Count the whole pixels of ``pixel_size`` that fit inside ``extent``.

    A ratio within floating-point noise of an integer is snapped to that
    integer first, so that e.g. 2.9999999999999996 yields 3 instead of being
    truncated to 2 and silently dropping a full row or column.
    """
    ratio = extent / pixel_size
    nearest = round(ratio)
    if abs(ratio - nearest) <= 1e-9 * max(1.0, abs(ratio)):
        return int(nearest)
    return int(ratio)


def resample_raster(input_file, output_file, target_res_x, target_res_y, resampling=None):
    """
    Resample a raster to a specified spatial resolution.

    The output grid is anchored at the top-left corner of the input bounds,
    keeps the input CRS, and contains only pixels that fit entirely inside
    the input extent. The output is always written as ``float32``.

    Parameters:
        input_file (str): Path to the input raster.
        output_file (str): Path to the output resampled raster.
        target_res_x (float): Desired pixel width, in the units of the input
            raster's CRS (meters for the UTM-projected rasters used here).
        target_res_y (float): Desired pixel height, in the same units.
        resampling (Resampling, optional): Resampling kernel. Defaults to
            ``Resampling.bilinear``.

    Raises:
        ValueError: If a target resolution is not positive, or if the input
            extent is smaller than one output pixel.
    """
    if target_res_x <= 0 or target_res_y <= 0:
        raise ValueError(
            f"Target resolution must be positive, got ({target_res_x}, {target_res_y})."
        )
    # Resolved here rather than in the signature so the default is looked up
    # at call time.
    if resampling is None:
        resampling = Resampling.bilinear

    with rasterio.open(input_file) as src:
        bounds = src.bounds
        # North-up grid: x grows to the right, y decreases downwards.
        transform = rasterio.Affine(
            target_res_x, 0.0, bounds.left, 0.0, -target_res_y, bounds.top
        )
        width = _grid_length(bounds.right - bounds.left, target_res_x)
        height = _grid_length(bounds.top - bounds.bottom, target_res_y)
        if width < 1 or height < 1:
            raise ValueError(
                f"Target resolution ({target_res_x}, {target_res_y}) is larger than "
                f"the extent of {input_file!r}; the output grid would be empty."
            )

        kwargs = src.meta.copy()
        kwargs.update({
            "transform": transform,
            "width": width,
            "height": height,
            "dtype": "float32"
        })

        with rasterio.open(output_file, "w", **kwargs) as dst:
            # Raster band indexes are 1-based.
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=src.crs,
                    resampling=resampling
                )


input_path = "/path/to/your/data/copernicus_dem.tif"

# Each stage names its output after its input: the final extension is
# stripped and a stage suffix is appended.
projected_path = f"{input_path.rsplit('.', 1)[0]}_proj.tiff"
resampled_path = f"{projected_path.rsplit('.', 1)[0]}_10m.tiff"

# Pixel size of the untouched input.
res_x, res_y = get_resolution(input_path)
print(f"Original resolution: {res_x} m × {res_y} m")

# Reproject to EPSG:32723, then report the resulting pixel size.
project_raster(input_path, projected_path, CRS.from_epsg(32723))
res_x, res_y = get_resolution(projected_path)
print(f"After reprojection: {res_x} m × {res_y} m")

# Resample the projected raster to 10 x 10 pixels and report once more.
resample_raster(projected_path, resampled_path, 10, 10)
res_x, res_y = get_resolution(resampled_path)
print(f"After resampling: {res_x} m × {res_y} m")