In [0]:
%run ./variables

In [0]:
import json

from pyspark.sql.functions import col, lit, udf, when
from pyspark.sql.types import ArrayType, DoubleType, StringType, StructField, StructType
from shapely.wkb import loads as wkb_loads

In [0]:
# Schema for a GeoJSON Polygon object: "coordinates" is three nested arrays
# of doubles (rings -> positions -> numbers). Both fields are nullable.
geojson_schema_polygon = (
    StructType()
    .add("type", StringType(), True)
    .add("coordinates", ArrayType(ArrayType(ArrayType(DoubleType()))), True)
)

# Schema for a GeoJSON MultiPolygon object: "coordinates" is four nested arrays
# of doubles (polygons -> rings -> positions -> numbers). Both fields are nullable.
geojson_schema_multipolygon = (
    StructType()
    .add("type", StringType(), True)
    .add(
        "coordinates",
        ArrayType(ArrayType(ArrayType(ArrayType(DoubleType())))),
        True,
    )
)

In [0]:
def substitute_fields(bronze_df, source_id, target_id):
    """
    Copies "name", "identification" and "limit_municipality_id" from one row onto another.

    The values are read from the row whose object_id equals source_id and written
    to every row whose object_id equals target_id. All other rows are unchanged.
    If several rows match source_id, the one Spark returns first is used.

    Args:
        bronze_df (DataFrame): Input DataFrame containing "object_id" and the three copied columns.
        source_id: object_id of the row to read the values from.
        target_id: object_id of the row(s) to overwrite.
    Returns:
        DataFrame: A new DataFrame with the three columns replaced on the target row(s).
    Raises:
        IndexError: If no row has object_id equal to source_id.
    """
    copied_columns = ("name", "identification", "limit_municipality_id")

    # first() yields None on an empty result, so a missing source row can be
    # reported explicitly instead of failing with a bare "list index out of range".
    source_row = (
        bronze_df.filter(col("object_id") == source_id)
        .select(*copied_columns)
        .first()
    )
    if source_row is None:
        # Kept as IndexError so callers handling the previous failure still work.
        raise IndexError(
            f"substitute_fields: no row found with object_id == {source_id!r}"
        )

    # Same predicate for every column: only the target row(s) get the new value.
    is_target = col("object_id") == target_id
    for column_name in copied_columns:
        bronze_df = bronze_df.withColumn(
            column_name,
            when(is_target, lit(source_row[column_name])).otherwise(col(column_name)),
        )
    return bronze_df

In [0]:
def wkb_to_geojson(wkb_bytes):
    """
    Serializes a WKB (Well-Known Binary) geometry as GeoJSON text.

    The input is coerced to bytes, parsed into a geometry object, and that
    object's __geo_interface__ mapping is dumped as JSON.

    Args:
        wkb_bytes (bytes): The WKB-encoded geometry (any value accepted by bytes()).
    Returns:
        str or None: The GeoJSON string, or None when any step of the conversion raises.
    """
    try:
        raw = bytes(wkb_bytes)
        geometry = wkb_loads(raw)
        geo_mapping = geometry.__geo_interface__
        return json.dumps(geo_mapping)
    except Exception:
        # Best effort by design: any failure (including a None input) yields
        # None rather than propagating the error to the caller.
        return None

# Wrap wkb_to_geojson as a UDF with a declared StringType return type so it can
# be applied to a WKB column; the wrapped function returns None when conversion fails.
wkb_to_geojson_udf = udf(wkb_to_geojson, StringType())