In [None]:
def extract_latest_lookup_values(
    lookup_table: DataFrame,
    lu_name: str,
    lu_name_col: str = "lu_name",
    lu_id_col: str = "lu_id",
    lu_desc_col: str = "lu_desc",
    timestamp_col: str = "IPC_ZST",
) -> DataFrame:
    """
    Extract the latest lookup values for a given lu_name from a lookup table.

    This function performs the following operations:
    1. Filters the lookup table for a specific lu_name (the column value is
       trimmed before comparison; ``lu_name`` itself is compared as given)
    2. Keeps, per lu_id, the single row with the latest timestamp. If several
       rows of one lu_id share that timestamp, the one with the smallest
       lu_desc is kept so that repeated runs return the same row.
    3. Returns the resulting lu_id / lu_desc pairs (one row per lu_id),
       wrapped in a broadcast hint for efficient mapping

    Args:
        lookup_table (DataFrame): The input lookup table
        lu_name (str): Name of the lookup to filter
        lu_name_col (str, optional): Name of the column containing lookup names. Defaults to "lu_name".
        lu_id_col (str, optional): Name of the lookup ID column. Defaults to "lu_id".
        lu_desc_col (str, optional): Name of the lookup description column. Defaults to "lu_desc".
        timestamp_col (str, optional): Name of the timestamp column. Defaults to "IPC_ZST".

    Returns:
        DataFrame: A DataFrame containing latest lu_id and lu_desc pairs

    Raises:
        ValueError: If any of the input arguments are invalid, or if
            lookup_table lacks one of the named columns

    Example:
        >>> lookup_table = spark.table("prod_app_degi_zdw_admin.v_udw001001t")
        >>> result = extract_latest_lookup_values(lookup_table, "PC99")
        >>> result.show(5)
        +-------+--------------------+
        |  lu_id|             lu_desc|
        +-------+--------------------+
        |GW33442|    Sonstiger Vertrag|
        |L32737 |          Erinnerung |
        |M135232|            Incident |
        |P032354|offener Beitrag H... |
        |P035236|          Datenschutz|
        +-------+--------------------+
    """
    # Input validation
    if not isinstance(lookup_table, DataFrame):
        raise ValueError("lookup_table must be a DataFrame")
    if not isinstance(lu_name, str):
        raise ValueError("lu_name must be a string")
    column_args = (lu_name_col, lu_id_col, lu_desc_col, timestamp_col)
    # The loop variable is deliberately not called `col`, to avoid confusion
    # with the Spark `col` function used further down.
    if not all(isinstance(name, str) for name in column_args):
        raise ValueError("All column name arguments must be strings")

    # Report the required and the missing columns in a stable (sorted) order.
    missing_columns = sorted(set(column_args) - set(lookup_table.columns))
    if missing_columns:
        raise ValueError(
            f"lookup_table must contain columns: {sorted(set(column_args))}; "
            f"missing: {missing_columns}"
        )

    # One partition per lu_id, newest timestamp first. The description is a
    # secondary sort key so that row_number() is deterministic on timestamp ties.
    latest_first = Window.partitionBy(lu_id_col).orderBy(
        col(timestamp_col).desc(), col(lu_desc_col).asc()
    )

    # Private helper column; it is removed again by the final select().
    rank_col = "_lookup_row_rank"

    # Keeping rank 1 leaves exactly one row per lu_id, so no distinct() is needed.
    latest_lookup_values = (
        lookup_table.filter(trim(col(lu_name_col)) == lu_name)
        .withColumn(rank_col, F.row_number().over(latest_first))
        .filter(col(rank_col) == 1)
        .select(lu_id_col, lu_desc_col)
    )

    return broadcast(latest_lookup_values)


def map_lookup_values(
    target_df: DataFrame,
    lookup_df: DataFrame,
    target_lu_id_col: str,
    lookup_lu_id_col: str = "lu_id",
    lookup_lu_desc_col: str = "lu_desc",
) -> DataFrame:
    """
    Map lookup values to a target DataFrame based on lu_id.

    The lookup DataFrame is reduced to its id and description columns and
    left-joined onto the target, so every target row is kept. The lookup id
    column is dropped from the result; the description column is added.

    Note:
        Only ``lookup_lu_id_col`` and ``lookup_lu_desc_col`` are taken from
        ``lookup_df``; any other lookup columns are not carried into the result.
        If ``lookup_df`` holds several rows for one id, the matching target
        rows are repeated once per lookup row.

    Args:
        target_df (DataFrame): The target DataFrame to map lookup values to
        lookup_df (DataFrame): The lookup DataFrame containing lu_id and lu_desc
        target_lu_id_col (str): Name of the lu_id column in the target DataFrame
        lookup_lu_id_col (str, optional): Name of the lu_id column in the lookup DataFrame. Defaults to "lu_id".
        lookup_lu_desc_col (str, optional): Name of the lu_desc column in the lookup DataFrame. Defaults to "lu_desc".

    Returns:
        DataFrame: The target DataFrame with mapped lookup descriptions

    Raises:
        ValueError: If a DataFrame argument is not a DataFrame or a column
            name argument is not a string

    Example:
        >>> lookup_table = spark.table("prod_app_degi_zdw_admin.v_udw001001t")
        >>> lookup_values = extract_latest_lookup_values(lookup_table, "PC99")
        >>> target_df = spark.table("my_target_table")
        >>> result = map_lookup_values(target_df, lookup_values, "my_lu_id_column")
        >>> result.show(5)
    """
    # Input validation
    if not isinstance(target_df, DataFrame):
        raise ValueError("target_df must be a DataFrame")
    if not isinstance(lookup_df, DataFrame):
        raise ValueError("lookup_df must be a DataFrame")
    column_args = (target_lu_id_col, lookup_lu_id_col, lookup_lu_desc_col)
    if not all(isinstance(name, str) for name in column_args):
        raise ValueError("All column name arguments must be strings")

    # Project the lookup to the two columns the mapping needs; this is where
    # lookup_lu_desc_col takes effect.
    lookup_pairs = lookup_df.select(lookup_lu_id_col, lookup_lu_desc_col)

    join_condition = target_df[target_lu_id_col] == lookup_pairs[lookup_lu_id_col]

    # Drop by column reference (not by name) so that only the lookup-side id
    # column is removed, even when both sides use the same column name.
    return target_df.join(lookup_pairs, join_condition, "left").drop(
        lookup_pairs[lookup_lu_id_col]
    )


# Example usage: extract the latest values of one lookup and map them onto a
# target table.
# NOTE(review): `spark` is not defined in this section — presumably a
# SparkSession provided by the notebook/runtime; confirm before running this
# as a standalone script.
if __name__ == "__main__":
    # Lookup source table and the lookup name to extract from it
    lookup_table = spark.table("prod_app_degi_zdw_admin.v_udw001001t")
    lu_name = "PC99"

    # Extract latest lookup values and print up to five of them
    lookup_values = extract_latest_lookup_values(lookup_table, lu_name)
    lookup_values.show(5)

    # Example of mapping these values to another DataFrame
    # NOTE(review): "my_target_table" / "my_lu_id_column" look like
    # placeholder names — replace with a real table and column.
    target_df = spark.table("my_target_table")
    result = map_lookup_values(target_df, lookup_values, "my_lu_id_column")
    result.show(5)