In [0]:
"""
write_utils.py

Contains helper functions for writing Spark DataFrames to Delta Lake with standardized configurations.
Promotes consistent use of overwrite behavior, schema merging, and partitioning across the pipeline.
"""

def write_to_delta(df, path, mode="overwrite", partition_by=None, merge_schema=True):
    """
    Writes a Spark DataFrame to Delta Lake format.

    Parameters:
    ----------
    df : pyspark.sql.DataFrame
        The DataFrame to write.
    path : str
        The target Delta Lake path.
    mode : str, optional
        Save mode, default is 'overwrite'.
    partition_by : str or list[str], optional
        Column(s) to partition by. ``None`` or an empty value writes the
        data unpartitioned.
    merge_schema : bool, optional
        Whether to merge schema during write (sets the ``mergeSchema``
        writer option to ``"true"``).

    Returns:
    -------
    None
    """
    writer = df.write.format("delta").mode(mode)

    if merge_schema:
        writer = writer.option("mergeSchema", "true")

    if partition_by:
        # A bare string is one column name; without this check it would be
        # unpacked character by character below.
        if isinstance(partition_by, str):
            columns = [partition_by]
        else:
            columns = list(partition_by)
        writer = writer.partitionBy(*columns)

    writer.save(path)


In [0]:
# /utils/write_utils

from pyspark.sql import SparkSession

def write_df_to_delta(df, path, mode="overwrite", merge_schema=True, register_table=True):
    """
    Generic Delta writer for any DataFrame.

    Writes ``df`` to ``path`` in Delta format and, when requested, drops and
    re-creates a catalog table named after the last segment of ``path`` that
    points at the same location.

    Parameters:
    - df: Spark DataFrame to write
    - path: Destination path (e.g., /mnt/delta/bronze/...)
    - mode: Write mode (default is "overwrite")
    - merge_schema: Whether to enable mergeSchema (default True)
    - register_table: Whether to register a table at the same location.
      Any existing table with the derived name is dropped first.

    Raises:
    - ValueError: if register_table is True and no table name can be derived
      from ``path`` (empty, or only slashes). Raised before anything is written.
    """
    table_name = None
    if register_table:
        # Derive and validate the name up front so a bad path fails fast
        # instead of after the data has already been written.
        table_name = path.rstrip("/").split("/")[-1]  # last folder = table name
        if not table_name.strip():
            raise ValueError(
                f"Cannot derive a table name from path {path!r}; "
                "pass register_table=False or use a path with a final folder name."
            )

    writer = df.write.format("delta").mode(mode)
    if merge_schema:
        writer = writer.option("mergeSchema", "true")
    writer.save(path)

    if not register_table:
        return

    # Backtick-quote the identifier (doubling embedded backticks) and escape
    # the path literal so unusual folder names or quotes in the path cannot
    # break or alter the SQL statements.
    quoted_name = "`" + table_name.replace("`", "``") + "`"
    location = path.replace("\\", "\\\\").replace("'", "\\'")

    spark = SparkSession.builder.getOrCreate()
    # NOTE(review): if a *managed* table with this name already exists,
    # DROP TABLE also removes its data; confirm only tables created by this
    # helper use these names.
    spark.sql(f"DROP TABLE IF EXISTS {quoted_name}")
    spark.sql(f"""
            CREATE TABLE {quoted_name}
            USING DELTA
            LOCATION '{location}'
        """)