In [0]:
from pyspark.sql.functions import current_timestamp

In [0]:
def add_ingestion_date(input_df):
    """Return ``input_df`` with an ``ingestion_date`` column attached.

    Args:
        input_df: Object exposing ``withColumn`` (a Spark DataFrame in this
            notebook's usage).

    Returns:
        The result of ``input_df.withColumn("ingestion_date", ...)``, where
        the column value is the ``current_timestamp()`` expression. The input
        itself is not reassigned or otherwise touched here.
    """
    return input_df.withColumn("ingestion_date", current_timestamp())

In [0]:
def re_arrange_partition_columns(input_df, partition_column):
    """Reorder the columns so that ``partition_column`` comes last.

    Args:
        input_df: Object exposing ``schema.names`` and ``select`` (a Spark
            DataFrame in this notebook's usage).
        partition_column: Name of the column to move to the end.

    Returns:
        ``input_df.select(...)`` called with every other column in its
        existing order, followed by ``partition_column``.
    """
    # Keep the relative order of all non-partition columns.
    leading_columns = [
        name for name in input_df.schema.names if name != partition_column
    ]
    # The partition column is always appended, whether or not it was found in
    # the schema; a missing column is therefore left for select() to handle.
    return input_df.select(leading_columns + [partition_column])

In [0]:
from delta.tables import DeltaTable

# Set the session's current catalog (motor_dev) and schema (silver).
# These statements run as soon as this cell executes, not when
# merge_delta_data is called, so they affect the whole Spark session.
# NOTE(review): `spark` is not created in the visible cells; presumably it is
# the session object injected by the notebook runtime — confirm.
spark.sql("USE CATALOG motor_dev")
spark.sql("USE SCHEMA silver")

def merge_delta_data(input_df, db_name, table_name, merge_condition, partition_column):
    """Upsert ``input_df`` into a Delta table, creating the table if needed.

    Args:
        input_df: Source DataFrame to merge or write.
        db_name: Schema/database part of the target table name.
        table_name: Table part of the target table name.
        merge_condition: Join condition passed to ``merge``; the target is
            aliased ``tgt`` and the source ``src``.
        partition_column: Column passed to ``partitionBy`` when the table is
            first created. It is not used when the table already exists.

    Returns:
        None. The function only writes to the table.
    """
    # NOTE(review): presumably enabled so a merge_condition that filters on the
    # partition column can prune partitions — confirm this is still required.
    spark.conf.set("spark.databricks.optimizer.dynamicPartitionPruning", "true")

    target_name = f"{db_name}.{table_name}"

    if not spark.catalog.tableExists(target_name):
        # First load: no table to merge into, so write the DataFrame out as a
        # new partitioned Delta table.
        delta_writer = input_df.write.format("delta").mode("overwrite")
        delta_writer.partitionBy(partition_column).saveAsTable(target_name)
        return

    # Table already exists: update every matched row and insert the rest.
    target_table = DeltaTable.forName(spark, target_name)
    merge_builder = target_table.alias("tgt").merge(
        input_df.alias("src"),
        merge_condition,
    )
    merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()



In [0]:
def df_column_to_list(input_df, column_name):
    """Collect the distinct values of one column into a Python list.

    Args:
        input_df: Object exposing ``select(...).distinct().collect()`` (a
            Spark DataFrame in this notebook's usage).
        column_name: Name of the column whose values are wanted.

    Returns:
        A list with one entry per collected row, each read as
        ``row[column_name]``, in the order ``collect()`` returned them.
    """
    # collect() pulls every distinct row back to the caller, so this is meant
    # for columns with a manageable number of distinct values.
    distinct_rows = input_df.select(column_name).distinct().collect()

    values = []
    for record in distinct_rows:
        values.append(record[column_name])
    return values