In [0]:
import datetime
from dateutil.relativedelta import relativedelta
import pyspark.sql.functions as F
import pyspark.sql.types as T
import os
from pyspark.sql.functions import lit, when,to_date

def _read_text_widget(name, default, label):
    """Create a text widget, then read back and echo its current value.

    Args:
        name: Widget name; also used as the left-hand side of the echoed line.
        default: Value the widget is created with.
        label: Caption passed to ``dbutils.widgets.text``.

    Returns:
        Whatever ``dbutils.widgets.get`` returns for ``name``.
    """
    dbutils.widgets.text(name, default, label)
    value = dbutils.widgets.get(name)
    # Same text as the f-string "=" specifier would print: name = repr(value)
    print(f"{name} = {value!r}")
    return value


environment = os.getenv("ENVIRONMENT")

# Drop any widgets left over from a previous run before declaring this notebook's own.
# NOTE(review): Databricks' widget docs warn about creating widgets in the same cell
# that removes them — confirm this is reliable on the target runtime.
dbutils.widgets.removeAll()

brewdat_library_version = _read_text_widget(
    "brewdat_library_version", "v1.1.5", "01 - brewdat_library_version"
)
target_database = _read_text_widget(
    "target_database", "gld_maz_logistics_warehouse", "02 - target_database"
)
target_table = _read_text_widget("target_table", "maz_stock_h_plus", "03 - target_table")
target_zone = _read_text_widget("target_zone", "maz", "04 - target_zone")
target_business_domain = _read_text_widget(
    "target_business_domain", "logistics", "05 - target_business_domain"
)
target_business_subdomain = _read_text_widget(
    "target_business_subdomain", "warehouse", "06 - target_business_subdomain"
)
target_subzone = _read_text_widget("target_subzone", "maz", "07 - target_subzone")
target_product = _read_text_widget("target_product", "shipment_content", "08 - target_product")

# No default: the date is stored as entered and parsed later as yyyy-MM-dd.
date_analysis = _read_text_widget("date_analysis", "", "09 - date_analysis")

print(f"{environment = }")

In [0]:
import sys

# Put the requested BrewDat library version on the import path, then import from it.
# The path is built from the brewdat_library_version widget value.
brewdat_library_path = f"/Workspace/Repos/brewdat_library/{brewdat_library_version}"
sys.path.append(brewdat_library_path)
from brewdat.data_engineering import common_utils, lakehouse_utils, transform_utils, write_utils

# Share this notebook's dbutils with the library.
common_utils.set_global_dbutils(dbutils)

In [0]:
%run "../set_project_context"

In [0]:
# Configure SPN for all ADLS access using AKV-backed secret scope.
# None of the names below are defined in this notebook; presumably they come from
# the set_project_context notebook run in the previous cell — TODO confirm.
spn_access_settings = {
    "storage_account_names": [adls_silver_gold_storage_account_name],
    "key_vault_name": key_vault_name,
    "spn_client_id": spn_client_id,
    "spn_secret_name": spn_secret_name,
}
common_utils.configure_spn_access_for_adls(**spn_access_settings)

In [0]:
# Session-level Databricks adaptive shuffle / skew-join settings, applied in order
# before the source table is read.
spark_tuning = {
    "spark.databricks.adaptive.autoOptimizeShuffle.minPartitionNumber": 10000,
    "spark.databricks.adaptive.autoOptimizeShuffle.enabled": True,
    "spark.databricks.adaptive.skewJoin.spillProof.enabled": True,
}
for conf_key, conf_value in spark_tuning.items():
    spark.conf.set(conf_key, conf_value)

In [0]:
# Read the stock view that feeds this history table. Any failure is handed to the
# library's exit helper rather than surfacing as a raw traceback.
source_table_name = "gld_maz_logistics_warehouse.maz_stock_vw_plus"
try:
    stock_vw_d = spark.read.table(source_table_name).alias("stock_vw_d")
except Exception:
    common_utils.exit_with_last_exception()

In [0]:
# date_analysis is free text from a widget whose default is "". A value that does
# not parse as yyyy-MM-dd would not produce a usable date in to_date() below, which
# would leave every row without an insertion_date (a key column) and without a
# partition value. Fail fast with a clear message instead.
try:
    datetime.datetime.strptime(date_analysis, "%Y-%m-%d")
except ValueError as exc:
    raise ValueError(
        f"date_analysis must be a date in yyyy-MM-dd format, got {date_analysis!r}"
    ) from exc

# Stamp every row of the stock view with the analysis date.
stock_vw_d = stock_vw_d.withColumn("insertion_date", to_date(lit(date_analysis), "yyyy-MM-dd"))

# Key columns of the target table (used for de-duplication and for the upsert).
ids = ["insertion_date", "country_code", "plant_code", "storage_location_code", "product_code", "batch_code"]

# Time column handed to the de-duplication step as its watermark.
watermark = ["insertion_date"]

In [0]:
# Monthly partition key: insertion_date formatted as yyyyMM and cast to an integer.
partition_key = F.date_format(F.col("insertion_date"), "yyyyMM").cast("int")
stock_vw_d = stock_vw_d.withColumn("__partition_column", partition_key)

In [0]:
# First run the library's audit-column step, then de-duplicate on the key columns.
# NOTE(review): `watermark` is a one-element list while the keyword is singular
# (`watermark_column`) — confirm the library accepts a list here.
# NOTE(review): insertion_date is the same literal on every row of this frame, so it
# cannot rank duplicates against each other — confirm this is the intended watermark.
audited_stock = transform_utils.create_or_replace_audit_columns(stock_vw_d)
stock_vw_d = transform_utils.deduplicate_records(
    df=audited_stock,
    key_columns=ids,
    watermark_column=watermark,
)

In [0]:
# Sets location for gold folder (pending review).
# Every value that is validated or interpolated below must be non-empty.
# target_business_subdomain is part of the database folder name in target_location,
# so it is checked here as well; otherwise an empty value would silently yield a
# path such as ".../gld_<zone>_<domain>_/<table>".
params_list = [
    lakehouse_gold_root,
    target_zone,
    target_business_domain,
    target_business_subdomain,
    target_subzone,
    target_product,
    target_table,
]

if any(x is None or len(x) == 0 for x in params_list):
    raise ValueError("Location would contain null or empty values.")

# Library-side validation of the individual path components.
lakehouse_utils.assert_valid_zone(target_zone)
lakehouse_utils.assert_valid_business_domain(target_business_domain)
lakehouse_utils.assert_valid_folder_name(target_table)

# <root>/data/<zone>/<domain>/gld_<zone>_<domain>_<subdomain>/<table>, lower-cased.
target_location = (
    f"{lakehouse_gold_root}/data/{target_zone}/{target_business_domain}/"
    + f"gld_{target_zone}_{target_business_domain}_{target_business_subdomain}/{target_table}"
).lower()

print(f"{target_location = }")
print(f"{target_table = }")
print(f"{target_database = }")

In [0]:
def table_exists(database, table):
    """Report whether ``database.table`` can be described by Spark.

    Args:
        database: Database (schema) name.
        table: Table name inside ``database``.

    Returns:
        True when ``DESCRIBE TABLE database.table`` succeeds, False when it
        raises an ``Exception``.
    """
    try:
        spark.sql(f"DESCRIBE TABLE {database}.{table}")
    except Exception:
        # Only ordinary errors are read as "table not available". KeyboardInterrupt
        # and SystemExit (e.g. a cancelled run) propagate instead of becoming False.
        return False
    return True

In [0]:
# Retention purge, run only when the target table already exists.
# Rows whose insertion_date is two or more years away from the current date are
# deleted; ABS makes this apply in both directions.
# table_exists() is used for the existence check rather than spark.catalog.tableExists.
qualified_target = f"{target_database}.{target_table}"
if table_exists(target_database, target_table):
    purge_sql = f"DELETE FROM {qualified_target} WHERE ABS(DATEDIFF(year, GETDATE(), insertion_date)) >= 2"
    spark.sql(purge_sql).show()

In [0]:
# Upsert the frame into the target Delta table, matching on the key columns and
# partitioning by __partition_column. Options are gathered first so each choice
# can be read (and switched) in one place.
write_options = {
    "df": stock_vw_d,
    "location": target_location,
    "database_name": target_database,
    "table_name": target_table,
    # Alternative kept for reference: write_utils.LoadType.OVERWRITE_TABLE
    "load_type": write_utils.LoadType.UPSERT,
    "key_columns": ids,
    "partition_columns": ["__partition_column"],
    # Alternative kept for reference: write_utils.SchemaEvolutionMode.OVERWRITE_SCHEMA
    "schema_evolution_mode": write_utils.SchemaEvolutionMode.ADD_NEW_COLUMNS,
    "bad_record_handling_mode": write_utils.BadRecordHandlingMode.REJECT,
    "enable_vacuum": False,
    "enable_caching": False,
}
results = write_utils.write_delta_table(**write_options)
print(results)

In [0]:
# Pass the write results to the library's exit helper.
# NOTE(review): presumably this ends the notebook run and returns `results` to the
# caller — confirm against the BrewDat library documentation.
common_utils.exit_with_object(results)