In [0]:
import json
from pyspark.sql import Row
from pyspark.sql.functions import current_date, to_date, col, when, lit
from datetime import datetime, timedelta
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.window import Window
import sys
import os
from pyspark.sql.utils import AnalysisException

%md
### DEFINICIÓN DE PIPELINE PARA POSTERIOR CÁLCULO DE maz_materials_plus; maz_stock_inventory_plus; maz_stock_vw_plus; maz_stock_h_plus

In [0]:
# Resolve the workspace folder that contains this notebook; the notebooks
# launched further down are addressed relative to it.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
notebook_path = notebook_context.notebookPath().get()
folder_path_ = os.path.dirname(notebook_path)

# Prefix the notebook's folder with the /Workspace root.
folder_path = "/Workspace" + folder_path_
# folder_path = f"{folder_path_}"

print(folder_path)

In [0]:
# Shows, as the cell output, the absolute path that "../set_project_context"
# resolves to from the current working directory. The value is not stored.
os.path.abspath("../set_project_context")

In [0]:
def exec_chain_stock_h_plus(mchb,mard,mbew,marc,date):
    """Run the stock notebook chain for one date and report each step's status.

    The sibling notebooks ``test``, ``maz_materials_plus``,
    ``maz_stock_inventory_plus``, ``maz_stock_vw_plus`` and
    ``maz_stock_h_plus`` (all resolved under ``folder_path``) are run one
    after another with ``timeout_seconds=0``.

    Args:
        mchb: Forwarded as the ``mchb`` argument of ``maz_materials_plus``
            and ``maz_stock_inventory_plus``.
        mard: Forwarded as the ``mard`` argument of the same two notebooks.
        mbew: Forwarded as the ``mbew`` argument of
            ``maz_stock_inventory_plus``.
        marc: Forwarded as the ``marc`` argument of
            ``maz_stock_inventory_plus``.
        date: Forwarded as ``date_analysis`` to ``maz_stock_h_plus`` and
            stored in the ``fecha_cargada`` column of the result.

    Returns:
        A one-row Spark DataFrame with ``fecha_cargada``, one status column
        per notebook (``maz_materials_plus``, ``maz_stock_inventory_plus``,
        ``maz_stock_vw_plus``, ``maz_stock_h_plus``) and ``exec_status``,
        which is ``"SUCCEEDED"`` only when all four statuses equal
        ``"SUCCEEDED"`` and ``"FAILED"`` otherwise.

    Raises:
        json.JSONDecodeError: If the value returned by ``maz_materials_plus``,
            ``maz_stock_inventory_plus`` or ``maz_stock_h_plus`` is not valid
            JSON.
        KeyError: If one of those decoded payloads has no ``"status"`` key
            (assumes the payload is a JSON object).
    """
    # timeout_seconds=0 means the run has no timeout.
    dbutils.notebook.run(
        f"{folder_path}/test",
        timeout_seconds=0
    )

    materials_result = dbutils.notebook.run(
        f"{folder_path}/maz_materials_plus",
        timeout_seconds=0,
        arguments={
            "mchb": mchb,
            "mard": mard
        }
    )

    inventory_result = dbutils.notebook.run(
        f"{folder_path}/maz_stock_inventory_plus",
        timeout_seconds=0,
        arguments={
            "mchb": mchb,
            "mard": mard,
            "mbew": mbew,
            "marc": marc,
        }
    )

    # The value returned by this notebook is not used: its status is derived
    # from the table lookup further down.
    dbutils.notebook.run(
        f"{folder_path}/maz_stock_vw_plus",
        timeout_seconds=0
    )

    stock_h_result = dbutils.notebook.run(
        f"{folder_path}/maz_stock_h_plus",
        timeout_seconds=0,
        arguments={
            "date_analysis": date
        }
    )

    # maz_stock_vw_plus counts as succeeded when the view can be looked up.
    try:
        spark.table("gld_maz_logistics_warehouse.maz_stock_vw_plus")
        maz_stock_vw_plus = "SUCCEEDED"
    except Exception:
        # Any lookup failure means FAILED. Unlike a bare ``except:``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        maz_stock_vw_plus = "FAILED"

    maz_materials_plus = json.loads(materials_result)
    maz_stock_inventory_plus = json.loads(inventory_result)
    maz_stock_h_plus = json.loads(stock_h_result)

    row = Row(
        fecha_cargada = date,
        maz_materials_plus = maz_materials_plus["status"],
        maz_stock_inventory_plus = maz_stock_inventory_plus["status"],
        maz_stock_vw_plus = maz_stock_vw_plus,
        maz_stock_h_plus = maz_stock_h_plus["status"]
    )

    # The chain succeeded only if every individual step did.
    all_succeeded = (
        (col("maz_materials_plus") == "SUCCEEDED")
        & (col("maz_stock_inventory_plus") == "SUCCEEDED")
        & (col("maz_stock_vw_plus") == "SUCCEEDED")
        & (col("maz_stock_h_plus") == "SUCCEEDED")
    )
    result_exec = spark.createDataFrame([row]).withColumn(
        "exec_status",
        when(all_succeeded, "SUCCEEDED").otherwise("FAILED")
    )
    return result_exec


### **PIPELINE PARA CÁLCULO DE maz_copec_marc_lvc; maz_copec_mbew_lvc; maz_copec_mard_lvc; maz_copec_mchb_lvc**

In [0]:
# # ======================================
# # CORRER SOLO UNA VEZ, DESPUES COMENTAR
# # ======================================

# dbutils.notebook.run(
#         f"{folder_path}/maz_copec_marc_lvc", 
#         timeout_seconds=0
#         # arguments={
#         #     "pais": "Colombia",
#         #     "anio": "2024"
#         # }
#     )

# dbutils.notebook.run(
#         f"{folder_path}/maz_copec_mbew_lvc", 
#         timeout_seconds=0
#         # arguments={
#         #     "pais": "Colombia",
#         #     "anio": "2024"
#         # }
#     )

# dbutils.notebook.run(
#         f"{folder_path}/maz_copec_mard_lvc", 
#         timeout_seconds=0
#         # arguments={
#         #     "pais": "Colombia",
#         #     "anio": "2024"
#         # }
#     )

# dbutils.notebook.run(
#         f"{folder_path}/maz_copec_mchb_lvc", 
#         timeout_seconds=0
#         # arguments={
#         #     "pais": "Colombia",
#         #     "anio": "2024"
#         # }
#     )

In [0]:
# Subzone to process. It selects the source schemas in the configuration cell
# (only "copecac" and "mx" are mapped there) and prefixes the source table names.
target_subzone = "copecac"

In [0]:
import os

# Deployment environment, read from the ENVIRONMENT variable.
environment = os.getenv("ENVIRONMENT")
if environment not in ["dev", "qa", "prod"]:
    raise Exception(
        "This Databricks Workspace does not have necessary environment variables."
        " Contact the admin team to set up the global init script and restart your cluster."
    )

# The catalog name differs only in its environment suffix; `environment` has
# already been validated against dev/qa/prod above.
uc_name = f"brewdat_uc_maz_{environment}"

# (masterdata schema, supply schema) for every supported subzone.
source_schemas_by_subzone = {
    "copecac": ("slv_maz_masterdata_sap_pr3", "slv_maz_supply_sap_pr3"),
    "mx": ("slv_maz_masterdata_sap_pr0", "slv_maz_supply_sap_pr0"),
}

# Fail with a clear message instead of a NameError on the print below when
# the subzone has no schema mapping.
if target_subzone not in source_schemas_by_subzone:
    raise ValueError(
        f"Unsupported target_subzone {target_subzone!r};"
        f" expected one of {sorted(source_schemas_by_subzone)}."
    )

mtda_source_schema, supply_source_schema = source_schemas_by_subzone[target_subzone]

print(f"{mtda_source_schema=}\n{supply_source_schema=}\n{uc_name=}")

### **CÁLCULO DE MCHB, MARD, MBEW Y MARC PARA EL DÍA ACTUAL**

In [0]:
# Current-day source tables, published as global temp views.
# NOTE(review): the catalog is hard-coded to brewdat_uc_maz_prod rather than
# taken from uc_name - confirm this is intentional.
# NOTE(review): MCHB drops rows whose op_ind equals "D", while MARD, MBEW and
# MARC drop rows whose op_ind contains "D" - confirm the difference is wanted.

mchb_columns = [
    "matnr", "werks", "lgort", "charg", "lvorm",
    "clabs", "cspem", "cinsm", "cretm", "ceinm",
    "cumlm", "cvmum", "cvmin", "cvmei", "cvmsp",
    "cvmre", "cvmla", "ersda", "laeda", "op_ind",
]
copecac_mchb_current_date = (
    spark.read.table(f"brewdat_uc_maz_prod.{supply_source_schema}.{target_subzone}_mchb")
    .select(*mchb_columns)
    .filter(F.col("op_ind") != "D")
)

mard_columns = [
    "matnr", "werks", "lgort", "lvorm", "labst",
    "speme", "insme", "retme", "einme", "op_ind",
]
copecac_mard_current_date = (
    spark.read.table(f"brewdat_uc_maz_prod.{mtda_source_schema}.{target_subzone}_mard")
    .select(*mard_columns)
    .filter(~F.col("op_ind").contains("D"))
)

mbew_columns = [
    "matnr", "bwkey", "salk3", "lbkum", "vprsv", "verpr",
    "stprs", "vmpei", "peinh", "bwtar", "op_ind",
]
copecac_mbew_current_date = (
    spark.read.table(f"brewdat_uc_maz_prod.{mtda_source_schema}.{target_subzone}_mbew")
    .select(*mbew_columns)
    .filter(~F.col("op_ind").contains("D"))
    .alias("copecac_mbew")
)

marc_columns = [
    "matnr", "werks", "trame", "bwesb",
    "umlmc", "glgmg", "lvorm", "op_ind",
]
copecac_marc_current_date = (
    spark.read.table(f"brewdat_uc_maz_prod.{mtda_source_schema}.{target_subzone}_marc")
    .select(*marc_columns)
    .filter(~F.col("op_ind").contains("D"))
    .alias("copecac_marc")
)

# Names under which the DataFrames above are published in global_temp.
copecac_mchb_temp = "copecac_mchb_temp"
copecac_mard_temp = "copecac_mard_temp"
copecac_mbew_temp = "copecac_mbew_temp"
copecac_marc_temp = "copecac_marc_temp"

for current_df, view_name in (
    (copecac_mchb_current_date, copecac_mchb_temp),
    (copecac_mard_current_date, copecac_mard_temp),
    (copecac_mbew_current_date, copecac_mbew_temp),
    (copecac_marc_current_date, copecac_marc_temp),
):
    current_df.createOrReplaceGlobalTempView(view_name)

### **CÁLCULO DE maz_stock_h_plus PARA LAS FECHAS QUE SE DEFINAN**

In [0]:
# =========================================================
# 1. BUILD THE "dates" LIST FROM THE BACKFILL START DATE UP TO TODAY
# =========================================================

BACKFILL_START_DATE = "2025-10-01"

start_date = datetime.strptime(BACKFILL_START_DATE, "%Y-%m-%d")
end_date = datetime.now()

# One "YYYY-MM-DD" string per calendar day, today included. The range is
# empty when the clock is before the start date.
dates = [
    (start_date + timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range((end_date - start_date).days + 1)
]

print("Total dates generated:", len(dates))


# =========================================================
# 2. LOAD THE TARGET TABLE AND COLLECT ITS insertion_date VALUES
# =========================================================

table_name = "brewdat_uc_maz_dev.gld_maz_logistics_warehouse.maz_stock_h_plus"

try:
    # Try to read the table.
    df_stock = spark.table(table_name)

    # Normalise the column to a date.
    df_stock = df_stock.withColumn("insertion_date", to_date(col("insertion_date")))

    # Dates already present in the table. Null dates are skipped: calling
    # strftime on None would raise an AttributeError that the handler below
    # does not catch.
    dates_in_table = [
        row["insertion_date"].strftime("%Y-%m-%d")
        for row in df_stock.select("insertion_date").distinct().collect()
        if row["insertion_date"] is not None
    ]

    # Dates that still have to be loaded; a set keeps each lookup constant-time.
    loaded_dates = set(dates_in_table)
    missing_dates = [d for d in dates if d not in loaded_dates]

    print("Tabla encontrada.")
    print(f"Total fechas faltantes: {len(missing_dates)}")

except AnalysisException:
    # The table does NOT exist: every generated date is missing.
    print(f"⚠️ La tabla {table_name} NO existe.")
    print(f"→ Se usarán todas las fechas desde {BACKFILL_START_DATE} hasta hoy.")

    missing_dates = dates

# To re-run specific dates only, override the list here, e.g.:
# missing_dates = ['2025-11-24','2025-11-25']


print("Missing dates:", missing_dates)


# =========================================================
# 3. ITERATE OVER MISSING_DATES
# =========================================================
# Dates before today are served from the *_lvc history snapshots; today's
# date is served from the copecac_*_temp views of the current-day tables.

today_str = datetime.now().strftime("%Y-%m-%d")

# Column layout of the execution log, in output order.
result_columns = [
    "fecha_cargada",
    "maz_materials_plus",
    "maz_stock_inventory_plus",
    "maz_stock_vw_plus",
    "maz_stock_h_plus",
    "exec_status",
    "df_mard_count",
    "df_mchb_count",
    "df_mbew_count",
    "df_marc_count",
]

# Seed row made of empty strings; the per-date results are unioned onto the
# DataFrame built from it.
row = Row(**{column_name: "" for column_name in result_columns})

result_exec_final = spark.createDataFrame([row])

def _latest_lvc_snapshot(table_name, key_columns, cutoff_date):
    """Return, per key, the newest row committed before ``cutoff_date``.

    Rows of ``table_name`` whose ``source_commit_dt`` is earlier than
    ``cutoff_date`` are ranked within each ``key_columns`` partition by
    ``source_commit_ts`` and then ``__insert_gmt_ts``, both descending; only
    the first row of each partition is kept.
    """
    newest_first = Window.partitionBy(*key_columns).orderBy(
        F.col("source_commit_ts").desc(),
        F.col("__insert_gmt_ts").desc()
    )
    return (
        spark.table(table_name)
        .filter(F.col("source_commit_dt") < cutoff_date)
        .withColumn("unique_version", F.row_number().over(newest_first))
        .filter(F.col("unique_version") == 1)
        # Remove the ranking helper column and last_version.
        .drop("unique_version", "last_version")
    )


def _publish_snapshot(snapshot_df, view_name):
    """Cache ``snapshot_df``, expose it as a global temp view, return its row count."""
    snapshot_df.cache()
    row_count = snapshot_df.count()
    snapshot_df.createOrReplaceGlobalTempView(view_name)
    return row_count


# Global temp view names of the historical snapshots (constant across dates).
mard_temp = "mard_temp"
mchb_temp = "mchb_temp"
mbew_temp = "mbew_temp"
marc_temp = "marc_temp"

for date in missing_dates:

    # MARD
    df_mard = _latest_lvc_snapshot(
        "gld_maz_logistics_warehouse.maz_copec_mard_lvc",
        ["werks", "matnr", "lgort"],
        date
    )
    count_mard = _publish_snapshot(df_mard, mard_temp)

    # MCHB
    df_mchb = _latest_lvc_snapshot(
        "gld_maz_logistics_warehouse.maz_copec_mchb_lvc",
        ["werks", "matnr", "lgort", "charg"],
        date
    )
    count_mchb = _publish_snapshot(df_mchb, mchb_temp)

    # MBEW
    df_mbew = _latest_lvc_snapshot(
        "gld_maz_logistics_warehouse.maz_copec_mbew_lvc",
        ["matnr", "bwkey", "bwtar"],
        date
    )
    count_mbew = _publish_snapshot(df_mbew, mbew_temp)

    # MARC
    df_marc = _latest_lvc_snapshot(
        "gld_maz_logistics_warehouse.maz_copec_marc_lvc",
        ["matnr", "werks"],
        date
    )
    count_marc = _publish_snapshot(df_marc, marc_temp)

    # Both values are "YYYY-MM-DD" strings, so string comparison orders them
    # chronologically. Past dates use the historical snapshots; today uses
    # the current-day views.
    if date < today_str:
        mchb = f"global_temp.{mchb_temp}"
        mard = f"global_temp.{mard_temp}"
        mbew = f"global_temp.{mbew_temp}"
        marc = f"global_temp.{marc_temp}"
    else:
        mchb = f"global_temp.{copecac_mchb_temp}"
        mard = f"global_temp.{copecac_mard_temp}"
        mbew = f"global_temp.{copecac_mbew_temp}"
        marc = f"global_temp.{copecac_marc_temp}"

    print(f"Ejecutando para fecha {date}...")

    try:
        final_stock_h_plus = (
            exec_chain_stock_h_plus(mchb, mard, mbew, marc, date)
            .withColumn("df_mard_count", lit(count_mard))
            .withColumn("df_mchb_count", lit(count_mchb))
            .withColumn("df_mbew_count", lit(count_mbew))
            .withColumn("df_marc_count", lit(count_marc))
        )
    finally:
        # Release this date's cached snapshots; otherwise every iteration
        # leaves four more cached DataFrames behind.
        for snapshot_df in (df_mard, df_mchb, df_mbew, df_marc):
            snapshot_df.unpersist()

    result_exec_final = result_exec_final.unionByName(final_stock_h_plus)
    # Re-published after every date so the log so far stays queryable.
    result_exec_final.createOrReplaceGlobalTempView("result_exec_final_temp")



In [0]:
# result_exec_final.show()

### **BORRADO DE TABLAS maz_materials_plus; maz_stock_inventory_plus; maz_stock_h_plus**

In [0]:

# # ===============================================================================
# # maz_materials_plus
# # ===============================================================================

# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_materials_plus').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_materials_plus"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location1='abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_materials_plus'
# # target_location1='abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/masterdata/gld_maz_masterdata_materials/maz_materials_plus'
# dbutils.fs.rm(target_location1,recurse=True)


# # # # # ===============================================================================
# # # # # maz_stock_inventory_plus
# # # # # ===============================================================================

# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_stock_inventory_plus').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_stock_inventory_plus"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location2 = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_stock_inventory_plus'
# dbutils.fs.rm(target_location2,recurse=True)


# # # # # ===============================================================================
# # # # # maz_stock_h_plus
# # # # # ===============================================================================

# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_stock_h_plus').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_stock_h_plus"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location3 = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_stock_h_plus'
# dbutils.fs.rm(target_location3,recurse=True)

### **BORRADO DE TABLAS maz_copec_mard_lvc; maz_copec_mchb_lvc; maz_copec_marc_lvc; maz_copec_mbew_lvc**

In [0]:
# # # # ===============================================================================
# # # # maz_copec_mard_lvc
# # # # ===============================================================================


# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_copec_mard_lvc').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_copec_mard_lvc"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_copec_mard_lvc'
# dbutils.fs.rm(target_location,recurse=True)

# # # # ===============================================================================
# # # # maz_copec_mchb_lvc
# # # # ===============================================================================


# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_copec_mchb_lvc').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_copec_mchb_lvc"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_copec_mchb_lvc'
# dbutils.fs.rm(target_location,recurse=True)

# # # # ===============================================================================
# # # # maz_copec_marc_lvc
# # # # ===============================================================================

# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_copec_marc_lvc').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_copec_marc_lvc"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_copec_marc_lvc'
# dbutils.fs.rm(target_location,recurse=True)

# # # # ===============================================================================
# # # # maz_copec_mbew_lvc
# # # # ===============================================================================

# tables = spark.sql("SHOW TABLES IN brewdat_uc_maz_dev.gld_maz_logistics_warehouse")
# if tables.filter(tables['tableName'] == 'maz_copec_mbew_lvc').count() > 0:
#     sqlquery = "DROP TABLE gld_maz_logistics_warehouse.maz_copec_mbew_lvc"
#     result = spark.sql(sqlquery)
#     display(result)

# target_location = 'abfss://gold@brewdatmazslvgldd.dfs.core.windows.net/data/maz/logistics/gld_maz_logistics_warehouse/maz_copec_mbew_lvc'
# dbutils.fs.rm(target_location,recurse=True)