In [None]:
import sys
import os

sys.path.append(os.path.abspath('..'))

from src.utilities import *

In [None]:
# Fully-qualified names of the Silver input tables read by this notebook.
TABLE_SILVER_EXP = "silver_dev.sctr_emision.expuestos_silver"
TABLE_SILVER_CONT = "silver_dev.sctr_emision.contratantes_silver"

# Fully-qualified name of the Gold output table written by this notebook.
TABLE_GOLD_BI = "gold_dev.sctr_emision.consolidado_gold"

# NOTE(review): NUM_PARTITIONS, set_config_spark, open_log, logger and spark are
# not defined in this notebook; presumably they come from the star import of
# src.utilities — confirm.
set_config_spark(NUM_PARTITIONS)
open_log("Gold")

# The cluster id is only needed for the start-up log line, so fall back to a
# placeholder instead of letting a missing configuration key abort the run.
cluster_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterId", "desconocido")
logger.info(f"🟡 Iniciando proceso Gold en Databricks. Clúster: {cluster_id}")

In [None]:
def _silver_table_is_empty(df) -> bool:
    """Return True when ``df`` has no rows.

    Uses ``limit(1)`` so Spark only has to find a single row instead of
    counting the whole table; the exact row count is not needed by the caller.
    """
    return df.limit(1).count() == 0


def process_gold_consolidation() -> "DataFrame | None":
    """Build the Gold consolidated DataFrame from the two Silver tables.

    Reads ``TABLE_SILVER_EXP`` (aliased ``A``) and ``TABLE_SILVER_CONT``
    (aliased ``B``), left-joins them on ``POLIZA`` and returns the distinct
    rows of the selected columns.

    Returns:
        The consolidated (lazily evaluated) DataFrame, or ``None`` when either
        Silver table has no rows or when any exception is raised while building
        the result. Failures are logged, not propagated; the caller is expected
        to treat ``None`` as an error.
    """
    try:
        logger.info("   🔍 Leyendo Silver Expuestos...")
        df_exp = spark.read.table(TABLE_SILVER_EXP)

        if _silver_table_is_empty(df_exp):
            logger.warning("   ⚠️ La tabla Silver Expuestos no contiene registros.")
            return None

        logger.info("   🔍 Leyendo Silver Contratantes...")
        df_cont = spark.read.table(TABLE_SILVER_CONT)

        if _silver_table_is_empty(df_cont):
            logger.warning("   ⚠️ La tabla Silver Contratantes no contiene registros.")
            return None

        logger.info("   🔄 Transformando Consolidado...")

        df_exp = df_exp.alias("A")

        # Only three columns of Contratantes are used (the join key plus the two
        # selected below). Projecting and de-duplicating them BEFORE the join
        # keeps repeated rows per POLIZA from multiplying the Expuestos rows;
        # the final distinct() result is the same set of rows either way.
        df_cont = (
            df_cont
            .select("POLIZA", "NUM_DOC_CONT", "CONTRATANTE")
            .distinct()
            .alias("B")
        )

        # NOTE(review): the join key is POLIZA only. An earlier version of this
        # code also had (disabled) conditions on YEAR_MOV and MONTH_MOV —
        # confirm that matching on POLIZA alone is the intended behaviour.
        join_cond = F.col("A.POLIZA") == F.col("B.POLIZA")

        # Left join: every Expuestos row is kept even without a matching
        # Contratantes row (its B columns are then null).
        df_joined = df_exp.join(df_cont, join_cond, "left")

        cols_select = [
            F.col("A.POLIZA"),
            F.col("A.F_INI_VIGEN_POLIZA"),
            F.col("A.F_FIN_VIGEN_POLIZA"),
            F.col("A.F_INI_COBERT"),
            F.col("A.F_FIN_COBERT"),
            F.col("B.NUM_DOC_CONT"),
            F.col("B.CONTRATANTE"),
            F.col("A.TIPO_DOC"),
            F.col("A.NUM_DOC"),
            F.col("A.ULT_DIGI_DOC"),
            F.col("A.EXPUESTO"),
        ]

        # distinct() removes duplicates that remain after projecting Expuestos
        # down to the selected columns.
        return df_joined.select(*cols_select).distinct()
    except Exception as e:
        # Deliberate best-effort contract: log and signal failure with None.
        logger.error(f"   ❌ Error en Consolidado Gold. {e}")
        return None

In [None]:
if __name__ == "__main__":
    # Driver of the Gold stage: build the consolidated DataFrame, persist it to
    # the Gold Delta table, optimise that table, and always run the finaliser.
    try:
        df_gold = process_gold_consolidation()

        if df_gold is None:
            raise Exception("El proceso falló al consolidar y transformar la información de la capa Silver.")

        saved_successfully = save_to_table_delta(df_gold, TABLE_GOLD_BI, "overwrite", "false")

        if not saved_successfully:
            raise Exception(f"El proceso falló al guardar la información en la tabla delta {TABLE_GOLD_BI}")

        # Count from the persisted table, after the save succeeded: calling
        # df_gold.count() before saving would execute the join + distinct twice
        # and would report rows as "saved" before anything was written.
        # NOTE(review): assumes the "overwrite" save replaces the whole table, so
        # the table's row count equals the number of rows just written — confirm
        # against save_to_table_delta.
        total_rows = spark.read.table(TABLE_GOLD_BI).count()
        logger.info(f"   📊 Total Registros Guardados: {total_rows:,.0f}")

        if validate_table_delta(TABLE_GOLD_BI):
            logger.info("   🧹 Optimizando tabla Gold Consolidado BI...")
            spark.sql(f"OPTIMIZE {TABLE_GOLD_BI} ZORDER BY (ULT_DIGI_DOC, NUM_DOC)")

        logger.success("🏁 Ejecución Completa: Proceso Gold Finalizado con éxito.")
    except Exception as e:
        logger.error(f"❌ Error crítico en Proceso Gold. {e}")
        # Bare raise re-raises the active exception with its original traceback
        # so the notebook/job run is marked as failed.
        raise
    finally:
        # Runs on success and on failure alike.
        finalize_process()