In [0]:
# Drop every widget currently defined on this notebook before the parameter
# widgets are declared again further down.
dbutils.widgets.removeAll()

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import functions as F

In [0]:
# Declare the notebook's text widgets as (name, default value) pairs, in the
# same order they were originally registered.
for _nombre_widget, _valor_por_defecto in (
    ("catalogo", "catalog_jptq"),
    ("esquema_stage", "silver_jptq"),
    ("esquema_target", "golden_jptq"),
    ("tabla", "TBL_DIM_DIRECTOR_PELICULA_T2"),
    ("storageLocation", "abfss://unity-catalog-jptq@adlsjptq0126.dfs.core.windows.net"),
):
    dbutils.widgets.text(_nombre_widget, _valor_por_defecto)

In [0]:
# Read the current widget values into module-level variables; the functions
# defined below read some of these (esquema_stage, storageLocation) as globals.
catalogo, esquema_stage, esquema_target, tabla, storageLocation = (
    dbutils.widgets.get(nombre_widget)
    for nombre_widget in (
        "catalogo",
        "esquema_stage",
        "esquema_target",
        "tabla",
        "storageLocation",
    )
)

In [0]:
def fn_create_table_dim_director_pelicula(catalogo: str, esquema: str, tabla: str):
    """Create the director dimension Delta table when it does not exist yet.

    The table holds a generated identity surrogate key, the director code and
    description, an active flag (BAN_ACTIVO), validity dates and load/update
    dates. It is created as an external Delta table under
    ``{storageLocation}/golden-jptq/{tabla}``.

    Args:
        catalogo: Catalog that contains the target schema.
        esquema: Schema in which the table is created. This argument is now
            honoured; previously the global ``esquema_target`` was used and
            the argument was silently ignored.
        tabla: Name of the table to create.

    Notes:
        ``spark`` and ``storageLocation`` are read from the notebook's global
        scope; both must be defined before this function is called.
    """
    nombre_completo = f"{catalogo}.{esquema}.{tabla}"

    # Guard clause: nothing to do when the table is already registered.
    if spark.catalog.tableExists(nombre_completo):
        print(f"La tabla ya existe: {nombre_completo}")
        return

    # IF NOT EXISTS is kept so a concurrent creation between the check above
    # and this statement does not raise.
    spark.sql(f"""CREATE TABLE IF NOT EXISTS {nombre_completo} (
      SK_DIM_DIRECTOR BIGINT GENERATED ALWAYS AS IDENTITY,
      COD_DIRECTOR_PELICULA STRING,
      DES_DIRECTOR_PELICULA STRING,
      BAN_ACTIVO INT,
      FEC_INICIO_VIGENCIA DATE,
      FEC_FIN_VIGENCIA DATE,
      FEC_CARGA DATE,
      FEC_ACTUALIZACION DATE
      )
      USING DELTA
      LOCATION '{storageLocation}/golden-jptq/{tabla}'""")
    print(f"Tabla creada correctamente: {nombre_completo}")

In [0]:
# Make sure the target dimension table exists before any data is merged into it.
fn_create_table_dim_director_pelicula(
    catalogo=catalogo,
    esquema=esquema_target,
    tabla=tabla,
)

In [0]:
def fn_update_table_dim_director_pelicula(catalogo: str, esquema: str, tabla: str):
    """Close the active dimension rows whose description changed in the stage table.

    Runs a MERGE that matches active target rows (BAN_ACTIVO = 1) with
    ``TBL_TMP_DIRECTOR_PELICULA`` by director code. When the description
    differs, the target row is closed: FEC_FIN_VIGENCIA and FEC_ACTUALIZACION
    are set to the current date and BAN_ACTIVO is set to 0. No rows are
    inserted by this function.

    Args:
        catalogo: Catalog containing both the target and the stage schema.
        esquema: Schema of the target table. This argument is now honoured;
            previously the global ``esquema_target`` was used and the
            argument was silently ignored.
        tabla: Name of the target dimension table.

    Notes:
        ``spark`` and ``esquema_stage`` are read from the notebook's global
        scope.
    """
    tabla_destino = f"{catalogo}.{esquema}.{tabla}"
    tabla_origen = f"{catalogo}.{esquema_stage}.TBL_TMP_DIRECTOR_PELICULA"

    # The change test uses the NULL-safe operator `<=>`: with a plain `<>`,
    # a description going from NULL to a value (or the reverse) evaluates to
    # NULL and the outdated row would never be closed.
    spark.sql(f"""MERGE INTO {tabla_destino} AS TARGET
    USING {tabla_origen} AS SOURCE
    ON TARGET.COD_DIRECTOR_PELICULA = SOURCE.COD_DIRECTOR
    AND TARGET.BAN_ACTIVO = 1
    WHEN MATCHED
    AND NOT (
         TARGET.DES_DIRECTOR_PELICULA <=> SOURCE.DES_DIRECTOR
    )
    THEN UPDATE SET
    TARGET.FEC_FIN_VIGENCIA = CURRENT_DATE(),
    TARGET.FEC_ACTUALIZACION = CURRENT_DATE(),
    TARGET.BAN_ACTIVO = 0
    """)
    print(f"Actualizacion Correcta en {tabla_destino}")

In [0]:
# Close the active rows whose description no longer matches the stage table.
fn_update_table_dim_director_pelicula(
    catalogo=catalogo,
    esquema=esquema_target,
    tabla=tabla,
)

In [0]:
def fn_insert_table_dim_director_pelicula(catalogo: str, esquema: str, tabla: str):
    """Insert a new active row for every stage director lacking a matching active row.

    Stage rows from ``TBL_TMP_DIRECTOR_PELICULA`` are left-joined to the
    active target rows (BAN_ACTIVO = 1) by director code. A row is inserted
    when no active target row exists for the code, or when the active row's
    description differs from the stage description. Inserted rows get
    BAN_ACTIVO = 1, FEC_INICIO_VIGENCIA and FEC_CARGA set to the current date,
    and FEC_FIN_VIGENCIA and FEC_ACTUALIZACION set to 9999-12-31.

    Args:
        catalogo: Catalog containing both the target and the stage schema.
        esquema: Schema of the target table. This argument is now honoured;
            previously the global ``esquema_target`` was used and the
            argument was silently ignored.
        tabla: Name of the target dimension table.

    Notes:
        ``spark`` and ``esquema_stage`` are read from the notebook's global
        scope. The surrogate key column is omitted from the column list
        because the INSERT does not supply a value for it.
    """
    tabla_destino = f"{catalogo}.{esquema}.{tabla}"
    tabla_origen = f"{catalogo}.{esquema_stage}.TBL_TMP_DIRECTOR_PELICULA"

    spark.sql(f"""INSERT INTO {tabla_destino} (
    COD_DIRECTOR_PELICULA,
    DES_DIRECTOR_PELICULA,
    BAN_ACTIVO,
    FEC_INICIO_VIGENCIA,
    FEC_FIN_VIGENCIA,
    FEC_CARGA,
    FEC_ACTUALIZACION
    )
    SELECT
    SOURCE.COD_DIRECTOR,
    SOURCE.DES_DIRECTOR,
    1,
    CURRENT_DATE(),
    TO_DATE('31/12/9999','dd/MM/yyyy'),
    CURRENT_DATE(),
    TO_DATE('31/12/9999','dd/MM/yyyy')
    FROM {tabla_origen} SOURCE
    LEFT JOIN {tabla_destino} TARGET
    ON SOURCE.COD_DIRECTOR = TARGET.COD_DIRECTOR_PELICULA
    AND TARGET.BAN_ACTIVO = 1
    WHERE TARGET.COD_DIRECTOR_PELICULA IS NULL
    OR (
        TARGET.DES_DIRECTOR_PELICULA <> SOURCE.DES_DIRECTOR
    )
    """)
    print(f"Insercion Correcta en {tabla_destino}")

In [0]:
# Insert the new active rows for directors without a matching active row.
fn_insert_table_dim_director_pelicula(
    catalogo=catalogo,
    esquema=esquema_target,
    tabla=tabla,
)

In [0]:
# Preview the dimension table that this run actually wrote to. The table name
# is built from the widget values instead of being hard-coded, so the preview
# stays correct when the notebook is run with different parameters.
display(spark.sql(f"SELECT * FROM {catalogo}.{esquema_target}.{tabla}"))