In [0]:
# Remove every widget currently attached to the notebook so the widget
# definitions in the following cells start from a clean state.
dbutils.widgets.removeAll()

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import functions as F

In [0]:
# Notebook parameters and their default values, registered as text widgets
# in the order listed here.
widget_defaults = {
    "catalogo": "catalog_jptq",
    "esquema_source": "bronze_jptq",
    "esquema_stage": "silver_jptq",
    "storageLocation": "abfss://unity-catalog-jptq@adlsjptq0126.dfs.core.windows.net",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

In [0]:
# Read the current widget values into the variables used by the rest of the
# notebook (one variable per widget, same names as the widgets).
catalogo, esquema_source, esquema_stage, storageLocation = (
    dbutils.widgets.get(nombre_widget)
    for nombre_widget in ("catalogo", "esquema_source", "esquema_stage", "storageLocation")
)

In [0]:
def fn_transform_pelicula(catalogo: str, esquema_source: str):
    """Build the consolidated movie DataFrame from the source-schema tables.

    Runs one Spark SQL query that starts from TBL_PELICULAS and LEFT JOINs
    TBL_DETALLE_PELICULAS, TBL_MAS_INFO_PELICULAS and TBL_POSTER_PELICULAS on
    the movie id. Grouping by every selected column (there are no aggregates)
    collapses duplicate rows; the result is ordered by ID_PELICULA.

    Args:
        catalogo: Catalog that holds the source tables.
        esquema_source: Schema inside ``catalogo`` that holds the source tables.

    Returns:
        Whatever ``spark.sql`` returns for the query (a Spark DataFrame).
    """
    # Fully qualified prefix shared by the four source tables.
    origen = f"{catalogo}.{esquema_source}"
    consulta = f"""SELECT
                              P.ID_PELICULA,
                              P.DES_PELICULA,
                              MIP.DES_DURACION_PELICULA,
                              DP.DES_TOP_ACTORES,
                              PP.DES_RUTA_POSTER_FRONTAL,
                              PP.DES_RUTA_POSTER_TRASERA,
                              P.FEC_LANZAMIENTO
                              FROM {origen}.TBL_PELICULAS P
                              LEFT JOIN {origen}.TBL_DETALLE_PELICULAS DP
                              ON P.ID_PELICULA = DP.ID_PELICULA
                              LEFT JOIN {origen}.TBL_MAS_INFO_PELICULAS MIP
                              ON P.ID_PELICULA = MIP.PELICULA_ID
                              LEFT JOIN {origen}.TBL_POSTER_PELICULAS PP
                              ON P.ID_PELICULA = PP.ID_PELICULA
                              GROUP BY P.ID_PELICULA,P.DES_PELICULA,MIP.DES_DURACION_PELICULA,
                              DP.DES_TOP_ACTORES,PP.DES_RUTA_POSTER_FRONTAL,PP.DES_RUTA_POSTER_TRASERA,
                              P.FEC_LANZAMIENTO
                              ORDER BY P.ID_PELICULA ASC"""
    return spark.sql(consulta)

In [0]:
# Build the joined movie DataFrame from the source schema selected by the widgets.
df_tmp_pelicula = fn_transform_pelicula(catalogo, esquema_source)

In [0]:
# Stamp every row with the load timestamp. The function is referenced through
# the explicit `F` alias rather than the star-imported name.
df_tmp_pelicula = df_tmp_pelicula.withColumn("FEC_CARGA", F.current_timestamp())

In [0]:
# Preview the DataFrame before it is loaded into the stage table.
display(df_tmp_pelicula)

In [0]:
# Disabled alternative, kept for reference: write the DataFrame straight to the
# stage table in overwrite mode instead of loading it through SQL.
#df_tmp_pelicula.write.mode("overwrite").saveAsTable(f"{catalogo}.{esquema_stage}.TBL_TMP_PELICULA")
# Expose the DataFrame to SQL under the name "tmp_df_tmp_pelicula"; the INSERT
# statement in fn_insert_table_tmp_pelicula selects from this view.
df_tmp_pelicula.createOrReplaceTempView("tmp_df_tmp_pelicula")

In [0]:
def fn_create_table_tmp_pelicula(catalogo: str, esquema_stage: str, storage_location: str = None):
    """Create the Delta table TBL_TMP_PELICULA when it does not exist yet.

    Args:
        catalogo: Catalog where the table lives.
        esquema_stage: Schema inside ``catalogo`` where the table lives.
        storage_location: Base storage URI; the table data is placed under
            ``<base>/silver-jptq/TBL_TMP_PELICULA``. When omitted, the
            notebook-level ``storageLocation`` variable is used, so existing
            two-argument calls keep working.

    Returns:
        None. Prints whether the table already existed or was created.

    Raises:
        NameError: if ``storage_location`` is omitted, the table has to be
            created, and no notebook-level ``storageLocation`` is defined.
    """
    tabla = f"{catalogo}.{esquema_stage}.TBL_TMP_PELICULA"

    if spark.catalog.tableExists(tabla):
        print(f"La tabla ya existe: {tabla}")
        return

    # Resolve the location only when the table really has to be created, so an
    # already-existing table never requires a storage location.
    if storage_location is None:
        storage_location = storageLocation
    # Strip trailing slashes so the LOCATION path never contains "//".
    ubicacion = f"{storage_location.rstrip('/')}/silver-jptq/TBL_TMP_PELICULA"

    # IF NOT EXISTS is kept as a safety net in case the table is created by
    # someone else between the existence check above and this statement.
    spark.sql(f"""CREATE TABLE IF NOT EXISTS {tabla} (
        ID_PELICULA INT,
        DES_PELICULA STRING,
        DES_DURACION_PELICULA STRING,
        DES_TOP_ACTORES STRING,
        DES_RUTA_POSTER_FRONTAL STRING,
        DES_RUTA_POSTER_TRASERA STRING,
        FEC_LANZAMIENTO DATE,
        FEC_CARGA TIMESTAMP
        )
      USING DELTA
      LOCATION '{ubicacion}'""")
    print(f"Tabla creada correctamente: {tabla}")

In [0]:
# Make sure the stage table exists before it is truncated and loaded.
fn_create_table_tmp_pelicula(catalogo, esquema_stage)

In [0]:
def fn_truncate_table_tmp_pelicula(catalogo: str, esquema_stage: str):
    """Issue a TRUNCATE TABLE statement against TBL_TMP_PELICULA.

    Args:
        catalogo: Catalog where the table lives.
        esquema_stage: Schema inside ``catalogo`` where the table lives.

    Returns:
        None; the result of ``spark.sql`` is discarded.
    """
    sentencia = f"""
    TRUNCATE TABLE {catalogo}.{esquema_stage}.TBL_TMP_PELICULA
    """
    spark.sql(sentencia)

In [0]:
# Empty the stage table so the following insert reloads it from scratch.
fn_truncate_table_tmp_pelicula(catalogo, esquema_stage)

In [0]:
def fn_insert_table_tmp_pelicula(catalogo: str, esquema_stage: str, vista_origen: str = "tmp_df_tmp_pelicula"):
    """Load TBL_TMP_PELICULA from a temp view, matching columns by name.

    The previous ``INSERT ... SELECT *`` matched columns purely by position, so
    any change in the view's column order would load values into the wrong
    columns without an error (most columns are STRING). Listing the columns on
    both sides makes the mapping explicit and order-independent.

    Args:
        catalogo: Catalog where the target table lives.
        esquema_stage: Schema inside ``catalogo`` where the target table lives.
        vista_origen: Name of the view to read from. Defaults to
            ``tmp_df_tmp_pelicula``, the view registered earlier in the
            notebook, so existing two-argument calls behave as before.

    Returns:
        None; the result of ``spark.sql`` is discarded.
    """
    # Same columns, in the same order, as the CREATE TABLE statement for
    # TBL_TMP_PELICULA.
    columnas = ", ".join((
        "ID_PELICULA",
        "DES_PELICULA",
        "DES_DURACION_PELICULA",
        "DES_TOP_ACTORES",
        "DES_RUTA_POSTER_FRONTAL",
        "DES_RUTA_POSTER_TRASERA",
        "FEC_LANZAMIENTO",
        "FEC_CARGA",
    ))
    spark.sql(f"""
    INSERT INTO {catalogo}.{esquema_stage}.TBL_TMP_PELICULA ({columnas})
    SELECT {columnas}
    FROM {vista_origen}
    """)

In [0]:
# Load the stage table from the temp view registered earlier in the notebook.
fn_insert_table_tmp_pelicula(catalogo, esquema_stage)

In [0]:
# Read back the stage table to verify the load. The table name is built from
# the widget values (instead of a hard-coded catalog/schema) so this check
# always inspects the table the notebook actually wrote to.
display(spark.table(f"{catalogo}.{esquema_stage}.TBL_TMP_PELICULA"))