In [0]:
# Drop every widget currently attached to the notebook so the widget
# definitions further down start from a clean slate.
dbutils.widgets.removeAll()

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Notebook parameters, declared as text widgets: widget name -> default value.
# Insertion order is preserved, so the widgets are created in the listed order.
widget_defaults = {
    "container": "raw-jptq",
    "catalogo": "catalog_jptq",
    "esquema": "bronze_jptq",
    "datalake": "adlsjptq0126",
    "file": "Peliculas",
    "name_file": "PosterPath.csv",
    "storageLocation": "abfss://unity-catalog-jptq@adlsjptq0126.dfs.core.windows.net",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

In [0]:
# Read the current value of every widget into a same-named Python variable.
container, catalogo, esquema, datalake, file, name_file, storageLocation = (
    dbutils.widgets.get(widget_name)
    for widget_name in (
        "container",
        "catalogo",
        "esquema",
        "datalake",
        "file",
        "name_file",
        "storageLocation",
    )
)

# abfss:// URI of the input file: <container>@<datalake account>/<file folder>/<name_file>.
ruta = f"abfss://{container}@{datalake}.dfs.core.windows.net/{file}/{name_file}"

In [0]:
# Read the CSV using its header row for column names and let Spark infer the
# column types.
# NOTE(review): df_poster_movies is not referenced by any later cell visible
# here — confirm whether this additional read is still needed.
df_poster_movies = spark.read.csv(ruta, header=True, inferSchema=True)

In [0]:
# Explicit schema for the poster CSV: an integer id declared non-nullable and
# two nullable string path columns.
poster_movies_schema = (
    StructType()
    .add("id", IntegerType(), False)
    .add("poster_path", StringType(), True)
    .add("backdrop_path", StringType(), True)
)

In [0]:
# Read the same CSV again, this time applying the explicit schema instead of
# relying on type inference; the first row is still treated as the header.
df_poster_movies_final = (
    spark.read
    .schema(poster_movies_schema)
    .options(header=True)
    .csv(ruta)
)

In [0]:
# Keep only the three source columns that are loaded into the target table.
poster_movies_selected_df = df_poster_movies_final.select(
    "id",
    "poster_path",
    "backdrop_path",
)

In [0]:
# Rename the source columns to the target table's column names in a single
# projection (same columns, same order).
poster_movies_renamed_df = poster_movies_selected_df.select(
    poster_movies_selected_df["id"].alias("ID_PELICULA"),
    poster_movies_selected_df["poster_path"].alias("DES_RUTA_POSTER_FRONTAL"),
    poster_movies_selected_df["backdrop_path"].alias("DES_RUTA_POSTER_TRASERA"),
)

In [0]:
# Append the load timestamp as the last column, after all existing columns.
poster_movies_final_df = poster_movies_renamed_df.select(
    "*",
    current_timestamp().alias("FEC_CARGA"),
)

In [0]:
# Disabled alternative: overwrite the target table directly from the DataFrame.
#poster_movies_final_df.write.mode("overwrite").saveAsTable(f"{catalogo}.{esquema}.TBL_POSTER_PELICULAS")
# Expose the final DataFrame as a temporary view so the SQL statements in the
# cells below can read from it by name.
poster_movies_final_df.createOrReplaceTempView("tmp_poster_movies_final_df")

In [0]:
def fn_create_table_poster_peliculas(catalogo: str, esquema: str, storage_location: str = None):
    """Create the Delta table TBL_POSTER_PELICULAS when it does not exist yet.

    Args:
        catalogo: Catalog that contains the target schema.
        esquema: Schema in which the table is created.
        storage_location: Base URI under which the table data is stored
            (``<storage_location>/bronze-jptq/TBL_POSTER_PELICULAS``). When
            omitted, the notebook-level ``storageLocation`` value is used,
            which is what the function always did before this parameter
            existed.

    Returns:
        None. Prints whether the table already existed or was created.
    """
    if storage_location is None:
        # Backward-compatible fallback to the value read from the widget.
        storage_location = storageLocation

    table_name = f"{catalogo}.{esquema}.TBL_POSTER_PELICULAS"

    if spark.catalog.tableExists(table_name):
        print(f"La tabla ya existe: {table_name}")
        return

    # IF NOT EXISTS is kept so the statement stays harmless if the table is
    # created by someone else between the existence check and this call.
    spark.sql(f"""CREATE TABLE IF NOT EXISTS {table_name} (
        ID_PELICULA INT,
        DES_RUTA_POSTER_FRONTAL STRING,
        DES_RUTA_POSTER_TRASERA STRING,
        FEC_CARGA TIMESTAMP
        )
      USING DELTA
      LOCATION '{storage_location}/bronze-jptq/TBL_POSTER_PELICULAS'""")
    print(f"Tabla creada correctamente: {table_name}")

In [0]:
# Make sure the target table exists before it is truncated and reloaded.
fn_create_table_poster_peliculas(catalogo, esquema)

In [0]:
def fn_truncate_table_poster_peliculas(catalogo: str, esquema: str):
    """Remove all rows from TBL_POSTER_PELICULAS in the given catalog and schema.

    Args:
        catalogo: Catalog that contains the target schema.
        esquema: Schema that contains the table.
    """
    truncate_statement = f"""
    TRUNCATE TABLE {catalogo}.{esquema}.TBL_POSTER_PELICULAS
    """
    spark.sql(truncate_statement)

In [0]:
# Empty the target table so the insert below performs a full reload.
fn_truncate_table_poster_peliculas(catalogo, esquema)

In [0]:
def fn_insert_table_poster_peliculas(catalogo: str, esquema: str):
    """Load the rows of the temp view tmp_poster_movies_final_df into TBL_POSTER_PELICULAS.

    Columns are listed explicitly on both sides of the INSERT instead of using
    ``SELECT *``. A positional ``SELECT *`` would silently write values into
    the wrong column if the view's column order ever changed (the two poster
    path columns are both STRING, so no type error would surface).

    Args:
        catalogo: Catalog that contains the target schema.
        esquema: Schema that contains the table.
    """
    column_list = "ID_PELICULA, DES_RUTA_POSTER_FRONTAL, DES_RUTA_POSTER_TRASERA, FEC_CARGA"
    spark.sql(f"""
    INSERT INTO {catalogo}.{esquema}.TBL_POSTER_PELICULAS ({column_list})
    SELECT {column_list} FROM tmp_poster_movies_final_df
    """)

In [0]:
# Load the freshly prepared rows from the temp view into the target table.
fn_insert_table_poster_peliculas(catalogo, esquema) 

In [0]:
# Preview the loaded table. The name is built from the catalogo/esquema widget
# values so the preview always targets the table this run actually wrote to,
# rather than a hard-coded catalog and schema.
display(spark.table(f"{catalogo}.{esquema}.TBL_POSTER_PELICULAS"))