In [0]:
# Remove every widget currently attached to the notebook before the
# parameter widgets are (re)defined in a later cell.
dbutils.widgets.removeAll()

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Notebook parameters: widget name -> default value.
# Insertion order is preserved, so the widgets are created in this order.
widget_defaults = {
    "container": "raw-jptq",
    "catalogo": "catalog_jptq",
    "esquema": "bronze_jptq",
    "datalake": "adlsjptq0126",
    "file": "Peliculas",
    "name_file": "FilmDetails.csv",
    "storageLocation": "abfss://unity-catalog-jptq@adlsjptq0126.dfs.core.windows.net",
}

# Create one text widget per parameter.
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

In [0]:
# Read the current value of every parameter widget, in declaration order.
container, catalogo, esquema, datalake, file, name_file, storageLocation = (
    dbutils.widgets.get(widget_key)
    for widget_key in (
        "container",
        "catalogo",
        "esquema",
        "datalake",
        "file",
        "name_file",
        "storageLocation",
    )
)

# Full abfss path of the source file: <container>@<datalake>/<file>/<name_file>.
ruta = f"abfss://{container}@{datalake}.dfs.core.windows.net/{file}/{name_file}"

In [0]:
# Read the CSV with a header row and let Spark infer the column types.
# NOTE(review): this DataFrame is not referenced by any later cell visible in
# this notebook; it looks like an exploratory read — confirm whether it is still needed.
df_details_movies = spark.read.csv(ruta, header=True, inferSchema=True)

In [0]:
# Explicit schema for the film-details CSV: (column name, type, nullable).
# NOTE(review): IntegerType is a 32-bit signed integer; budget/revenue amounts
# above 2,147,483,647 USD would not fit — confirm against the source data and
# the INT columns of the target table.
details_movies_schema = (
    StructType()
    .add("id", IntegerType(), False)
    .add("director", StringType(), True)
    .add("top_billed", StringType(), True)
    .add("budget_usd", IntegerType(), True)
    .add("revenue_usd", IntegerType(), True)
)

In [0]:
# Read the same CSV again, this time enforcing the explicit schema instead of
# inferring the types.
df_details_movies_final = spark.read.csv(
    ruta,
    schema=details_movies_schema,
    header=True,
)

In [0]:
# Keep only the columns that are loaded into the target table, in table order.
details_movies_selected_df = df_details_movies_final.select(
    "id",
    "director",
    "top_billed",
    "budget_usd",
    "revenue_usd",
)

In [0]:
# Source column name -> target table column name.
column_renames = {
    "id": "ID_PELICULA",
    "director": "DES_DIRECTOR",
    "top_billed": "DES_TOP_ACTORES",
    "budget_usd": "VAL_PRESUPUESTO_USD",
    "revenue_usd": "VAL_GANANCIAS_USD",
}

# Apply the renames one by one, always starting from the selected DataFrame so
# the cell can be re-run safely.
details_movies_renamed_df = details_movies_selected_df
for source_name, target_name in column_renames.items():
    details_movies_renamed_df = details_movies_renamed_df.withColumnRenamed(source_name, target_name)

In [0]:
# Add the FEC_CARGA column, populated with current_timestamp(), to the renamed DataFrame.
details_movies_final_df = details_movies_renamed_df.withColumn("FEC_CARGA", current_timestamp())

In [0]:
# Alternative kept for reference (not executed): write the DataFrame straight
# to the table with overwrite mode instead of going through SQL.
#details_movies_final_df.write.mode("overwrite").saveAsTable(f"{catalogo}.{esquema}.TBL_DETALLE_PELICULAS")
# Register the DataFrame as a temporary view so the INSERT statement in a later
# cell can read it by name (tmp_details_movies_final_df).
details_movies_final_df.createOrReplaceTempView("tmp_details_movies_final_df")

In [0]:
def fn_create_table_detalle_peliculas(catalogo: str, esquema: str, storage_location: str = None):
    """Create the Delta table TBL_DETALLE_PELICULAS if it does not exist yet.

    The table is created as an external Delta table under
    ``<storage_location>/bronze-jptq/TBL_DETALLE_PELICULAS``. A message is
    printed stating whether the table already existed or was created.

    Args:
        catalogo: Catalog that holds the target schema.
        esquema: Schema in which the table lives.
        storage_location: Base storage URI for the table files. When omitted,
            the notebook-level ``storageLocation`` variable is used, which
            keeps existing two-argument calls working unchanged.

    Returns:
        None.
    """
    table_name = f"{catalogo}.{esquema}.TBL_DETALLE_PELICULAS"

    # Nothing to do when the table is already registered in the catalog.
    if spark.catalog.tableExists(table_name):
        print(f"La tabla ya existe: {table_name}")
        return

    # Resolve the location only when it is actually needed, so the fallback to
    # the notebook global is evaluated at the same point as before.
    if storage_location is None:
        storage_location = storageLocation

    spark.sql(f"""CREATE TABLE IF NOT EXISTS {table_name} (
        ID_PELICULA INT,
        DES_DIRECTOR STRING,
        DES_TOP_ACTORES STRING,
        VAL_PRESUPUESTO_USD INT,
        VAL_GANANCIAS_USD INT,
        FEC_CARGA TIMESTAMP
        )
      USING DELTA
      LOCATION '{storage_location}/bronze-jptq/TBL_DETALLE_PELICULAS'""")
    print(f"Tabla creada correctamente: {table_name}")

In [0]:
# Make sure the target table exists before it is emptied and reloaded below.
fn_create_table_detalle_peliculas(catalogo, esquema)

In [0]:
def fn_truncate_table_detalle_peliculas(catalogo: str, esquema: str):
    """Run TRUNCATE TABLE on TBL_DETALLE_PELICULAS in the given catalog and schema.

    Args:
        catalogo: Catalog that holds the target schema.
        esquema: Schema that holds the table.
    """
    truncate_statement = f"""
    TRUNCATE TABLE {catalogo}.{esquema}.TBL_DETALLE_PELICULAS
    """
    spark.sql(truncate_statement)

In [0]:
# Empty the target table so the insert that follows performs a full reload.
fn_truncate_table_detalle_peliculas(catalogo, esquema)

In [0]:
def fn_insert_table_detalle_peliculas(catalogo: str, esquema: str):
  """Insert every row of the tmp_details_movies_final_df view into TBL_DETALLE_PELICULAS.

  The statement uses ``SELECT *`` without a column list, so the temporary
  view must already be registered when this function is called.

  Args:
      catalogo: Catalog that holds the target schema.
      esquema: Schema that holds the table.
  """
  insert_statement = f"""
  INSERT INTO {catalogo}.{esquema}.TBL_DETALLE_PELICULAS
  SELECT * FROM tmp_details_movies_final_df
  """
  spark.sql(insert_statement)

In [0]:
# Load the rows exposed by the tmp_details_movies_final_df view into the target table.
fn_insert_table_detalle_peliculas(catalogo, esquema) 

In [0]:
# Preview the loaded table. The name is built from the same `catalogo` and
# `esquema` widget values used by the create/truncate/insert steps, so the
# preview always targets the table that was just loaded rather than a
# hardcoded catalog and schema.
display(spark.table(f"{catalogo}.{esquema}.TBL_DETALLE_PELICULAS"))