### Paso 1 - Creación de la estructura del DataFrame 'genre'

In [0]:
# Declare the widget before reading it: without the declaration,
# dbutils.widgets.get fails on a fresh interactive run because "p_environment"
# does not exist yet. A value passed by a calling notebook/job overrides the
# "DEV" default.
dbutils.widgets.text("p_environment", "DEV")
v_environment = dbutils.widgets.get("p_environment")

In [0]:
# Notebook parameter selecting which dated folder of the bronze layer is loaded.
# "2024-12-16" is the widget's default when the caller supplies no value.
file_date_widget = "p_file_date"
dbutils.widgets.text(file_date_widget, "2024-12-16")
v_file_date = dbutils.widgets.get(file_date_widget)

In [0]:
from pyspark.sql.functions import col, current_timestamp, lit
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [0]:
# Explicit schema for genre.csv so Spark does not have to infer column types.
# Both columns keep the default nullable=True.
genre_schema = (
    StructType()
    .add("genreId", IntegerType())
    .add("genreName", StringType())
)

### Paso 2 - Leer el archivo 'genre.csv'

In [0]:
# Location of the genre file inside the dated bronze folder.
genre_csv_path = f"/mnt/historymovie1416/bronze/{v_file_date}/genre.csv"

# Read the CSV with its header row, applying the explicit schema.
genre = (
    spark.read
    .option("header", True)
    .schema(genre_schema)
    .csv(genre_csv_path)
)

### Paso 3 - Cambiar nombres a las columnas y agregar nuevas

In [0]:
# Rename the source columns to snake_case and append the audit columns:
#   ingestion_date - timestamp of this load
#   environment    - value of the p_environment notebook parameter
#   file_date      - value of the p_file_date notebook parameter
genre_df = (
    genre
    .withColumnRenamed("genreId", "genre_id")
    .withColumnRenamed("genreName", "genre_name")
    .withColumn("ingestion_date", current_timestamp())
    # Use the parameter read into v_environment rather than a hard-coded "DEV",
    # so the column reflects the environment the notebook was actually run for.
    .withColumn("environment", lit(v_environment))
    .withColumn("file_date", lit(v_file_date))
)

In [0]:
# Sanity check before writing: print the column names/types, then render the rows.
genre_df.printSchema()
display(genre_df)

### Paso 4 - Almacenar los datos como tabla en formato Delta

In [0]:
# Persist the result as the Delta table movie_silver.genre, replacing any
# previous contents of the table.
(
    genre_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("movie_silver.genre")
)

In [0]:
%sql
-- Read the silver table back to confirm the rows were written.
SELECT * FROM movie_silver.genre;

In [0]:
# End the notebook run and hand "Success" back as its exit value.
dbutils.notebook.exit("Success")