## Ingestión del archivo "country.json"

### Paso 1 - Leer el archivo JSON usando DataFrameReader de Spark

In [0]:
# Declare the widget before reading it, the same way p_file_date is handled,
# so the notebook also runs from a fresh session instead of failing on an
# undefined widget. A value supplied by a caller overrides this default.
# "DEV" is the value this notebook has historically written to the
# `environment` column.
dbutils.widgets.text("p_environment", "DEV")
v_environment = dbutils.widgets.get("p_environment")

In [0]:
# Date folder under bronze to ingest; the widget default is used when the
# caller does not pass p_file_date.
dbutils.widgets.text("p_file_date", "2024-12-16")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Schema for country.json in DDL-string form; handed to spark.read.schema()
# when the file is loaded.
countries_schema = "countryId INT, countryIsoCode STRING, countryName STRING"

In [0]:
# Location of the country file for the date selected through p_file_date.
country_json_path = f"/mnt/historymovie1416/bronze/{v_file_date}/country.json"

# Read with the explicit schema declared above rather than inferring it.
countries_reader = spark.read.schema(countries_schema)
countries_df = countries_reader.json(country_json_path)

In [0]:
# Print the DataFrame schema to check that the declared column names and
# types were applied.
countries_df.printSchema()

### Paso 2 - Eliminar las columnas no deseadas del DataFrame y agregar las nuevas

In [0]:
# The ISO code is not carried forward; drop it by column name.
countries_dropped_df = countries_df.drop("countryIsoCode")
# Note: drop() also accepts a Column object, but that form would need `col`
# imported before this cell runs.

In [0]:
from pyspark.sql.functions import col, current_timestamp, lit

# Rename the source columns to snake_case and append the audit columns.
# `environment` comes from the p_environment parameter (v_environment) instead
# of a hard-coded "DEV", so runs in other environments are labelled correctly.
countries_final_df = (
    countries_dropped_df
    .withColumnRenamed("countryId", "country_id")
    .withColumnRenamed("countryName", "country_name")
    .withColumn("ingestion_date", current_timestamp())  # load timestamp
    .withColumn("environment", lit(v_environment))
    .withColumn("file_date", lit(v_file_date))  # date folder the row came from
)

### Paso 3 - Escribir la salida como tabla en formato 'Delta'

In [0]:
# Persist the result as the Delta table movie_silver.country; "overwrite"
# replaces whatever the table currently holds.
(
    countries_final_df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("movie_silver.country")
)

In [0]:
# End the notebook run and hand the string "Success" back to whatever invoked
# it (for example a parent notebook calling dbutils.notebook.run).
dbutils.notebook.exit("Success")