# Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Widgets

In [0]:
# Remove every widget currently attached to the notebook so that the widget
# definitions that follow are created from scratch with their own defaults.
dbutils.widgets.removeAll()

In [0]:
# Notebook parameters and their default values: the storage account and
# container the source file is read from, and the catalog/schema the table is
# written to.
widget_defaults = {
    "storageName": "stjosuecampos",
    "container": "raw",
    "catalog": "catalog_supermarket",
    "schema": "bronze",
}

# Register one text widget per parameter, in the order listed above.
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

In [0]:
# Echo the effective value of every widget so the run output records which
# parameters were used. Labels are padded so the colons line up.
widget_labels = (
    ("storageName:", "storageName"),
    ("container  :", "container"),
    ("catalog    :", "catalog"),
    ("schema     :", "schema"),
)

for label, widget_name in widget_labels:
    print(label, dbutils.widgets.get(widget_name))

# Constants

In [0]:
# Read each widget value once and bind it to the variable name used by the
# rest of the notebook (storageName -> storage_name, the others keep their
# widget name).
storage_name, container, catalog, schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("storageName", "container", "catalog", "schema")
)

# Path

In [0]:
# Root URI of the storage container, addressed through the abfss:// scheme:
#   abfss://<container>@<storage account>.dfs.core.windows.net/
storage_host = f"{storage_name}.dfs.core.windows.net"
path_base = f"abfss://{container}@{storage_host}/"

# Source file for this notebook; path_base already ends with "/".
path_departments = path_base + "departments.csv"

# Exploration

In [0]:
# Quick look at the raw file before a schema is imposed: the first line is
# treated as the header and fields are split on ";". No schema is supplied
# here, so column types are whatever the CSV reader assigns by default.
preview_reader = spark.read.options(header=True, sep=";")
df_preview = preview_reader.csv(path_departments)

# Print the first 5 rows, then the column names/types Spark came up with.
df_preview.show(5)
df_preview.printSchema()


# Structures

In [0]:
# Explicit schema for departments.csv:
#   department_id  integer, declared non-nullable
#   department     string,  nullable
# NOTE(review): Spark file readers may relax user-supplied schemas to nullable
# on read, so the non-nullable flag is possibly not enforced — confirm before
# relying on it downstream.
departments_schema = (
    StructType()
    .add("department_id", IntegerType(), nullable=False)
    .add("department", StringType(), nullable=True)
)

# Read Resource

In [0]:
# Load departments.csv with the explicit schema instead of letting Spark pick
# the column types. The first line is a header and fields are ";"-separated.
#
# mode=FAILFAST: the CSV reader's default mode (PERMISSIVE) turns any value
# that does not fit the schema (e.g. a non-numeric department_id) into NULL
# without raising, and those NULLs would then be saved to the target table.
# FAILFAST raises on the first malformed record instead. Reads are lazy, so
# the error surfaces when the DataFrame is first materialised (show / write),
# not on this statement.
df_departments_raw = (
    spark.read
         .option("header", True)
         .option("sep", ";")
         .option("mode", "FAILFAST")
         .schema(departments_schema)
         .csv(path_departments)
)



In [0]:
# Sanity check of the typed read: print the first 5 rows, then the schema of
# the resulting DataFrame.
df_departments_raw.show(5)
df_departments_raw.printSchema()

# Save

In [0]:
# Fully qualified destination: <catalog>.<schema>.departments
target_table = f"{catalog}.{schema}.departments"

# Write the DataFrame to the table; mode "overwrite" replaces existing contents.
departments_writer = df_departments_raw.write.mode("overwrite")
departments_writer.saveAsTable(target_table)

# Status message (Spanish for "Table created/updated").
print(f"Tabla creada/actualizada: {target_table}")

# Read the table back and render up to 20 rows to confirm the write.
display(spark.table(target_table).limit(20))