# Notebook to store sales records (JSON) into the bronze.sales table

Define constant variables (location, target entity)

In [0]:
# Folder that is scanned for incoming sales JSON files.
FILES_LOCATION = "/Volumes/workspace/default/staging/sales/"

# Destination of the load, used as <TARGET_SCHEMA>.<TARGET_TABLE>.
TARGET_SCHEMA = "bronze"
TARGET_TABLE = "sales"

### Getting the sales (JSON)

Get all `*.json` files in `FILES_LOCATION`

In [0]:
# List the staging folder and keep only the entries whose path ends with ".json".
files = dbutils.fs.ls(FILES_LOCATION)

sales_files = [f.path for f in files if f.path.endswith(".json")]

# Fail fast when there is nothing to ingest: the table is later written with
# mode("overwrite"), so continuing without any input files risks replacing the
# current contents of the table with an empty result.
if not sales_files:
    raise ValueError(f"No *.json files found in {FILES_LOCATION}; nothing to ingest.")

Define the JSON schema before reading the files. Sample record:
```json
{
  "latitude": 6.29169401918482,
  "longitude": -75.60110546619606,
  "date": "12/05/2024 10:43:19",
  "customer_id": 1888,
  "employee_id": 9438,
  "quantity_products": 23,
  "order_id": "d8b9b417-b098-4344-b137-362894e4dccb"
}
```

In [0]:
from pyspark.sql.types import StructType, StructField, DoubleType, StringType, IntegerType

# Explicit schema for one sales record, passed to the reader instead of relying
# on type inference.
# NOTE(review): Spark may relax user-supplied nullability when reading from
# files -- confirm whether nullable=False is actually enforced before relying on it.
schema = StructType(
    [
        # Coordinates of the sale.
        StructField(name="latitude", dataType=DoubleType(), nullable=True),
        StructField(name="longitude", dataType=DoubleType(), nullable=True),
        # Declared as a string, so no timestamp parsing happens at read time.
        StructField(name="date", dataType=StringType(), nullable=False),
        StructField(name="customer_id", dataType=IntegerType(), nullable=False),
        StructField(name="employee_id", dataType=IntegerType(), nullable=False),
        StructField(name="quantity_products", dataType=IntegerType(), nullable=True),
        StructField(name="order_id", dataType=StringType(), nullable=False),
    ]
)

### Read *.json files and load them into a DataFrame

In [0]:
# Load every listed JSON file into a single DataFrame using the explicit schema.
# NOTE(review): no multiLine option is set; if the files are pretty-printed like
# the sample record in this notebook, confirm that they are parsed as expected.
df_sales = (
    spark.read
    .schema(schema)
    .json(sales_files)
)

Add ingestion timestamp

In [0]:
from pyspark.sql.functions import current_timestamp

# Stamp every row with the load time.
df_sales = df_sales.withColumn("ingestime", current_timestamp())

# Put "ingestime" first and keep the remaining columns in their current order.
columns = ["ingestime", *(name for name in df_sales.columns if name != "ingestime")]
df_sales = df_sales.select(*columns)

### Create Schema in Catalog

In [0]:
# Make sure the target schema exists; IF NOT EXISTS makes this a no-op when it
# is already there.
spark.sql(
    f"CREATE SCHEMA IF NOT EXISTS {TARGET_SCHEMA}"
)

### Write into bronze.sales

In [0]:
# Save the DataFrame as a Delta table named <TARGET_SCHEMA>.<TARGET_TABLE>.
# NOTE(review): the table is partitioned by the "ingestime" timestamp column --
# confirm this is intended rather than a coarser key such as a date.
# NOTE(review): mode("overwrite") replaces the existing table contents on every
# run -- confirm that "append" is not what is wanted for incremental loads.
(
    df_sales.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("ingestime")
    .saveAsTable(f"{TARGET_SCHEMA}.{TARGET_TABLE}")
)

### Move the processed JSON files into a folder