In [0]:
%run ./00_Setup

In [0]:
%sql
-- Make strata_lab the current schema so the unqualified table name used in later cells (orders_updates) resolves there
USE SCHEMA strata_lab;
-- Create a text widget named book_store_path; its default is the book_store directory of the workspace volume.
-- A later Python cell reads this widget to build the source, checkpoint and schema-location paths.
CREATE WIDGET TEXT book_store_path DEFAULT '/Volumes/workspace/strata_lab/entrenamiento/book_store';

In [0]:
# Read the book store root path from the notebook widget.
# Surrounding whitespace and trailing slashes are dropped so that paths built as
# f"{book_store_path}/orders" never contain a double slash.
book_store_path = dbutils.widgets.get("book_store_path").strip().rstrip("/")

# Fail fast on a blank value: later cells derive checkpoint/schema directories from
# this variable and delete them recursively, so an empty root must not get that far.
if not book_store_path:
    raise ValueError(
        "Widget 'book_store_path' is empty; set it to the book store volume path."
    )

In [0]:
%sql
-- Start from a clean slate: drop the streaming target table if a previous run left it behind.
-- NOTE(review): only the table is dropped here; the stream's checkpoint and schema-location
-- directories are removed by the last cell of this notebook. If an earlier run stopped before
-- reaching that cell, the leftover checkpoint will presumably make the stream skip files it
-- already ingested, leaving the recreated table incomplete -- confirm and clean up manually if so.
DROP TABLE IF EXISTS orders_updates;

In [0]:
%sql
-- Preview the raw order records by querying the Parquet files in the orders directory directly.
-- NOTE(review): this path is hard-coded; it matches the default of the book_store_path widget,
-- but changing the widget value does not change what this cell reads.
SELECT * FROM parquet.`/Volumes/workspace/strata_lab/entrenamiento/book_store/orders`

In [0]:
# Ingest the Parquet order files into the orders_updates table with Auto Loader.
#   - cloudFiles.schemaLocation: directory where Auto Loader keeps the inferred schema
#   - checkpointLocation: directory where the stream records its progress
#   - trigger(availableNow=True): process everything currently available, then stop
(spark.readStream
  .format("cloudFiles")
  .option("cloudFiles.format", "parquet")
  .option("cloudFiles.schemaLocation", f"{book_store_path}/orders_schema_location")
  .load(f"{book_store_path}/orders")
  .writeStream
  .option("checkpointLocation", f"{book_store_path}/orders_checkpoint")
  .trigger(availableNow=True)
  .table("orders_updates")
  # .table() only starts the query and returns right away. Block until the
  # availableNow run has finished so the following cells (SELECT / count) see the
  # fully written table instead of racing the stream under "Run all".
  .awaitTermination()
)

In [0]:
%sql
-- Inspect the rows that the streaming cell above wrote into orders_updates
SELECT * FROM orders_updates

In [0]:
%sql
-- Baseline row count of orders_updates, taken before additional data is loaded into the orders directory
SELECT count(*) FROM orders_updates;

In [0]:
# Show what is currently stored in the orders source directory (before new data is loaded)
display(dbutils.fs.ls(f"{book_store_path}/orders"))

In [0]:
# Generate another batch of order data for the stream to pick up.
# load_data is not defined in this notebook -- presumably it comes from the
# 00_Setup notebook executed in the first cell (TODO confirm).
load_data(row_count=35_000)

In [0]:
# List the orders source directory again to see what the load_data call added
display(dbutils.fs.ls(f"{book_store_path}/orders"))

In [0]:
# Run the same Auto Loader stream a second time to ingest the newly loaded data.
# It reuses the schema location and checkpoint of the first run, so the stream
# resumes from its recorded progress instead of starting over.
(
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "parquet")
    .option("cloudFiles.schemaLocation", f"{book_store_path}/orders_schema_location")
    .load(f"{book_store_path}/orders")
    .writeStream
    .option("checkpointLocation", f"{book_store_path}/orders_checkpoint")
    .trigger(availableNow=True)
    .table("orders_updates")
    # .table() returns as soon as the query has started. Wait for the availableNow
    # run to complete so the row count, table history and the final checkpoint
    # cleanup below do not run while the stream is still writing.
    .awaitTermination()
)

In [0]:
%sql
-- Row count after the second streaming run; compare with the earlier count to see how many rows were added
SELECT count(*) FROM orders_updates;

In [0]:
%sql
-- List the recorded history of operations on orders_updates (the streaming writes above should appear here)
DESCRIBE HISTORY orders_updates;

In [0]:
# Tear down the stream's on-disk state so the next run of this notebook starts fresh:
# first the checkpoint directory, then the Auto Loader schema-location directory.
# Both are deleted recursively.
dbutils.fs.rm(f"{book_store_path}/orders_checkpoint", recurse=True)
dbutils.fs.rm(f"{book_store_path}/orders_schema_location", recurse=True)