In [0]:
%run ./00_Setup

In [0]:
%sql
-- Register a text widget holding the root path of the book store data set;
-- the other cells read it through ${book_store_path}.
CREATE WIDGET TEXT book_store_path
DEFAULT '/Volumes/workspace/strata_lab/entrenamiento/book_store';

In [0]:
%sql
-- Build the orders table with a CTAS statement over the Parquet files found
-- under <book_store_path>/orders.
CREATE TABLE orders
AS
SELECT *
FROM parquet.`${book_store_path}/orders`;

In [0]:
%sql
-- Peek at the orders table: return at most 10 rows.
SELECT *
FROM orders
LIMIT 10;

In [0]:
%sql
-- Rebuild orders from the Parquet source. CREATE OR REPLACE replaces the
-- existing table with the result of the query.
CREATE OR REPLACE TABLE orders
AS
SELECT *
FROM parquet.`${book_store_path}/orders`;

In [0]:
%sql
-- List the operations recorded in the history of the orders table, so the
-- effect of the overwrite performed in the previous cell can be inspected.
DESCRIBE HISTORY orders;

In [0]:
%sql
-- Replace the contents of orders with the rows from the Parquet source.
-- INSERT OVERWRITE writes into the existing table, so the incoming rows have
-- to fit its current schema (the safer option with respect to schema).
INSERT OVERWRITE orders
SELECT *
FROM parquet.`${book_store_path}/orders`;

In [0]:
%sql
-- Show the history of operations on the orders table again, this time using
-- the DESC shorthand for DESCRIBE.
DESC HISTORY orders;

In [0]:
%sql
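-- The statement below is expected to fail with a schema mismatch: it selects
-- an extra current_timestamp() column in addition to the source columns.
-- It is therefore left commented out.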
/*
INSERT OVERWRITE orders
SELECT *, current_timestamp() FROM parquet.`${book_store_path}/orders`
*/

In [0]:
%sql
-- Append the rows found under <book_store_path>/orders_new to orders.
-- NOTE: a plain INSERT INTO does no deduplication, so the operation is not
-- idempotent.
INSERT INTO orders
SELECT *
FROM parquet.`${book_store_path}/orders_new`;

In [0]:
%sql
-- Report how many rows the orders table currently holds.
SELECT
    COUNT(*)
FROM orders;

In [0]:
%sql
-- (Re)build the customers table from the JSON files found under
-- <book_store_path>/customers.
CREATE OR REPLACE TABLE customers
AS
SELECT *
FROM json.`${book_store_path}/customers`;

In [0]:
%sql
-- Stage the incoming customer records (JSON) as a temporary view.
CREATE OR REPLACE TEMP VIEW customers_update AS
SELECT * FROM json.`${book_store_path}/customers-json-new/`;

-- Upsert the staged records into customers with MERGE, for idempotency:
--   * a matched customer only gets its email filled in when the stored email
--     is NULL and the incoming one is not;
--   * a customer that is not in the table yet is inserted as-is.
-- The `updated` column is deliberately not assigned: that column does not
-- exist, so referencing it here would contradict the repeated MERGE that
-- follows in the next cell.
MERGE INTO customers c
USING customers_update u
ON c.customer_id = u.customer_id
WHEN MATCHED AND c.email IS NULL AND u.email IS NOT NULL THEN
    UPDATE SET email = u.email
WHEN NOT MATCHED THEN
    INSERT *;

In [0]:
%sql
-- Run the upsert a second time to show that it changes nothing once the data
-- has already been updated.
-- The `updated` column is left out because that column does not exist.
MERGE INTO customers c
USING customers_update u
ON u.customer_id = c.customer_id
WHEN MATCHED AND u.email IS NOT NULL AND c.email IS NULL THEN
    UPDATE SET email = u.email
WHEN NOT MATCHED THEN
    INSERT *;

In [0]:
%sql
-- Expose the new books CSV files as the temporary view books_update, with an
-- explicit column list instead of relying on schema inference.
-- Options:
--   path      - location of the CSV files under the book store path
--   header    - the first line of each file holds the column names
--   delimiter - field separator (the key was previously misspelled
--               "delimeter", which the CSV reader does not recognise)
CREATE OR REPLACE TEMP VIEW books_update
(book_id STRING, title STRING, author STRING, category STRING, price DOUBLE)
USING CSV
OPTIONS (
  path = "${book_store_path}/books-csv-new",
  header = "true",
  delimiter = ","
);

-- Preview the books_update view.
SELECT * FROM books_update;

In [0]:
%sql
-- Add new Computer Science titles to the books table.
-- A staged row counts as already present when both book_id and title match a
-- row in books; unmatched rows are inserted only when their category is
-- 'Computer Science'.
MERGE INTO books b
USING books_update u
ON u.book_id = b.book_id AND u.title = b.title
WHEN NOT MATCHED AND u.category = 'Computer Science' THEN
    INSERT *;