In [0]:
%fs ls dbfs:/FileStore/tables/my_data/retail_db_json/categories

In [0]:
%sql
-- Start from a clean slate: remove retail_db if present; CASCADE also drops the tables inside it.
DROP DATABASE IF EXISTS retail_db CASCADE

In [0]:
%sql
-- Display the current value of the warehouse directory setting (no value is assigned here).
SET spark.sql.warehouse.dir

In [0]:
%sql
-- Create the retail_db database unless it already exists.
CREATE DATABASE IF NOT EXISTS retail_db

In [0]:
%sql
-- Show the metadata recorded for the retail_db database.
DESCRIBE DATABASE retail_db

In [0]:
%sql
-- Show which database the session is currently using.
SELECT current_database()

In [0]:
%sql
-- Make retail_db the current database so unqualified table names resolve to it.
USE retail_db

In [0]:
%sql
-- Remove any previous copy of orders. IF EXISTS keeps a fresh run from failing:
-- retail_db has just been dropped and recreated, so the table does not exist yet.
DROP TABLE IF EXISTS orders

In [0]:
%sql
-- Define orders as a Delta table (no EXTERNAL keyword or path, so a managed table).
-- order_date is declared as STRING, not a date/timestamp type.
CREATE TABLE orders (
  order_id BIGINT,
  order_date STRING,
  order_customer_id BIGINT,
  order_status STRING
) USING DELTA

In [0]:
%sql
-- Show the detailed metadata for the orders table.
DESCRIBE FORMATTED orders

In [0]:
import json

# DBFS locations: the CSV source data (alongside schemas.json) and the JSON output root.
base_dir = '/FileStore/tables/my_data/retail_db'
base_output_dir = '/FileStore/tables/my_data/retail_db_json'

# Datasets to convert; each name is used both as a sub-directory of base_dir
# and as a key into schemas.json.
dataset_list = [
    'departments', 'categories', 'products',
    'customers', 'orders', 'order_items',
]

def get_columns(schemas_file, dataset_name):
    """Return the column names of one dataset, ordered by ``column_position``.

    Args:
        schemas_file: Path of the JSON schemas file; it is read through Spark
            as a single whole-text value.
        dataset_name: Key of the dataset inside the parsed schemas document.

    Returns:
        A list of ``column_name`` values sorted ascending by ``column_position``.

    Raises:
        KeyError: If ``dataset_name`` is not a key of the schemas document, or
            a column entry lacks ``column_position`` / ``column_name``.
    """
    # wholetext=True yields the entire file as one row, so first().value is the full JSON text.
    raw_json = spark.read.text(schemas_file, wholetext=True).first().value
    column_specs = list(json.loads(raw_json)[dataset_name])
    column_specs.sort(key=lambda spec: spec['column_position'])
    return [spec['column_name'] for spec in column_specs]

# Convert each dataset from CSV (read without a header row) to JSON,
# taking the column names from schemas.json.
schemas_path = f'{base_dir}/schemas.json'
for dataset in dataset_list:
    print(f'Processing {dataset} data')
    columns = get_columns(schemas_path, dataset)
    csv_path = f'{base_dir}/{dataset}'
    json_path = f'{base_output_dir}/{dataset}'
    # toDF(*columns) replaces the auto-generated column names with the schema names.
    dataframe = spark.read.csv(csv_path, inferSchema=True).toDF(*columns)
    # 'overwrite' replaces any JSON output left by a previous run.
    dataframe.write.mode('overwrite').json(json_path)

In [0]:
%sql
-- Preview the JSON files by querying the directory directly, without a table.
-- JSON output root: dbfs:/FileStore/tables/my_data/retail_db_json
SELECT * FROM JSON.`dbfs:/FileStore/tables/my_data/retail_db_json/orders`

In [0]:
%sql
-- Load the JSON files from the orders output directory into the orders table.
COPY INTO orders
FROM 'dbfs:/FileStore/tables/my_data/retail_db_json/orders'
FILEFORMAT = JSON

In [0]:
%sql
-- Inspect the rows now stored in orders.
SELECT * FROM orders

In [0]:
%sql
-- List the tables in the current database.
SHOW TABLES

## Managed vs External

Creating external tables means only the metadata is managed by the Spark Metastore.

Dropping an EXTERNAL table will drop its metadata from the store, but leave the data files intact.

Dropping a Managed table will drop its metadata and delete the underlying data files.

In [0]:
%sql
-- External Delta table: only the metadata is registered in the metastore; the data
-- files live at the LOCATION given below.
-- NOTE(review): this path appears to sit under the warehouse directory for retail_db —
-- confirm that is intended rather than a separate external location.
CREATE EXTERNAL TABLE order_items (
  order_item_id BIGINT,
  order_item_order_id BIGINT,
  order_item_product_id BIGINT,
  order_item_quantity BIGINT,
  order_item_subtotal DOUBLE,
  order_item_product_price DOUBLE
) USING DELTA
LOCATION 'dbfs:/user/hive/warehouse/retail_db.db/order_items'

In [0]:
%sql
-- Show the detailed metadata for the order_items table.
DESCRIBE FORMATTED order_items

In [0]:
%sql
-- Populate order_items straight from the JSON files. Columns are listed explicitly so
-- their order matches the table definition (INSERT ... SELECT maps columns by position).
-- NOTE(review): assumes the JSON field is spelled order_item_quantity — confirm against schemas.json.
INSERT INTO order_items
SELECT order_item_id,
  order_item_order_id,
  order_item_product_id,
  order_item_quantity,
  order_item_subtotal,
  order_item_product_price
FROM JSON.`dbfs:/FileStore/tables/my_data/retail_db_json/order_items`

In [0]:
%sql
-- Revenue per order: sum of the item subtotals, rounded to 2 decimals, ordered by order id.
SELECT
  order_item_order_id,
  round(sum(order_item_subtotal), 2) AS order_revenue
FROM order_items
GROUP BY order_item_order_id
ORDER BY order_item_order_id

## Why DELTA tables?

They allow you to execute CRUD operations against the tables.

In [0]:
%sql
-- Remove any previous copy of crud_demo. IF EXISTS keeps a fresh run from failing
-- when the table has not been created yet.
DROP TABLE IF EXISTS crud_demo

In [0]:
%sql
-- Delta table used to demonstrate INSERT / UPDATE / MERGE.
CREATE TABLE crud_demo (
  user_id INT,
  user_fname STRING,
  user_lname STRING,
  user_email STRING
) USING DELTA

In [0]:
%sql
-- Seed crud_demo with four sample users (user_id 1-4).
INSERT INTO crud_demo
VALUES
  (1, 'Scott', 'Tiger', 'test@email.com'),
  (2, 'Donald', 'Duck', 'dduck@email.com'),
  (3, 'Mickey', 'Mouse', 'mmouse@email.com'),
  (4, 'Tom', 'Jerry', 'tjerry@email.com')

In [0]:
%sql
-- Inspect the rows currently in crud_demo.
SELECT * FROM crud_demo

In [0]:
%sql
-- Replace the email on every row whose current email is 'tjerry@email.com'.
UPDATE crud_demo
SET user_email = 'jerryt@email.com'
WHERE user_email = 'tjerry@email.com'

## Merge

MERGE performs inserts, updates and, optionally, deletes in a single statement.

In [0]:
%sql
-- Staging table with the same columns as crud_demo; it is the source of the MERGE.
CREATE TABLE crud_demo_stg (
  user_id INT,
  user_fname STRING,
  user_lname STRING,
  user_email STRING
) USING DELTA

In [0]:
%sql
-- Staging rows: user_id 3 is also among crud_demo's seed rows, while 5 and 6 are new ids.
INSERT INTO crud_demo_stg
VALUES
  (3, 'Leighton', 'Darkins', 'ljdarkins@gmail.com'),
  (5, 'Jimmy', 'McSteve', 'jdawg@gmail.com'),
  (6, 'Brett', 'Doopus', 'doopus@gmail.com')

In [0]:
%sql
-- Inspect the staged rows before merging them.
SELECT * FROM crud_demo_stg

In [0]:
%sql
-- Upsert the staged rows into crud_demo, matching on user_id:
-- matched ids have every column overwritten from staging; unmatched ids are inserted.
MERGE INTO crud_demo AS tgt
USING crud_demo_stg AS src
  ON tgt.user_id = src.user_id
WHEN MATCHED THEN
  UPDATE SET *
WHEN NOT MATCHED THEN
  INSERT *

In [0]:
%sql
-- Clean up: remove the demo table.
DROP TABLE crud_demo

In [0]:
%sql
-- Clean up: remove the staging table.
DROP TABLE crud_demo_stg