In [None]:
from pathlib import Path
import sys

# Find notebooks/00_setup by checking the current working directory and then
# each of its parents in turn.
repo_root = Path.cwd()
setup_dir = None
for candidate in (repo_root, *repo_root.parents):
    probe = candidate / 'notebooks' / '00_setup'
    if probe.exists():
        setup_dir = probe
        break

if setup_dir is None:
    raise RuntimeError('Could not locate notebooks/00_setup from this notebook location.')

# Put the setup directory at the front of sys.path (only once) so that the
# `bootstrap` module imported below can be found.
setup_dir_str = str(setup_dir)
if setup_dir_str not in sys.path:
    sys.path.insert(0, setup_dir_str)

from bootstrap import ensure_src_on_path
ensure_src_on_path()

from sql_for_analysis.db.connection import get_connection


### Easy practice for star schema (basics)

In [None]:
from sql_for_analysis.db.connection import get_connection
from tabulate import tabulate


#### Creating the database for practice
##### Source system (operational database)

In [None]:
# Script that rebuilds the source table `operational_orders`: it drops any
# existing copy, recreates it, and inserts four sample orders.
# The table is denormalized: every order row carries the customer attributes
# (email, name, city) and product attributes (SKU, name, category) inline.
# The INSERT has no column list, so the values must follow the column order of
# the CREATE TABLE statement.
sql = """
DROP TABLE IF EXISTS operational_orders;

CREATE TABLE operational_orders (
    order_id INT,
    order_date DATE,
    customer_email VARCHAR(100),
    customer_name VARCHAR(100),
    customer_city VARCHAR(50),
    product_sku VARCHAR(50),
    product_name VARCHAR(100),
    product_category VARCHAR(50),
    quantity INT,
    unit_price DECIMAL(10,2)
);

INSERT INTO operational_orders VALUES
(1, '2024-01-15', 'alice@email.com', 'Alice', 'New York', 'LAP-001', 'Laptop', 'Electronics', 1, 1000.00),
(2, '2024-01-16', 'bob@email.com', 'Bob', 'London', 'MOU-001', 'Mouse', 'Electronics', 2, 25.00),
(3, '2024-01-17', 'alice@email.com', 'Alice', 'New York', 'KEY-001', 'Keyboard', 'Electronics', 1, 75.00),
(4, '2024-01-18', 'carol@email.com', 'Carol', 'Paris', 'LAP-001', 'Laptop', 'Electronics', 1, 1000.00);
"""


In [None]:

# Run the script currently held in `sql` (the operational_orders rebuild) on a
# fresh connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


#### Creating the dimension and fact tables (starting with customers)

In [None]:
# Script that rebuilds the customer dimension from the distinct
# (email, name, city) combinations found in operational_orders.
#
# CASCADE is needed on the DROP: once fact_sales exists, its foreign key on
# customer_key makes a plain DROP TABLE fail, so the notebook could not be run
# a second time. For a foreign-key dependency PostgreSQL only removes the
# constraint on the referencing table; fact_sales itself is left in place.
sql = """
DROP TABLE IF EXISTS dim_customers CASCADE;

CREATE TABLE dim_customers (
    customer_key SERIAL PRIMARY KEY,    -- Auto-incrementing surrogate key
    customer_email VARCHAR(100),
    customer_name VARCHAR(100),
    customer_city VARCHAR(50)
);

INSERT INTO dim_customers (customer_email, customer_name, customer_city)
SELECT DISTINCT
    customer_email,
    customer_name,
    customer_city
FROM operational_orders;
"""


In [None]:

# Run the script currently held in `sql` (the dim_customers rebuild) on a
# fresh connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


In [None]:
# Preview the customer dimension.
# The query is executed and printed in this same cell; assigning it to `sql`
# alone shows nothing, because the next cell overwrites `sql` before any
# execute call sees it.
sql = """
    SELECT * FROM dim_customers;
"""

with get_connection() as connection:
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        headers = [desc[0] for desc in cursor.description]  # type: ignore

print(tabulate(rows, headers=headers, tablefmt="psql"))


In [None]:
# Script that rebuilds the product dimension from the distinct
# (SKU, name, category) combinations found in operational_orders.
#
# CASCADE is needed on the DROP: once fact_sales exists, its foreign key on
# product_key makes a plain DROP TABLE fail, so the notebook could not be run
# a second time. For a foreign-key dependency PostgreSQL only removes the
# constraint on the referencing table; fact_sales itself is left in place.
sql = """
DROP TABLE IF EXISTS dim_products CASCADE;

CREATE TABLE dim_products (
    product_key SERIAL PRIMARY KEY,     -- Auto-incrementing surrogate key
    product_sku VARCHAR(50),
    product_name VARCHAR(100),
    product_category VARCHAR(50)
);

INSERT INTO dim_products (product_sku, product_name, product_category)
SELECT DISTINCT
    product_sku,
    product_name,
    product_category
FROM operational_orders;
"""


In [None]:

# Run the script currently held in `sql` (the dim_products rebuild) on a
# fresh connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


In [None]:
# Preview the product dimension.
# The query is executed and printed in this same cell; assigning it to `sql`
# alone shows nothing, because the next cell overwrites `sql` before any
# execute call sees it.
sql = """
  SELECT * FROM dim_products;
"""

with get_connection() as connection:
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        headers = [desc[0] for desc in cursor.description]  # type: ignore

print(tabulate(rows, headers=headers, tablefmt="psql"))


In [None]:
# Script that rebuilds the date dimension with one row per distinct order_date
# in operational_orders. date_key is the date written as the integer YYYYMMDD.
#
# Two fixes compared with a naive version:
# * CASCADE on the DROP: once fact_sales exists, its foreign key on date_key
#   makes a plain DROP TABLE fail, so the notebook could not be run a second
#   time. Only the foreign-key constraint is removed, not fact_sales itself.
# * The FM ("fill mode") prefix on the TO_CHAR patterns: plain 'Day' and
#   'Month' are blank-padded to 9 characters, which stores values such as
#   'Monday   ' and breaks equality filters like day_of_week = 'Monday'.
sql = """
DROP TABLE IF EXISTS dim_date CASCADE;

CREATE TABLE dim_date (
    date_key INT PRIMARY KEY,
    full_date DATE,
    day_of_week VARCHAR(10),
    month_name VARCHAR(10),
    year INT
);

INSERT INTO dim_date (date_key, full_date, day_of_week, month_name, year)
SELECT DISTINCT
    TO_CHAR(order_date, 'YYYYMMDD')::INT,
    order_date,
    TO_CHAR(order_date, 'FMDay'),
    TO_CHAR(order_date, 'FMMonth'),
    EXTRACT(YEAR FROM order_date)::INT
FROM operational_orders;
"""


In [None]:

# Run the script currently held in `sql` (the dim_date rebuild) on a fresh
# connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


In [None]:
# Preview the date dimension.
# The query is executed and printed in this same cell; assigning it to `sql`
# alone shows nothing, because the next cell overwrites `sql` before any
# execute call sees it.
sql = """
  SELECT * FROM dim_date;
"""

with get_connection() as connection:
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
        # cursor.description holds one entry per result column; item 0 is its name.
        headers = [desc[0] for desc in cursor.description]  # type: ignore

print(tabulate(rows, headers=headers, tablefmt="psql"))


In [None]:
# DDL for the fact table at the centre of the star schema. Each row references
# the three dimensions by key and stores the measures for one sale.
#
# The DROP ... IF EXISTS makes the cell re-runnable: without it a second run
# fails because fact_sales already exists. Nothing references fact_sales, so a
# plain DROP (no CASCADE) is enough.
sql = """
DROP TABLE IF EXISTS fact_sales;

CREATE TABLE fact_sales (
    sale_id SERIAL PRIMARY KEY,
    date_key INT REFERENCES dim_date(date_key),
    customer_key INT REFERENCES dim_customers(customer_key),
    product_key INT REFERENCES dim_products(product_key),

    -- Measurements
    quantity INT,
    unit_price DECIMAL(10,2),
    total_amount DECIMAL(10,2)
);
"""


In [None]:

# Run the script currently held in `sql` (the fact_sales DDL) on a fresh
# connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


In [None]:
# Load fact_sales from operational_orders, swapping the natural keys
# (customer_email, product_sku) for the dimensions' surrogate keys and deriving
# date_key with the same YYYYMMDD expression used to populate dim_date.
#
# The load is a full refresh: the table is emptied first so that re-running
# this cell does not append a second copy of every sale. This matches the
# drop-and-rebuild behaviour of every other table in the notebook.
# RESTART IDENTITY resets the sale_id sequence so the keys start from 1 again.
sql = """
TRUNCATE TABLE fact_sales RESTART IDENTITY;

INSERT INTO fact_sales (date_key, customer_key, product_key, quantity, unit_price, total_amount)
SELECT
    TO_CHAR(o.order_date, 'YYYYMMDD')::INT AS date_key,
    c.customer_key,
    p.product_key,
    o.quantity,
    o.unit_price,
    o.quantity * o.unit_price AS total_amount
FROM operational_orders o
INNER JOIN dim_customers c ON o.customer_email = c.customer_email
INNER JOIN dim_products p ON o.product_sku = p.product_sku;
"""

In [None]:

# Run the script currently held in `sql` (the fact_sales load) on a fresh
# connection, then commit it.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # The commit is REQUIRED for CREATE / INSERT statements.
    connection.commit()


In [None]:
# Query that returns every column of every row in fact_sales.
# It is only stored in `sql` here; the following cell executes and prints it.
sql = """
  SELECT * FROM fact_sales;
"""


In [None]:
# Execute the query held in `sql`, collecting the column names and all rows
# while the cursor is still open.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # Item 0 of each cursor.description entry is the column name.
    headers = [column[0] for column in cursor.description]  # type: ignore
    rows = cursor.fetchall()

# Render the result as a psql-style text table.
print(tabulate(rows, headers=headers, tablefmt="psql"))

Business Question: "Show me sales by day of week"

In [None]:
# Star-schema query answering "sales by day of week": join fact_sales to
# dim_date on date_key, then per day_of_week sum total_amount and count the
# fact rows, with the highest-selling day first.
# NOTE: COUNT(*) counts fact rows. That equals the number of orders only while
# each order produces a single fact row, as in the four sample orders.
sql = """
SELECT
    d.day_of_week,
    SUM(f.total_amount) AS total_sales,
    COUNT(*) AS number_of_orders
FROM fact_sales f
INNER JOIN dim_date d ON f.date_key = d.date_key
GROUP BY d.day_of_week
ORDER BY total_sales DESC;
"""


In [None]:
# Execute the query held in `sql`, collecting the column names and all rows
# while the cursor is still open.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    # Item 0 of each cursor.description entry is the column name.
    headers = [column[0] for column in cursor.description]  # type: ignore
    rows = cursor.fetchall()

# Render the result as a psql-style text table.
print(tabulate(rows, headers=headers, tablefmt="psql"))