#### CTE (Common Table Expression):
- A CTE is a named, temporary result set that exists only for the duration of a single SQL statement. It improves readability and structure, and it can be referenced several times within that statement, but it cannot be reused by later statements.

#### Temporary Table:
- A temporary table is a session-scoped table that persists for the duration of the database connection. It can be queried, reused, indexed, and modified across multiple statements within that session.

#### Rule of thumb:
- Use a CTE for clarity in one query; use a temporary table when you need reuse or better performance across multiple steps.

In [None]:
from db_connection import get_connection
from tabulate import tabulate

In [None]:
# Baseline query: fetch every column of every row in the `orders` table.
sql = """
 SELECT * FROM orders;
"""

In [None]:
# Execute the query held in `sql` and print the result as a psql-style table.
with get_connection() as connection:
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
        # Read the column names from the cursor rather than hard-coding them,
        # so the header row always matches whatever `SELECT *` returns.
        headers = [desc[0] for desc in cursor.description]

print(tabulate(rows, headers=headers, tablefmt="psql"))



Problem: Calculate total sales per customer, then find customers who spent more than $1000.


In [None]:
# Two-step query built with a CTE:
#   1. customer_totals aggregates `orders` per customer_id (sum of amount and
#      number of orders).
#   2. The outer SELECT keeps only customers whose total_spent exceeds 1000,
#      ordered from the highest to the lowest spender.
sql = """
with customer_totals as (
    select
        customer_id,
        sum(amount) as total_spent,
        count(*) as total_orders
    from orders
    group by customer_id
)

SELECT
    customer_id,
    total_orders,
    total_spent
FROM customer_totals
WHERE total_spent > 1000
ORDER BY total_spent DESC;
"""

In [None]:
# Execute the query currently held in `sql`, then print the rows as a
# psql-style table whose header is the column names reported by the cursor.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    rows = cursor.fetchall()
    headers = [column[0] for column in cursor.description]

print(tabulate(rows, headers=headers, tablefmt="psql"))



Problem: Calculate monthly sales trends and identify months with above-average performance.

In [None]:
# Chained CTEs:
#   monthly_sales   - revenue and quantity from `daily_sales`, summed per
#                     calendar month (DATE_TRUNC('month', sale_date)).
#   average_metrics - a single row holding the averages of those monthly
#                     totals.
# The CROSS JOIN attaches that one-row average to every month so the WHERE
# clause can keep only the months whose revenue is above the average.
# NOTE(review): avg_quantity is computed but not used by the final SELECT.
sql = """
WITH monthly_sales AS (
    SELECT
        DATE_TRUNC('month', sale_date) AS month,
        SUM(revenue) AS monthly_revenue,
        SUM(quantity) AS monthly_quantity
    FROM daily_sales
    GROUP BY DATE_TRUNC('month', sale_date)
),
average_metrics AS (
    SELECT
        AVG(monthly_revenue) AS avg_revenue,
        AVG(monthly_quantity) AS avg_quantity
    FROM monthly_sales
)
SELECT
    m.month,
    m.monthly_revenue,
    m.monthly_quantity
FROM monthly_sales m
CROSS JOIN average_metrics a
WHERE
    m.monthly_revenue > a.avg_revenue
ORDER BY m.month;
"""

In [None]:
# Run whatever statement `sql` holds and display the result set as a
# psql-style table, taking the header row from the cursor's column metadata.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    rows = cursor.fetchall()
    headers = [col_info[0] for col_info in cursor.description]

print(tabulate(rows, headers=headers, tablefmt="psql"))


SQL Example 3: CTEs for Data Quality Checks (Interview Favorite!)

In [None]:
# Data-quality report over `raw_orders`, built from three CTEs:
#   duplicate_check      - (customer_email, order_date, amount) combinations
#                          that occur more than once.
#   amount_check         - every order tagged with an amount_flag
#                          ('negative amount', 'unusually high' or 'OK').
#   final_quality_report - amount_check LEFT JOINed to duplicate_check so each
#                          order also carries a duplicate_flag.
# The outer SELECT returns only the rows that fail at least one check.
# NOTE(review): the 10000 cutoff for 'unusually high' is a placeholder -
# confirm the real threshold for this data set.
# NOTE(review): the join uses plain `=`, so rows whose email/date/amount is
# NULL are never matched to a duplicate group.
sql = """
WITH duplicate_check AS (
    SELECT
        customer_email,
        order_date,
        amount,
        COUNT(*) AS duplicate_count
    FROM raw_orders
    GROUP BY customer_email, order_date, amount
    HAVING COUNT(*) > 1
),
amount_check AS (
    SELECT
        order_id,
        customer_email,
        order_date,
        amount,
        CASE
            WHEN amount < 0 THEN 'negative amount'
            WHEN amount > 10000 THEN 'unusually high'
            ELSE 'OK'
        END AS amount_flag
    FROM raw_orders
),
final_quality_report AS (
    SELECT
        a.order_id,
        a.customer_email,
        a.order_date,
        a.amount,
        a.amount_flag,
        CASE
            WHEN d.duplicate_count IS NOT NULL THEN 'potential_duplicate'
            ELSE 'unique'
        END AS duplicate_flag,
        COALESCE(d.duplicate_count, 1) AS duplicate_count
    FROM amount_check a
    LEFT JOIN duplicate_check d
        ON a.customer_email = d.customer_email
        AND a.order_date = d.order_date
        AND a.amount = d.amount
)

SELECT *
FROM final_quality_report
WHERE amount_flag != 'OK' OR duplicate_flag = 'potential_duplicate'
ORDER BY order_id;
"""

In [None]:
# Send the statement stored in `sql` to the database and print what comes
# back as a psql-style table, headed by the cursor's reported column names.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    rows = cursor.fetchall()
    headers = [meta[0] for meta in cursor.description]

print(tabulate(rows, headers=headers, tablefmt="psql"))
