In [None]:
from db_connection import get_connection
from tabulate import tabulate

### Create Sample Orders Table

```sql
-- Create the orders table
CREATE TABLE orders (
    order_id        INT             PRIMARY KEY,
    customer_id     INT,
    order_date      DATE,
    product_category VARCHAR(50),
    revenue         DECIMAL(10,2),
    region          VARCHAR(50)
);

-- Insert sample data
INSERT INTO orders (order_id, customer_id, order_date, product_category, revenue, region) VALUES
(1,  101, '2024-01-15', 'Electronics', 1200.00, 'East'),
(2,  102, '2024-01-16', 'Electronics',  800.00, 'West'),
(3,  101, '2024-01-17', 'Clothing',     150.00, 'East'),
(4,  103, '2024-01-18', 'Electronics', 2500.00, 'East'),
(5,  102, '2024-01-19', 'Clothing',     200.00, 'West'),
(6,  104, '2024-01-20', 'Electronics',  950.00, 'South'),
(7,  101, '2024-01-21', 'Electronics', 1800.00, 'East'),
(8,  105, '2024-01-22', 'Clothing',     300.00, 'East'),
(9,  103, '2024-01-23', 'Electronics', 1100.00, 'East'),
(10, 102, '2024-01-24', 'Clothing',     175.00, 'West');
```

In [None]:
# Sanity check: return every column of every row in the orders table.
sql = """
SELECT * from orders;
"""

Assign a sequential number to all orders sorted by order_date (oldest first).

Goal: Understand ROW_NUMBER() without partitions.

In [None]:
# Number every order chronologically. ROW_NUMBER() runs over the whole table
# (no PARTITION BY) ordered by order_date ascending, so the oldest order gets
# row_num = 1. order_id is a secondary sort key so that orders sharing the same
# date are always numbered in the same order.
sql = """
SELECT
    * ,
    row_number() over (order by order_date asc, order_id asc) as row_num

from orders;
"""

### Highest Revenue Order

Rank all orders by revenue in descending order and return:

order_id

revenue

rank

Use RANK().

In [None]:
# Rank all orders by revenue, highest first. RANK() and DENSE_RANK() are
# selected side by side over the same ordering so their numbering can be
# compared; the final ORDER BY lists the rows from highest to lowest revenue.
sql = """
SELECT
    order_id,
    revenue,
    RANK() OVER (ORDER BY revenue DESC) as rank_num,
    DENSE_RANK() OVER (ORDER BY revenue DESC) as dense_rank_num
FROM orders
ORDER BY revenue DESC;
"""

Return only the top 3 highest-revenue orders using a ranking function.

In [None]:
# Top 3 highest-revenue orders. The inner query ranks every order by revenue
# (highest first); the outer query filters on that rank, because a window
# function result cannot be referenced in the WHERE clause of the same SELECT.
# RANK() gives tied revenues the same rank, so a tie at the cut-off can return
# more than three rows.
sql = """
SELECT
    order_id,
    revenue
    from
        (
            SELECT
                order_id,
                revenue,
                rank() OVER (ORDER BY revenue DESC) as rnk
            FROM orders
        )  rank_data
    where rnk <= 3
"""

Level 2: Beginner–Intermediate (Partitioning)

### Orders Ranked Within Each Product Category

For each product_category, rank orders by revenue (highest first).

Output:

order_id

product_category

revenue

rank

Goal: Learn PARTITION BY.

In [None]:
# Rank orders by revenue (highest first) inside each product_category.
# PARTITION BY restarts the ranking for every category.
sql = """
SELECT
    order_id,
    product_category ,
    revenue,
    rank() over (partition by product_category order by revenue desc) as rank_data
from orders

"""

First Order per Customer

In [None]:
# First order per customer. The inner query numbers each customer's orders by
# order_date ascending (order_id breaks ties between orders placed on the same
# date); the outer query keeps the row numbered 1 for every customer.
# The derived table carries the alias `t`: several engines (e.g. MySQL, and
# PostgreSQL before version 16) reject a subquery in FROM that has no alias.
sql = """
SELECT
    *
from
(
    select
        customer_id,
        order_id,
        product_category ,
        revenue,
        row_number() over (partition by customer_id  order by order_date asc, order_id asc) as order_data
    from orders
) t
where order_data = 1
"""

Find the highest revenue order in each region.

MAX() alone cannot be used here: it only computes the highest revenue value and does not return the rest of the matching order row (such as its order_id).

In [None]:
# Highest-revenue order per region: the inner query numbers the orders inside
# each region by revenue descending, and the outer query keeps the row
# numbered 1.
# NOTE(review): ROW_NUMBER() yields exactly one row per region, so if two
# orders tie for the top revenue only one of them is returned — confirm that
# this is the intended behavior.
sql = """
SELECT
    region,
    order_id,
    revenue
FROM (
    SELECT
        region,
        order_id,
        revenue,
        ROW_NUMBER() OVER (
            PARTITION BY region
            ORDER BY revenue DESC
        ) AS rn
    FROM orders
) t
WHERE rn = 1;

"""

Rank Orders per Customer

Rank each customer’s orders by revenue (highest first).

Question:
Which ranking function is most appropriate if two orders have the same revenue?

In [None]:
# Rank each customer's orders by revenue (highest first) with DENSE_RANK():
# orders with equal revenue share a rank and the next rank is not skipped.
sql = """
    SELECT
        *,
        DENSE_RANK() OVER (
            PARTITION BY customer_id
            ORDER BY revenue DESC
        ) AS rn
    FROM orders

"""

Level 3: Intermediate (Filtering on Rank)

Top 2 Orders per Customer

Return the top 2 highest revenue orders for each customer.

Constraint:
Use a window function + filtering.

In [None]:
# Top 2 orders per customer: rank inside each customer partition by revenue
# (highest first), then filter on the rank in an outer query, since a window
# function result cannot be filtered in the WHERE clause of the same SELECT.
# NOTE(review): DENSE_RANK() keeps every order tied at rank 1 or 2, so a
# customer with tied revenues can return more than two rows.
sql = """
SELECT
   *
FROM (
    SELECT
        *,
        dense_rank() OVER (
            PARTITION BY customer_id
            ORDER BY revenue DESC
        ) AS rn
    FROM orders
) t
WHERE rn  <=2;

"""

Second Highest Revenue Order per Region

For each region, return the second highest revenue order.

Hint:
Think carefully about ties.

In [None]:
# Second-highest revenue order per region. DENSE_RANK() is used so that rank 2
# is the second distinct revenue value even when several orders tie for first
# place within a region.
sql = """
SELECT
   *
FROM (
    SELECT
        *,
        DENSE_RANK() OVER (
            PARTITION BY region
            ORDER BY revenue DESC
        ) AS rn
    FROM orders
) t
WHERE rn = 2;

"""

Customers with Multiple Top-Ranked Orders

Find customers who have more than one order ranked #1 by revenue (per customer).

Goal: Understand ranking + aggregation.

In [None]:
# Customers with more than one top-ranked order: keep each customer's rank-1
# rows (tied revenues share rank 1 under DENSE_RANK), then group by customer
# and keep only the customers whose rank-1 row count is above 1.
sql = """
SELECT
   customer_id
FROM (
    SELECT
        *,
        DENSE_RANK() OVER (
            PARTITION BY customer_id
            ORDER BY revenue DESC
        ) AS rnk
    FROM orders
) t
where rnk = 1
group by customer_id
HAVING COUNT(*) > 1;

"""

Level 5: Real-World Business Scenarios
Top Category per Region

For each region, find the product category with the highest total revenue.

Steps involved:

Aggregate revenue

Rank results

In [None]:
# Top product category per region, built in three layers:
#   1. the innermost query (agg) sums revenue per (region, product_category);
#   2. the middle query ranks those totals inside each region, highest first;
#   3. the outer query keeps the rank-1 rows (ties for first are all returned).
sql = """
SELECT
   *
FROM (
    SELECT
        region,
        product_category,
        total_revenue,
        DENSE_RANK() OVER (
            PARTITION BY region
            ORDER BY total_revenue DESC
        ) AS rnk
    FROM (
        SELECT
            region,
            product_category,
            sum(revenue) as total_revenue
            from orders
            group by region,product_category
    ) as agg
) t
where rnk = 1

"""

NTILE() Basics: Practice Problems

Problem 1: Revenue Quartiles (No Partition)

In [None]:
# Revenue quartiles over the whole table: NTILE(4) splits the orders, sorted by
# revenue descending, into 4 buckets; bucket 1 holds the highest revenues.
sql = """
SELECT
    * ,
    ntile(4) over (order by revenue desc) as ntiles_data
from orders

"""

Problem 2: Revenue Buckets per Product Category

In [None]:
# Revenue buckets per product category: NTILE(4) is applied separately inside
# each product_category partition, ordered by revenue descending.
sql = """
SELECT
    * ,
    ntile(4) over (partition by product_category order by revenue desc) as ntiles_data
from orders
"""

Problem 3: Customer Spending Segments

In [None]:
# Customer spending segments: each customer's orders are split into 3 buckets
# by revenue (highest first); spend_bucket is the bucket number of the order.
sql = """
SELECT
    customer_id ,
    order_id ,
    revenue ,
    ntile(3) over (partition by customer_id order by revenue desc) as spend_bucket
from orders
"""

Problem 4: Regional Order Priority

In [None]:
# Regional order priority: within each region, orders sorted by revenue
# descending are split into 4 buckets; priority_level 1 holds the region's
# highest-revenue orders.
sql = """
SELECT
    region,
    order_id,
    revenue,
    NTILE(4) OVER (
        PARTITION BY region
        ORDER BY revenue DESC
    ) AS priority_level
FROM orders;

"""

Problem 5: Identify Top 25% Orders

In [None]:
# Top revenue quartile: the inner query assigns every order to one of 4
# revenue buckets (highest first); the outer query keeps only bucket 1.
sql = """
SELECT
    order_id,
    revenue
FROM (
    SELECT
        order_id,
        revenue,
        NTILE(4) OVER (ORDER BY revenue DESC) AS revenue_bucket
    FROM orders
) t
WHERE revenue_bucket = 1;


"""

In [None]:
# Run the statement currently bound to `sql` and collect its result set.
# The connection and its cursor are both used as context managers.
with get_connection() as connection, connection.cursor() as cursor:
    cursor.execute(sql)
    rows = cursor.fetchall()
    # The first item of each cursor.description entry is used as the column header.
    headers = [column[0] for column in cursor.description]

# Render the fetched rows as a psql-style text table.
print(tabulate(rows, headers=headers, tablefmt="psql"))
