In [172]:
import duckdb
import pandas as pd

duckdb.sql("""
    CREATE OR REPLACE TABLE orders AS 
    SELECT 
        "Customer ID" AS customer_id,
        "Customer Status" AS customer_status,
        "Order ID" AS order_id,
        "Product ID" AS product_id,
        "Cost Price Per Unit" AS cost_price_per_unit,
        "Total Retail Price for This Order" AS total_retail_price,
        "Quantity Ordered" AS quantity_ordered,
        "Date Order was placed" AS order_date,
        "Delivery Date" AS delivery_date
    FROM read_csv_auto('orders.csv');
""")

duckdb.sql("""
    CREATE OR REPLACE TABLE product_supplier AS 
    SELECT 
        "Product ID" AS product_id,
        "Product Category" AS product_category,
        "Supplier Name" AS supplier_name,
        "Product Name" AS product_name,
        "Product Line" AS product_line,
        "Product Group" AS product_group,
        "Supplier Country" AS supplier_country
    FROM read_csv_auto('product-supplier.csv');
""")



In [173]:
duckdb.sql("SELECT * FROM orders LIMIT 5;")


┌─────────────┬─────────────────┬───────────┬──────────────┬─────────────────────┬────────────────────┬──────────────────┬────────────┬───────────────┐
│ customer_id │ customer_status │ order_id  │  product_id  │ cost_price_per_unit │ total_retail_price │ quantity_ordered │ order_date │ delivery_date │
│    int64    │     varchar     │   int64   │    int64     │       double        │       double       │      int64       │  varchar   │    varchar    │
├─────────────┼─────────────────┼───────────┼──────────────┼─────────────────────┼────────────────────┼──────────────────┼────────────┼───────────────┤
│         579 │ Silver          │ 123002578 │ 220101400106 │                20.7 │               92.6 │                2 │ 01-Jan-17  │ 07-Jan-17     │
│        7574 │ SILVER          │ 123004074 │ 210201000009 │                9.95 │               21.7 │                1 │ 01-Jan-17  │ 05-Jan-17     │
│       28861 │ Gold            │ 123000871 │ 230100500068 │                 0.8 │      

In [174]:
duckdb.sql("SELECT * FROM product_supplier LIMIT 5;")


┌──────────────┬───────────────────┬───────────────────────────┬────────────────────────────────────────┬──────────────┬──────────────────────┬──────────────────┐
│  product_id  │ product_category  │       supplier_name       │              product_name              │ product_line │    product_group     │ supplier_country │
│    int64     │      varchar      │          varchar          │                varchar                 │   varchar    │       varchar        │     varchar      │
├──────────────┼───────────────────┼───────────────────────────┼────────────────────────────────────────┼──────────────┼──────────────────────┼──────────────────┤
│ 210100100001 │ Children Outdoors │ Scandinavian Clothing A/S │ Boy's and Girl's Ski Pants with Braces │ Children     │ Outdoor things, Kids │ NO               │
│ 210100100002 │ Children Outdoors │ Luna sastreria S.A.       │ Children's Jacket                      │ Children     │ Outdoor things, Kids │ ES               │
│ 210100100003 │ Child

Query : 1
Retrieve all customer records
Extract the full dataset of customers without any filters or limitations.

In [175]:
duckdb.sql("""
SELECT DISTINCT customer_id, customer_status
FROM orders
""")

┌─────────────┬─────────────────┐
│ customer_id │ customer_status │
│    int64    │     varchar     │
├─────────────┼─────────────────┤
│       51897 │ Silver          │
│       56516 │ SILVER          │
│       57192 │ Gold            │
│       58875 │ Silver          │
│       60022 │ Gold            │
│       64589 │ Silver          │
│       90557 │ Gold            │
│       61158 │ Silver          │
│       85251 │ Gold            │
│       33229 │ SILVER          │
│         ·   │  ·              │
│         ·   │  ·              │
│         ·   │  ·              │
│       64228 │ Gold            │
│       79285 │ Gold            │
│       93679 │ Gold            │
│       20740 │ Gold            │
│       92400 │ Gold            │
│         594 │ GOLD            │
│       20838 │ GOLD            │
│       61462 │ Gold            │
│       63837 │ Silver          │
│       87954 │ Silver          │
├─────────────┴─────────────────┤
│ ? rows              2 columns │
│ (>9999 rows,

Query : 2
Select customers from a specified location
Identify and isolate customers based on their geographic location or city.


In [176]:
duckdb.sql("""
SELECT DISTINCT o.customer_id, p.supplier_country
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
WHERE p.supplier_country = 'India'
""")

┌─────────────┬──────────────────┐
│ customer_id │ supplier_country │
│    int64    │     varchar      │
├─────────────┴──────────────────┤
│             0 rows             │
└────────────────────────────────┘

Query : 3
Extract essential product details
Retrieve only product name, category, and price for a simplified product overview.

In [177]:
duckdb.sql("""
SELECT product_name, product_category
FROM product_supplier
""")

┌────────────────────────────────────────┬───────────────────┐
│              product_name              │ product_category  │
│                varchar                 │      varchar      │
├────────────────────────────────────────┼───────────────────┤
│ Boy's and Girl's Ski Pants with Braces │ Children Outdoors │
│ Children's Jacket                      │ Children Outdoors │
│ Children's Jacket Sidney               │ Children Outdoors │
│ Children's Rain Set                    │ Children Outdoors │
│ Children's Rain Suit                   │ Children Outdoors │
│ Rain Suit for Children                 │ Children Outdoors │
│ Rain Suit                              │ Children Outdoors │
│ Rain Suit Tonado                       │ Children Outdoors │
│ Ski Jacket Oliver                      │ Children Outdoors │
│ Ski Jacket w/Removable Fleece          │ Children Outdoors │
│        ·                               │       ·           │
│        ·                               │       ·     

Query : 4
List products sorted by price, highest first
Show the product catalog with the most expensive items at the top.


In [178]:
duckdb.sql("""
SELECT p.product_name, MAX(o.total_retail_price) AS max_price
FROM product_supplier p
JOIN orders o ON p.product_id = o.product_id
GROUP BY p.product_name
ORDER BY max_price DESC
""")

┌───────────────────────────────────────────────┬───────────┐
│                 product_name                  │ max_price │
│                    varchar                    │  double   │
├───────────────────────────────────────────────┼───────────┤
│ Top-form 325 Treadmill                        │    6382.0 │
│ Fit4you Ski Jacket Astro                      │    3740.0 │
│ Letour Trimag Bike                            │    3708.6 │
│ Family Holiday 4                              │    3599.4 │
│ Letour Heart Bike                             │    3199.2 │
│ Top Men's R&D Ultimate Jacket                 │    3058.8 │
│ Hiclass Steel Bubble Golf set 3-Pw            │    3057.3 │
│ Badminton String Roll 200 Mt                  │    2977.8 │
│ Rollerskate  Roller Skates Hockey Wicked L.80 │    2922.0 │
│ Jeff's Hockey                                 │    2875.8 │
│     ·                                         │        ·  │
│     ·                                         │        ·  │
│     · 

Query : 5
Show orders combined with respective customer details
Join orders with their corresponding customer information.


In [179]:
duckdb.sql("""
SELECT o.order_id, o.customer_id, o.product_id, o.quantity_ordered, o.total_retail_price, o.order_date,
       p.product_name, p.product_category
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
""")

┌───────────┬─────────────┬──────────────┬──────────────────┬────────────────────┬────────────┬───────────────────────────────────────────────┬──────────────────────────┐
│ order_id  │ customer_id │  product_id  │ quantity_ordered │ total_retail_price │ order_date │                 product_name                  │     product_category     │
│   int64   │    int64    │    int64     │      int64       │       double       │  varchar   │                    varchar                    │         varchar          │
├───────────┼─────────────┼──────────────┼──────────────────┼────────────────────┼────────────┼───────────────────────────────────────────────┼──────────────────────────┤
│ 123952566 │       35967 │ 230100100012 │                2 │              376.4 │ 16-Jun-20  │ Jacket Talkeetna                              │ Outdoors                 │
│ 123955644 │       37582 │ 240700200021 │                2 │               39.2 │ 16-Jun-20  │ Helmet XL                                     │ T

Query : 6
Calculate total quantity sold for each product
Sum all quantities ordered for every product.


In [180]:
duckdb.sql("""
SELECT p.product_name, SUM(o.quantity_ordered) AS total_quantity_sold
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
GROUP BY p.product_name
""")

┌───────────────────────────────────────────────┬─────────────────────┐
│                 product_name                  │ total_quantity_sold │
│                    varchar                    │       int128        │
├───────────────────────────────────────────────┼─────────────────────┤
│ Maxrun Long-sleeved T-Shirt                   │                 281 │
│ Smash Classic Running Jacket w/Zipper         │                  26 │
│ South Peak Men's Shorts                       │                  74 │
│ Gamma Roller Skates                           │                 141 │
│ Winter Children's Down Jacket                 │                  60 │
│ Fleece Children's Fleece w/Hood               │                  54 │
│ Pytossage Bathing Sandal                      │                 374 │
│ Outback Sleeping Bag, Medium,Right/Blue/Black │                 481 │
│ Casual Woven Pants                            │                 110 │
│ Lyon Men's Jacket                             │               

Query : 7
Compute total revenue generated by each product
Aggregate sales revenue per product.

In [181]:
duckdb.sql("""
SELECT p.product_name, SUM(o.total_retail_price) AS total_revenue
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
GROUP BY p.product_name
""")

┌───────────────────────────────────────────┬────────────────────┐
│               product_name                │   total_revenue    │
│                  varchar                  │       double       │
├───────────────────────────────────────────┼────────────────────┤
│ Big Guy Men's Air Quest Shoes             │ 19744.800000000003 │
│ Tony's Children's Deschutz (Bg) Shoes     │ 1580.8000000000006 │
│ Rock-solid Jacket                         │ 3929.3999999999996 │
│ Woman's Sweatshirt w/Small Zipper L       │  5614.499999999999 │
│ Eclipse Mens Off Court Tennis Shoes       │  12284.79999999999 │
│ N.d.gear Avantgarde Pants                 │              957.2 │
│ K2 Woven Pants                            │              207.6 │
│ Big Guy Men's Dry Fit Long Tights         │             4595.5 │
│ Maxrun Short Running Tights               │             8962.8 │
│ Sports Bra                                │             4588.8 │
│     ·                                     │                ·

Query : 8
Identify products that achieved revenue beyond a certain limit
Filter and list products that generated substantial revenue.

In [182]:
duckdb.sql("""
SELECT p.product_name, SUM(o.total_retail_price) AS total_revenue
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
GROUP BY p.product_name
HAVING SUM(o.total_retail_price) > 50000
""")

┌───────────────────────────────────────────────┬────────────────────┐
│                 product_name                  │   total_revenue    │
│                    varchar                    │       double       │
├───────────────────────────────────────────────┼────────────────────┤
│ Tx Peak Parka                                 │  73639.20000000001 │
│ Comfort Shelter                               │           275440.0 │
│ Jeff's Hockey                                 │            87711.9 │
│ Petanque Balls Chromium 8-pack                │            89570.5 │
│ Mayday Stripe Pullover                        │  77672.00000000012 │
│ Twain Women's Evolution 8.0 T Ski Boots       │           116358.0 │
│ Proplay Women's Molitor 3+8 Golf set          │  81257.10000000002 │
│ Perfect Fit Men's Roller Skates               │  63576.63000000005 │
│ Family Holiday 4                              │  551186.6000000006 │
│ Cayenne Red                                   │  53041.60000000008 │
│     

Query : 9
Create a ranked list of customers based on spending
Order customers from highest to lowest total purchase amount.


In [183]:
duckdb.sql("""
SELECT customer_id, SUM(total_retail_price) AS total_spent,
       RANK() OVER (ORDER BY SUM(total_retail_price) DESC) AS rank
FROM orders
GROUP BY customer_id
""")

┌─────────────┬───────────────────┬───────┐
│ customer_id │    total_spent    │ rank  │
│    int64    │      double       │ int64 │
├─────────────┼───────────────────┼───────┤
│        7766 │ 6826.299999999999 │     1 │
│       31519 │            6585.8 │     2 │
│       77062 │ 6569.599999999999 │     3 │
│       89591 │            6432.5 │     4 │
│       54290 │            6382.0 │     5 │
│       28530 │            6257.2 │     6 │
│       53078 │ 5027.400000000001 │     7 │
│       92062 │            5016.3 │     8 │
│       62745 │ 4991.299999999999 │     9 │
│        3190 │            4893.1 │    10 │
│          ·  │               ·   │     · │
│          ·  │               ·   │     · │
│          ·  │               ·   │     · │
│       17450 │             774.5 │  9989 │
│       70406 │             774.4 │  9992 │
│       14885 │ 774.3000000000001 │  9993 │
│       27004 │             774.3 │  9994 │
│       80527 │             774.3 │  9994 │
│       86860 │             774.

Query : 10
Generate monthly revenue summaries segmented by product category
Show how revenue varies month-wise for each product category.

In [184]:
duckdb.sql("""
SELECT strftime('%Y-%m', strptime(order_date, '%d-%b-%y')) AS month,
       p.product_category,
       SUM(o.total_retail_price) AS monthly_revenue
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
GROUP BY month, p.product_category
ORDER BY month, p.product_category;
""")

┌─────────┬──────────────────────────┬────────────────────┐
│  month  │     product_category     │  monthly_revenue   │
│ varchar │         varchar          │       double       │
├─────────┼──────────────────────────┼────────────────────┤
│ 2017-01 │ Assorted Sports Articles │  41594.67500000004 │
│ 2017-01 │ Children Sports          │             9340.7 │
│ 2017-01 │ Clothes                  │  42914.94999999998 │
│ 2017-01 │ Golf                     │ 13128.800000000001 │
│ 2017-01 │ Indoor Sports            │            15421.0 │
│ 2017-01 │ Outdoors                 │  68335.39999999995 │
│ 2017-01 │ Racket Sports            │             5557.9 │
│ 2017-01 │ Running - Jogging        │  9938.000000000007 │
│ 2017-01 │ Shoes                    │  45199.70000000003 │
│ 2017-01 │ Swim Sports              │ 1582.5000000000002 │
│    ·    │    ·                     │          ·         │
│    ·    │    ·                     │          ·         │
│    ·    │    ·                     │  

Query : 11
Find customers whose spending is above the average customer’s spend
Determine which customers spend more than the overall average.

In [185]:
duckdb.sql("""
WITH customer_total AS (
    SELECT customer_id, SUM(total_retail_price) AS total_spent
    FROM orders
    GROUP BY customer_id
),
average_spend AS (
    SELECT AVG(total_spent) AS avg_spent
    FROM customer_total
)
SELECT customer_id, total_spent
FROM customer_total
WHERE total_spent > (SELECT avg_spent FROM average_spend)
""")

┌─────────────┬────────────────────┐
│ customer_id │    total_spent     │
│    int64    │       double       │
├─────────────┼────────────────────┤
│       64570 │             504.79 │
│       65471 │             1320.5 │
│       78633 │             1345.0 │
│       90860 │ 1463.6399999999999 │
│       19546 │ 2208.7999999999997 │
│       19854 │              797.1 │
│       36168 │              488.5 │
│        6306 │              540.2 │
│       18630 │             1502.5 │
│       27357 │              750.6 │
│          ·  │                ·   │
│          ·  │                ·   │
│          ·  │                ·   │
│        7331 │ 1835.0000000000002 │
│       57781 │             461.38 │
│       59971 │ 1100.8000000000002 │
│       81088 │ 1031.3000000000002 │
│       92252 │ 505.20000000000005 │
│       50775 │              591.6 │
│       13652 │ 502.70000000000005 │
│       20592 │             1009.0 │
│       24742 │  800.5999999999999 │
│       31060 │              627.6 │
├

Query : 12
Assign customers into spending brackets (High, Medium, Low)
Categorize customers based on total spend.

In [186]:
duckdb.sql("""
WITH customer_total AS (
    SELECT customer_id, SUM(total_retail_price) AS total_spent
    FROM orders
    GROUP BY customer_id
)
SELECT customer_id, total_spent,
       CASE 
           WHEN total_spent >= 50000 THEN 'High'
           WHEN total_spent >= 20000 THEN 'Medium'
           ELSE 'Low'
       END AS spending_bracket
FROM customer_total
""")

┌─────────────┬────────────────────┬──────────────────┐
│ customer_id │    total_spent     │ spending_bracket │
│    int64    │       double       │     varchar      │
├─────────────┼────────────────────┼──────────────────┤
│       54495 │ 1991.2000000000003 │ Low              │
│       81847 │ 1628.1999999999998 │ Low              │
│       93653 │              369.4 │ Low              │
│       22750 │              332.6 │ Low              │
│       25540 │             1170.5 │ Low              │
│       90807 │             1024.5 │ Low              │
│        3907 │             133.98 │ Low              │
│       29813 │              332.8 │ Low              │
│       31662 │              132.3 │ Low              │
│       48729 │  524.5999999999999 │ Low              │
│         ·   │                ·   │  ·               │
│         ·   │                ·   │  ·               │
│         ·   │                ·   │  ·               │
│       34787 │             1003.6 │ Low        

Query : 13
Identify customers who made multiple purchases over time
Spot repeat buyers and analyze purchase frequency

In [187]:
duckdb.sql("""
SELECT customer_id, COUNT(DISTINCT order_id) AS num_orders
FROM orders
GROUP BY customer_id
HAVING COUNT(DISTINCT order_id) > 1
""")

┌─────────────┬────────────┐
│ customer_id │ num_orders │
│    int64    │   int64    │
├─────────────┼────────────┤
│       89874 │          3 │
│       19749 │          5 │
│       43691 │          3 │
│       51866 │          7 │
│       92614 │          3 │
│       84422 │          7 │
│       78268 │          4 │
│       19441 │          6 │
│       61290 │          8 │
│       74598 │          3 │
│         ·   │          · │
│         ·   │          · │
│         ·   │          · │
│       84393 │          2 │
│       40253 │          2 │
│       37438 │          2 │
│       35120 │          2 │
│       92405 │          2 │
│       32498 │          2 │
│       85757 │          2 │
│       72244 │          2 │
│       57511 │          3 │
│       20553 │          2 │
├─────────────┴────────────┤
│ ? rows         2 columns │
│ (>9999 rows, 20 shown)   │
└──────────────────────────┘

Query : 14
Analyze customer retention by grouping customers based on their first purchase month
Track how many customers make repeat purchases in later months.


In [188]:
duckdb.sql("""
WITH first_purchase AS (
    SELECT customer_id, MIN(strftime('%Y-%m', strptime(order_date, '%d-%b-%y'))) AS first_month
    FROM orders
    GROUP BY customer_id
)
SELECT f.first_month, COUNT(DISTINCT o.customer_id) AS repeat_customers
FROM orders o
JOIN first_purchase f ON o.customer_id = f.customer_id
WHERE strftime('%Y-%m', strptime(o.order_date, '%d-%b-%y')) > f.first_month
GROUP BY f.first_month
""")

┌─────────────┬──────────────────┐
│ first_month │ repeat_customers │
│   varchar   │      int64       │
├─────────────┼──────────────────┤
│ 2019-03     │              851 │
│ 2017-03     │             1628 │
│ 2019-10     │              326 │
│ 2018-03     │             1147 │
│ 2018-04     │             1273 │
│ 2019-05     │              762 │
│ 2017-10     │             1022 │
│ 2019-12     │              287 │
│ 2017-12     │              938 │
│ 2017-02     │             1581 │
│    ·        │               ·  │
│    ·        │               ·  │
│    ·        │               ·  │
│ 2020-10     │              143 │
│ 2021-05     │              220 │
│ 2021-10     │               45 │
│ 2020-03     │              435 │
│ 2020-06     │              281 │
│ 2021-03     │              245 │
│ 2020-09     │              127 │
│ 2021-01     │              350 │
│ 2021-07     │              136 │
│ 2021-06     │              155 │
├─────────────┴──────────────────┤
│ 59 rows (20 shown)

Query : 15
Detect product pairs that are frequently bought together
Identify bundles or commonly purchased combinations.

In [189]:
duckdb.sql("""
SELECT o1.product_id AS product_1, o2.product_id AS product_2, COUNT(*) AS times_bought_together
FROM orders o1
JOIN orders o2 ON o1.order_id = o2.order_id AND o1.product_id < o2.product_id
GROUP BY o1.product_id, o2.product_id
ORDER BY times_bought_together DESC
LIMIT 20
""")

┌───────────┬───────────┬───────────────────────┐
│ product_1 │ product_2 │ times_bought_together │
│   int64   │   int64   │         int64         │
├───────────┴───────────┴───────────────────────┤
│                    0 rows                     │
└───────────────────────────────────────────────┘

Query : 16
Calculate and interpret moving averages of daily sales data 
Smooth sales trends using a rolling window.

In [190]:
duckdb.sql("""
SELECT strptime(order_date, '%d-%b-%y') AS order_date_parsed,
       SUM(total_retail_price) AS daily_sales,
       AVG(SUM(total_retail_price)) OVER (ORDER BY strptime(order_date, '%d-%b-%y') ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7days
FROM orders
GROUP BY order_date_parsed
ORDER BY order_date_parsed
""")

┌─────────────────────┬────────────────────┬────────────────────┐
│  order_date_parsed  │    daily_sales     │  moving_avg_7days  │
│      timestamp      │       double       │       double       │
├─────────────────────┼────────────────────┼────────────────────┤
│ 2017-01-01 00:00:00 │              225.5 │              225.5 │
│ 2017-01-02 00:00:00 │ 10476.300000000001 │  5350.900000000001 │
│ 2017-01-03 00:00:00 │             8683.2 │  6461.666666666667 │
│ 2017-01-04 00:00:00 │            3434.19 │          5704.7975 │
│ 2017-01-05 00:00:00 │  9129.800000000001 │           6389.798 │
│ 2017-01-06 00:00:00 │          10036.775 │          6997.6275 │
│ 2017-01-07 00:00:00 │  8573.279999999999 │  7222.720714285714 │
│ 2017-01-08 00:00:00 │ 12263.530000000002 │  8942.439285714285 │
│ 2017-01-09 00:00:00 │           9347.725 │  8781.214285714286 │
│ 2017-01-10 00:00:00 │ 13129.599999999997 │  9416.414285714285 │
│          ·          │          ·         │          ·         │
│         

Query : 17
Produce monthly product sales rankings within each category, including ties
Rank products by sales for each category and month.


In [191]:
duckdb.sql("""
SELECT strftime('%Y-%m', strptime(order_date, '%d-%b-%y')) AS month,
       p.product_category,
       p.product_name,
       SUM(o.quantity_ordered) AS total_quantity,
       RANK() OVER (PARTITION BY strftime('%Y-%m', strptime(order_date, '%d-%b-%y')), p.product_category
                    ORDER BY SUM(o.quantity_ordered) DESC) AS rank
FROM orders o
JOIN product_supplier p ON o.product_id = p.product_id
GROUP BY month, p.product_category, p.product_name
ORDER BY month, p.product_category, rank
""")

┌─────────┬──────────────────────────┬─────────────────────────────────────┬────────────────┬───────┐
│  month  │     product_category     │            product_name             │ total_quantity │ rank  │
│ varchar │         varchar          │               varchar               │     int128     │ int64 │
├─────────┼──────────────────────────┼─────────────────────────────────────┼────────────────┼───────┤
│ 2017-01 │ Assorted Sports Articles │ Bulls Eye Stuart/Tungsten 24 Gram   │             17 │     1 │
│ 2017-01 │ Assorted Sports Articles │ Aim4it 16 Gram Softtip Pil          │             14 │     2 │
│ 2017-01 │ Assorted Sports Articles │ Aim4it 18 Gram Softtip Pil          │             10 │     3 │
│ 2017-01 │ Assorted Sports Articles │ Brt Anorak                          │              7 │     4 │
│ 2017-01 │ Assorted Sports Articles │ Teen Multi, Assorted Colours        │              7 │     4 │
│ 2017-01 │ Assorted Sports Articles │ Aim4it 80% Tungsten 23 Gram         │      