In [43]:

from pathlib import Path

import pandas as pd
import pandasql as ps

from dim_date import df_date as dim_date
# Directory holding the CSV extracts. Kept in one place so the notebook can be
# pointed at another copy of the data by editing a single line.
DATA_DIR = Path('/Users/akshayadhanraj/Python/DataAnalystTask')

# Dimension and fact tables. The variable names double as the table names used
# in the SQL queries further down (the queries are run with the notebook
# namespace passed to pandasql).
dim_customer = pd.read_csv(DATA_DIR / 'dim_customer.csv')
dim_shipping = pd.read_csv(DATA_DIR / 'dim_shipping.csv')
dim_product = pd.read_csv(DATA_DIR / 'dim_product.csv')
fact_order = pd.read_csv(DATA_DIR / 'fact_order.csv')


##### 1. The total amount spent on orders with Pending delivery status, for each country.

In [45]:
# Q1: total order amount per customer country, restricted to rows whose
# `status` is 'Pending'. fact_order is joined to dim_customer (for country)
# and to dim_shipping on their id keys, then summed per country.
# NOTE(review): `status` is unqualified; presumably it comes from
# dim_shipping -- confirm. If so, the WHERE filter discards fact rows without
# a matching shipping row, so that LEFT JOIN behaves like an inner join.
# NOTE(review): there is no ORDER BY, so the row order is not guaranteed.
q1 = """ SELECT c.country, SUM(o.amount) AS total_amount_spent FROM fact_order o 
LEFT JOIN dim_customer c ON o.customer_id = c.customer_id 
LEFT JOIN dim_shipping s ON o.shipping_id = s.shipping_id 
WHERE status = 'Pending' GROUP BY country """
# locals() hands the notebook namespace to pandasql so the table names in the
# query resolve to the DataFrames loaded above.
print(ps.sqldf(q1, locals()))

  country  total_amount_spent
0     UAE               53800
1      UK              136300
2     USA               65500


##### 2.	The total number of transactions, total quantity sold, and total amount spent for each customer, along with the product details.

In [46]:
# Q2: per (customer, product) aggregates with the customer's name attached.
# The CTE groups fact_order by customer_id and product_name; the outer query
# inner-joins dim_customer, so aggregates whose customer_id has no
# dim_customer row are dropped. Results are sorted by customer, then product.
# NOTE(review): total_quantity_sold is COUNT(*), i.e. always identical to
# total_transactions. If fact_order carries a per-row quantity column this
# should presumably be a SUM over it -- confirm against the schema.
q2 = """ WITH customer_item_agg AS (
    SELECT 
        o.customer_id,
        p.product_name,
        COUNT(*) AS total_transactions,
        COUNT(*) AS total_quantity_sold,
        SUM(o.amount) AS total_amount_spent
    FROM fact_order o
    LEFT JOIN dim_product p ON o.product_id = p.product_id
    GROUP BY customer_id, product_name
)
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name,
    cia.product_name,
    cia.total_transactions,
    cia.total_quantity_sold,
    cia.total_amount_spent
FROM customer_item_agg cia
JOIN dim_customer c ON c.customer_id = cia.customer_id
ORDER BY c.customer_id, cia.product_name"""

# Run the query against the DataFrames in the notebook namespace.
print(ps.sqldf(q2, locals()))

     customer_id first_name   last_name product_name  total_transactions  \
0              4       Eric      Carter     Mousepad                   1   
1              5    William     Jackson      DDR RAM                   1   
2              8      Jason  Montgomery      DDR RAM                   2   
3              8      Jason  Montgomery     Mousepad                   4   
4              8      Jason  Montgomery       Webcam                   2   
..           ...        ...         ...          ...                 ...   
232          247       John      Miller     Mousepad                   1   
233          249   Patricia      Garcia      DDR RAM                   1   
234          249   Patricia      Garcia      Headset                   1   
235          249   Patricia      Garcia     Mousepad                   2   
236          250    Stephen       Jones        Mouse                   1   

     total_quantity_sold  total_amount_spent  
0                      1                

##### 3. The most purchased product in each country.

In [47]:
# Q3: most-ordered product(s) per country. Order rows are counted per
# (country, product) and ranked within each country by that count, highest
# first. DENSE_RANK gives tied products the same rank, so `rn = 1` keeps every
# product tied for first place and a country may appear on several rows.
# COUNT(o.customer_id) ignores order rows whose customer_id is NULL.
# NOTE(review): the outer query has no ORDER BY, so row order is not
# guaranteed, and the winning count itself is not part of the output.
q3 = """SELECT country, product_name
FROM (
  SELECT c.country, p.product_name, COUNT(o.customer_id) AS total_quantity,
  DENSE_RANK() OVER (PARTITION BY c.country ORDER BY COUNT(o.customer_id) DESC) AS rn
  FROM fact_order o
  LEFT JOIN dim_customer c ON o.customer_id = c.customer_id
  LEFT JOIN dim_product p ON o.product_id = p.product_id
  GROUP BY c.country, p.product_name
) a WHERE rn = 1 """

# Run the query against the DataFrames in the notebook namespace.
print(ps.sqldf(q3,locals()))

  country product_name
0     UAE     Keyboard
1      UK     Mousepad
2      UK     Keyboard
3     USA     Mousepad


##### 4. The most purchased product in each age category (under 30, and 30 and over).

In [53]:
# Q4: most-purchased product(s) within each customer age category.
#   customer_age     - one row per order with the buyer's age_category and the
#                      product name (orders without a dim_customer match are
#                      dropped by the inner join).
#   product_quantity - number of order rows per (age_category, product_name).
#   ranked_products  - ranks products inside each age category by that count,
#                      highest first.
# DENSE_RANK is used instead of ROW_NUMBER so that products tied for first
# place are all reported rather than one of them being picked arbitrarily;
# this matches the tie handling of the per-country query (q3). The final
# ORDER BY makes the printed row order deterministic.
q4 = """WITH customer_age AS (
    SELECT
        c.age_category,
        p.product_name
    FROM fact_order o
    JOIN dim_customer c ON o.customer_id = c.customer_id
    LEFT JOIN dim_product p ON o.product_id = p.product_id
),
product_quantity AS (
    SELECT
        age_category,
        product_name,
        COUNT(*) AS total_quantity_sold
    FROM customer_age
    GROUP BY age_category, product_name
),
ranked_products AS (
    SELECT
        *,
        DENSE_RANK() OVER (PARTITION BY age_category ORDER BY total_quantity_sold DESC) AS rn
    FROM product_quantity
)
SELECT age_category, product_name, total_quantity_sold
FROM ranked_products
WHERE rn = 1
ORDER BY age_category, product_name"""

# Run the query against the DataFrames in the notebook namespace.
print(ps.sqldf(q4, locals()))


  age_category product_name  total_quantity_sold
0          <30     Mousepad                   23
1         >=30     Keyboard                   49


##### 5. The country with the fewest transactions and the lowest sales amount.

In [62]:
# Q5: per-country transaction count and summed order amount, then the single
# row that sorts first by total_transactions, using total_sales only as a
# tie-breaker. Orders without a dim_customer match are dropped by the inner
# join.
# NOTE(review): this returns the country with the fewest transactions; it is
# also the lowest-sales country only if the two minima coincide -- confirm
# that this matches the intent of the question. LIMIT 1 also hides any
# country that ties exactly on both measures.
q5 = """WITH country_totals AS (
    SELECT 
        c.country,
        COUNT(*) AS total_transactions,
        SUM(o.amount) AS total_sales
    FROM fact_order o
    JOIN dim_customer c ON o.customer_id = c.customer_id
    GROUP BY c.country
)
SELECT *
FROM country_totals
ORDER BY total_transactions ASC, total_sales ASC
LIMIT 1"""

# Run the query against the DataFrames in the notebook namespace.
print(ps.sqldf(q5,locals()))

  country  total_transactions  total_sales
0     UAE                  63        81650
