In [83]:
import pandas as pd
import pandasql as ps

In [84]:
category = pd.read_csv('data/category_data.csv')
orders = pd.read_csv('data/order_data.csv')
order_items = pd.read_csv('data/order_items_data.csv')
products = pd.read_csv('data/product_data.csv')
reviews = pd.read_csv('data/review_data.csv')
users = pd.read_csv('data/user_data.csv')

In [85]:
t1_p1 = '''
SELECT 
    product_id, product_name, description, price, category_id
FROM 
    products
WHERE 
    category_id = (
        SELECT category_id FROM category WHERE category_name LIKE 'Sports%'
    )
'''
ps.sqldf(t1_p1, locals())

Unnamed: 0,product_id,product_name,description,price,category_id
0,15,Mountain Bike,Conquer the trails with this high-performance ...,1000.0,8
1,16,Tennis Racket,Take your tennis game to the next level with t...,54.0,8


In [86]:
# orders.groupby('user_id').agg({'order_id': 'count'}).reset_index().rename(columns={'order_id': 'order_count'})
t1_p2 = '''
SELECT u.user_id, u.username, o.total_orders
FROM
(
    (
        SELECT user_id, username
        FROM users
    ) u
    INNER JOIN
    (
        SELECT user_id, COUNT(order_id) AS total_orders
        FROM orders
        GROUP BY user_id
    ) o
    ON u.user_id = o.user_id
)
'''
ps.sqldf(t1_p2, locals())

Unnamed: 0,user_id,username,total_orders
0,1,johndoe,1
1,2,janesmith,1
2,3,maryjones,1
3,4,robertbrown,1
4,5,sarahwilson,1
5,6,michaellee,1
6,7,lisawilliams,1
7,8,chrisharris,1
8,9,emilythompson,1
9,10,davidmartinez,1


In [87]:
# rr = reviews.groupby('product_id').agg({'rating': 'mean'}).reset_index().rename(columns={'rating': 'avg_rating'})
# products.merge(rr, how='right', on='product_id', suffixes=('', '_r'))

t1_p3 = '''
SELECT r.product_id, p.product_name, r.avg_rating
FROM
(
    (
        SELECT product_id, product_name
        FROM products
    ) p
    RIGHT JOIN
    (
        SELECT product_id, AVG(rating) AS avg_rating
        FROM reviews
        GROUP BY product_id
    ) r
    ON p.product_id = r.product_id
)
ORDER BY avg_rating DESC
'''
ps.sqldf(t1_p3, locals())

Unnamed: 0,product_id,product_name,avg_rating
0,1,Smartphone X,5.0
1,4,Smart TV,5.0
2,7,Coffee Maker,5.0
3,11,Yoga Mat,5.0
4,15,Mountain Bike,5.0
5,18,,5.0
6,21,,5.0
7,25,,5.0
8,29,,5.0
9,2,Wireless Headphones,4.0


In [88]:
# orders.groupby('user_id').agg({'total_amount': 'sum'}).reset_index().rename(columns={'total_amount': 'total_spent_amount'}).sort_values('total_spent_amount', ascending=False)
t1_p4 = '''
SELECT o.user_id, u.username, o.total_amount_spent
FROM
(
    (
        SELECT user_id, username
        FROM users
    ) u
    RIGHT JOIN
    (
        SELECT user_id, SUM(total_amount) AS total_amount_spent
        FROM orders
        GROUP BY user_id
    ) o
    ON u.user_id = o.user_id
)
ORDER BY o.total_amount_spent DESC
LIMIT 5
'''
ps.sqldf(t1_p4, locals())

Unnamed: 0,user_id,username,total_amount_spent
0,12,jasonrodriguez,160.0
1,4,robertbrown,155.0
2,8,chrisharris,150.0
3,24,jamesrogers,150.0
4,17,olivialopez,145.0


In [90]:
t2_p5 = '''
SELECT 
  r.product_id, p.product_name, AVG(r.rating) AS average_rating
FROM 
  products p
RIGHT JOIN 
  reviews r 
ON 
  p.product_id = r.product_id
GROUP BY 
  r.product_id
HAVING AVG(r.rating) = (
  SELECT 
    MAX(avg_rating)
  FROM (
    SELECT 
      AVG(rating) AS avg_rating
    FROM 
      reviews
    GROUP BY 
      product_id
  )
)
'''
ps.sqldf(t2_p5, locals())

Unnamed: 0,product_id,product_name,average_rating
0,1,Smartphone X,5.0
1,4,Smart TV,5.0
2,7,Coffee Maker,5.0
3,11,Yoga Mat,5.0
4,15,Mountain Bike,5.0
5,18,,5.0
6,21,,5.0
7,25,,5.0
8,29,,5.0


In [91]:
users.merge(orders, how='inner', on='user_id').merge(order_items, how='inner', on='order_id').merge(products, how='left', on='product_id').merge(category, how='left', on='category_id')

Unnamed: 0,user_id,username,email,password,address,phone_number,order_id,order_date,total_amount,order_item_id,product_id,quantity,unit_price,product_name,description,price,category_id,category_name
0,1,johndoe,johndoe@example.com,pass123,123 Main St,123-456-7890,1,2021-01-05,100.0,1,1,2,50.0,Smartphone X,The Smartphone X is a powerful and feature-ric...,500.0,1.0,Electronics
1,1,johndoe,johndoe@example.com,pass123,123 Main St,123-456-7890,1,2021-01-05,100.0,2,2,1,25.0,Wireless Headphones,Experience the freedom of wireless audio with ...,150.0,1.0,Electronics
2,2,janesmith,janesmith@example.com,pass456,456 Elm St,987-654-3210,2,2021-02-10,75.0,3,3,3,30.0,Laptop Pro,The Laptop Pro is a sleek and powerful device ...,1200.0,2.0,Books
3,2,janesmith,janesmith@example.com,pass456,456 Elm St,987-654-3210,2,2021-02-10,75.0,4,4,1,15.0,Smart TV,Transform your living room into an entertainme...,800.0,2.0,Books
4,3,maryjones,maryjones@example.com,pass789,789 Oak St,555-123-4567,3,2021-03-15,120.0,5,5,2,20.0,Running Shoes,Get ready to hit the road with these lightweig...,100.0,3.0,Clothing
5,3,maryjones,maryjones@example.com,pass789,789 Oak St,555-123-4567,3,2021-03-15,120.0,6,6,1,10.0,Designer Dress,Make a statement with this elegant designer dress,300.0,3.0,Clothing
6,4,robertbrown,robertbrown@example.com,passabc,321 Pine St,111-222-3333,4,2021-04-20,155.0,7,7,3,35.0,Coffee Maker,Start your day with a perfect cup of coffee br...,80.0,4.0,Home & Kitchen
7,4,robertbrown,robertbrown@example.com,passabc,321 Pine St,111-222-3333,4,2021-04-20,155.0,8,8,1,40.0,Toaster Oven,Upgrade your kitchen with this versatile toast...,70.0,4.0,Home & Kitchen
8,5,sarahwilson,sarahwilson@example.com,passxyz,567 Maple St,444-555-6666,5,2021-05-25,90.0,9,9,2,30.0,Action Camera,Capture your adventures in stunning detail wit...,200.0,5.0,Toys & Games
9,5,sarahwilson,sarahwilson@example.com,passxyz,567 Maple St,444-555-6666,5,2021-05-25,90.0,10,10,1,25.0,Board Game Collection,Enjoy hours of fun with this diverse collectio...,50.0,5.0,Toys & Games


In [92]:
t2_p6 = '''
SELECT 
  uid as user_id, uname as username
FROM
(
  SELECT
      u.user_id as uid, u.username as uname, COUNT(DISTINCT c.category_id) as user_categories
  FROM
    users u
  INNER JOIN orders o ON u.user_id = o.user_id
  LEFT JOIN order_items oi ON o.order_id = oi.order_id
  LEFT JOIN products p ON oi.product_id = p.product_id
  LEFT JOIN category c ON p.category_id = c.category_id
  GROUP BY
    uid, uname
)
WHERE user_categories = (
  SELECT COUNT(category_id) FROM category  
)
'''
ps.sqldf(t2_p6, locals())

Unnamed: 0,user_id,username


In [None]:
# products.merge(reviews, how='right', on='product_id').groupby('product_id').agg({'rating': 'mean'}).reset_index().rename(columns={'rating': 'avg_rating'}).sort_values('product_id', ascending=True)

In [93]:
t2_p7 = '''
SELECT product_id, product_name
FROM
products
WHERE
product_id NOT IN (
    SELECT product_id
    FROM
    reviews
)
'''
ps.sqldf(t2_p7, locals())

Unnamed: 0,product_id,product_name


In [94]:
t2_p8 = '''
SELECT DISTINCT u.user_id, u.username
FROM users u
INNER JOIN
(
    SELECT
        user_id,
        order_date,
        LAG(order_date) OVER (PARTITION BY user_id ORDER BY order_date) AS prev_order_date
    FROM
        orders
) o ON u.user_id = o.user_id
WHERE JULIANDAY(o.order_date) - JULIANDAY(o.prev_order_date) = 1
'''
ps.sqldf(t2_p8, locals())

Unnamed: 0,user_id,username


In [95]:
t3_p9 = '''
SELECT
    c.category_id, c.category_name, SUM(oi.quantity * oi.unit_price) as total_sales_amount
FROM
order_items oi
LEFT JOIN products p ON oi.product_id = p.product_id
LEFT JOIN category c ON p.category_id = c.category_id
GROUP BY
c.category_id, c.category_name
ORDER BY
total_sales_amount DESC
LIMIT 3
'''
ps.sqldf(t3_p9, locals())

Unnamed: 0,category_id,category_name,total_sales_amount
0,,,570.0
1,8.0,Sports & Outdoors,155.0
2,4.0,Home & Kitchen,145.0


In [96]:
t3_p10 = '''
SELECT u.user_id, u.username
FROM
users u
INNER JOIN orders o ON u.user_id = o.user_id
LEFT JOIN order_items oi ON oi.order_id = o.order_id
LEFT JOIN products p ON oi.product_id = p.product_id
LEFT JOIN category c ON p.category_id = c.category_id
WHERE p.category_id = (
    SELECT category_id FROM category WHERE category_name = 'Toys & Games'
)
GROUP BY u.user_id, u.username
HAVING COUNT(DISTINCT oi.product_id) = (
    SELECT COUNT(product_id) FROM products WHERE category_id = (
        SELECT category_id FROM category WHERE category_name = 'Toys & Games'
    )
)
'''
ps.sqldf(t3_p10, locals())

Unnamed: 0,user_id,username
0,5,sarahwilson


In [97]:
t3_p11 = '''
SELECT 
    product_id, product_name, category_id, price 
FROM (
    SELECT 
        category_id, price, product_id, product_name,
        RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS rank
    FROM
        products
)
WHERE rank = 1
'''
ps.sqldf(t3_p11, locals())

Unnamed: 0,product_id,product_name,category_id,price
0,1,Smartphone X,1,500.0
1,3,Laptop Pro,2,1200.0
2,6,Designer Dress,3,300.0
3,7,Coffee Maker,4,80.0
4,9,Action Camera,5,200.0
5,12,Skincare Set,6,150.0
6,14,Weighted Blanket,7,100.0
7,15,Mountain Bike,8,1000.0


In [98]:

t3_p12 = '''
SELECT DISTINCT u.user_id, u.username
FROM users u
INNER JOIN
(
    SELECT
        user_id,
        order_date,
        LAG(order_date, 1) OVER (PARTITION BY user_id ORDER BY order_date) AS one_order_back,
        LAG(order_date, 2) OVER (PARTITION BY user_id ORDER BY order_date) AS two_order_back
    FROM
        orders
) o ON u.user_id = o.user_id
WHERE JULIANDAY(o.order_date) - JULIANDAY(o.one_order_back) = 1
AND JULIANDAY(o.one_order_back) - JULIANDAY(o.two_order_back) = 1
'''
ps.sqldf(t3_p12, locals())

Unnamed: 0,user_id,username
