In [2]:
import pandas as pd
import pandasql as ps

# Load the pickled Amazon product listings; the SQL cells below query this
# object under the table name `df` (presumably a DataFrame — see the preview).
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
df = pd.read_pickle("../data/Amazon_products.pkl")


In [7]:
# Preview query: return every column for up to five rows of the products table.
# There is no ORDER BY, so which five rows come back is up to the SQL engine.
query = """
SELECT *
FROM df
LIMIT 5
"""

# locals() hands the notebook namespace to pandasql so that the table name
# `df` used in the SQL can be resolved to the object loaded earlier.
result = ps.sqldf(query, locals())
result

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date
0,B0DJK7NW1J,"15.6 Inch Laptops, Windows 11 Laptop Computers...",199.98,679.99,4.4,48.0,100.0,0,0,0,0,,,,,
1,B0BS4BP8FB,Acer Aspire 3 A315-24P-R7VH Slim Laptop | 15.6...,279.99,321.99,4.4,39243.0,8000.0,0,0,1,0,6.0,Overall Pick,,,
2,B0DKDY78K3,"Newest Gaming Laptop, Laptop with AMD Ryzen 7 ...",649.99,1699.99,4.8,18.0,100.0,0,0,0,0,,,,,
3,B0CPL25J3W,"HP Portable Laptop, Student and Business, 14"" ...",197.35,269.0,4.1,1678.0,1000.0,0,0,0,0,25.0,,,,
4,B0947BJ67M,"HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 6...",176.0,209.99,4.0,1861.0,4000.0,0,0,0,0,50.0,,,,


#### 1. Get the average price of products whose past-month sales (`sold_past_month`) are above the overall average

In [None]:
# Average price of the products whose sold_past_month is strictly greater than
# the average sold_past_month over the whole table (computed by the scalar
# subquery). Rows with a NULL sold_past_month never satisfy the comparison,
# and AVG ignores NULL values.
query = """
SELECT AVG(price) as avg_price
FROM df
WHERE sold_past_month > (
    SELECT AVG(sold_past_month)
    FROM df
)
"""

# The result is a single-row, single-column frame named avg_price.
result = ps.sqldf(query, locals())
result

Unnamed: 0,avg_price
0,57.50738


#### 2. Count the products that have the highest rating in the dataset and more than 10 reviews

In [12]:
# Count the rows whose rating equals the maximum rating found anywhere in the
# table and that also have more than 10 reviews.
# NOTE(review): the maximum is taken over all products, not only over those
# with more than 10 reviews — confirm that this is the intended reading.
query = """
SELECT COUNT(*) as count
FROM df
WHERE rating = (
    SELECT MAX(rating)
    FROM df
) and reviews > 10
"""

# The result is a single-row frame with one column named count.
result = ps.sqldf(query, locals())
result

Unnamed: 0,count
0,1


#### 3. Get the moving average of discount for each brand over a 7-row window (rows ordered by title within each brand)

In [None]:
# Per-brand moving average of the discount percentage.
#
# * `temp` keeps rows that have both a list price and a brand, and computes
#   discount = (list_price - price) / list_price * 100, rounded to 2 decimals.
# * The window averages the current row and the 6 rows before it (7 rows in
#   total) inside each brand partition. This is a row-count window ordered by
#   title, not a calendar window: no date column is involved.
# * `asin` is carried through `temp` only as a tie-breaker. Several products
#   share the same title, and without a tie-breaker the order of those rows
#   inside the window (and therefore the moving average) is unspecified.
# * The outer ORDER BY makes the row order of the result explicit, so that
#   result.head(10) always shows the same ten rows.
query = """
WITH temp AS (
    SELECT asin, title, price, list_price, rating, sold_past_month, brand,
    ROUND((list_price - price) / list_price * 100, 2) as discount
    FROM df
    WHERE list_price IS NOT NULL AND brand IS NOT NULL
)
SELECT title, price, list_price, rating, sold_past_month, brand, discount,
AVG(discount) OVER (PARTITION BY brand ORDER BY title, asin ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_avg_discount
FROM temp
ORDER BY brand, title, asin
"""

# locals() lets pandasql resolve the table name `df` from the notebook namespace.
result = ps.sqldf(query, locals())
result.head(10)

Unnamed: 0,title,price,list_price,rating,sold_past_month,brand,discount,moving_avg_discount
0,24” (23.8-inch viewable) 1080P Eye Care Monito...,74.0,99.0,4.6,1000.0,ASUS,25.25,25.25
1,24” (23.8-inch viewable) Eye Care Monitor (VY2...,76.99,99.0,4.4,400.0,ASUS,22.23,23.74
2,"27” 1080P Monitor (VA27DQ) - Full HD, IPS, 75H...",136.95,159.0,4.6,300.0,ASUS,13.87,20.45
3,"Chromebook CM14 Laptop, 14"" HD Anti-Glare Disp...",169.9,279.99,4.2,500.0,ASUS,39.32,25.1675
4,"Chromebook CM14 Laptop, 14"" HD Anti-Glare Disp...",169.9,279.99,4.2,500.0,ASUS,39.32,27.998
5,ROG Strix 27” 1440P Gaming Monitor (XG27AQMR) ...,399.0,599.0,4.3,100.0,ASUS,33.39,28.896667
6,ROG Strix 27” QHD (2560x1440) HDR400 USB-C Gam...,257.25,299.0,4.2,200.0,ASUS,13.96,26.762857
7,"ROG Strix G16 (2024) Gaming Laptop, 16” 16:10 ...",1283.26,1399.99,4.3,1000.0,ASUS,8.34,24.347143
8,ROG Swift 32” 4K OLED Gaming Monitor (PG32UCDM...,1229.99,1299.0,4.4,500.0,ASUS,5.31,21.93
9,"TUF Gaming 23.6"" 1080P Curved Monitor (VG24VQE...",109.99,169.0,4.6,400.0,ASUS,34.92,24.937143


#### 4. Top 10 brands ranked by their highest product price, with their highest rating as the tie-breaker


In [None]:
# Top 10 brands ranked by their most expensive product, using the brand's best
# rating as the tie-breaker.
#
# * `temp` aggregates one row per non-NULL brand: MAX(price) and MAX(rating).
#   The two maxima are computed independently, so they may come from
#   different products of the same brand.
# * DENSE_RANK numbers the brands by (max_price DESC, max_rating DESC).
# * The ORDER BY lives on the outer query, next to LIMIT. An ORDER BY inside a
#   CTE does not guarantee the order of the outer result, so LIMIT 10 would
#   otherwise be free to return any ten brands. `brand` is the final sort key
#   so that brands tied on both maxima come back in a stable order.
query = """
WITH temp AS (
SELECT brand, MAX(price) as max_price, MAX(rating) as max_rating
FROM df
WHERE brand IS NOT NULL
GROUP BY brand
)
SELECT brand, max_price, max_rating,
DENSE_RANK() OVER (ORDER BY max_price DESC, max_rating DESC) as rank
FROM temp
ORDER BY max_price DESC, max_rating DESC, brand
LIMIT 10
"""

# locals() lets pandasql resolve the table name `df` from the notebook namespace.
result = ps.sqldf(query, locals())
result


Unnamed: 0,brand,max_price,max_rating,rank
0,ASUS,1636.0,4.8,1
1,SAMSUNG,1225.0,4.6,2
2,Dell,1199.99,5.0,3
3,acer,954.99,4.6,4
4,HP,873.99,5.0,5
5,ApoloSign,849.99,4.3,6
6,BenQ,799.99,4.7,7
7,LG,794.99,4.6,8
8,Lenovo,649.97,5.0,9
9,BIGASUO,399.98,4.2,10
