In [16]:
import pandas as pd
import sqlite3

df = pd.read_pickle("../data/Amazon_products.pkl")

In [17]:
conn = sqlite3.connect(":memory:")
df.to_sql("products", conn, index=False)
cursor = conn.cursor()

# Get the number of products in the dataset
cursor.execute("SELECT COUNT(*) FROM products")
print(cursor.fetchone())

(1242,)


In [18]:
def transform_to_dataframe(result):
    return pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])

#### 1. Getting products that have price less than 100

In [19]:
query = "SELECT * FROM products WHERE price < 100"
# result = pd.read_sql_query(query, conn)
# result.head()
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date
0,B0CT4HX4T9,"Lined Spiral Journal Notebook for Women & Men,...",5.99,7.99,4.7,1158.0,10000.0,0,0,1,1,,Overall Pick,,,
1,B003O2RXUQ,"Five Star Spiral Notebook + Study App, 5 Subje...",5.68,11.99,4.8,30479.0,7000.0,1,0,0,0,,,,,
2,B00D3OR58A,"Oxford Spiral Notebook 6 Pack, 1 Subject, Coll...",11.99,12.81,4.8,42678.0,6000.0,0,0,0,0,7.0,,,,
3,B00P9U2EM8,"Mead Spiral Notebooks, 6 Pack, 1 Subject, Coll...",12.21,,4.7,16143.0,10000.0,0,0,0,0,18.0,,,,
4,B01DN8TCEU,"Amazon Basics Classic Lined Notebook, 240 Page...",6.39,8.54,4.7,46185.0,5000.0,0,0,0,0,,,,,


#### 2. All products that are eligible for Amazon Prime and have an average rating of 4 or higher.

In [20]:
query = """
SELECT * FROM products
WHERE is_prime = 1 AND rating > 4
"""
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date
0,B07QGHK6Q8,"Logitech G213 Prodigy Gaming Keyboard, LIGHTSY...",39.99,69.99,4.5,6142.0,2000.0,0,1,1,1,2.0,Overall Pick,,"Tue, Nov 19",
1,B06XW8QXVG,Perixx PERIBOARD-317 Wired Backlit USB Keyboar...,19.99,29.9,4.4,8242.0,400.0,0,1,0,0,3.0,,,"Tue, Nov 19",
2,B08Z6X4NK3,Logitech G413 SE Full-Size Mechanical Gaming K...,57.06,79.99,4.6,1668.0,1000.0,0,1,0,1,8.0,,,"Tue, Nov 19",
3,B09STTTKBK,Arteck Backlit USB Wired Full Size Keyboard wi...,35.99,,4.5,2473.0,500.0,0,1,0,0,2.0,,,"Tue, Nov 19",
4,B0CZM6JKZ4,HP 400 Backlit Wired Keyboard (Ash Wired) - Wa...,39.99,49.99,4.4,34.0,50.0,0,1,0,0,,,,"Tue, Nov 19",


#### 3. top 5 best-selling products (is_bestseller) sorted by the number of units sold in the past month.

In [21]:
query = """
SELECT * FROM products
WHERE is_bestseller = 1
ORDER BY sold_past_month DESC
LIMIT 5
"""
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date
0,B08BRCT4JH,"BESIGN LS03 Aluminum Laptop Stand, Ergonomic D...",18.99,24.99,4.8,18013.0,10000.0,1,0,0,1,2.0,,,,
1,B09MQWWP87,"Taygeer Travel Backpack for Women, Carry On Ba...",17.99,23.99,4.7,10033.0,10000.0,1,0,0,1,,,,,
2,B0B1HJ666G,Mac Book Pro Charger - 118W USB C Charger Fast...,35.99,,4.4,4321.0,10000.0,1,0,0,0,5.0,,,,
3,B09G9FPHY6,"iPad (9th Generation): with A13 Bionic chip, 1...",,,4.8,70660.0,10000.0,1,0,0,1,4.0,,Apple,,
4,B079JLY5M5,MK270 Wireless Keyboard And Mouse Combo For Wi...,29.99,,4.5,97246.0,10000.0,1,1,0,1,2.0,,Logitech,"Tue, Nov 19","Mon, Nov 18"


#### 4. All products with a discount greater than 20%

In [22]:
query = """
SELECT title, price, rating, sold_past_month,
ROUND((list_price - price) / list_price * 100, 2) AS discount
FROM products
WHERE discount > 20
ORDER BY discount DESC
"""
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,title,price,rating,sold_past_month,discount
0,"Mead Composition Notebooks, 12 Pack, Wide Rule...",20.95,4.8,900.0,74.85
1,"AOC Newest 15.6"" Laptop, Intel Quad-Core Proce...",363.98,4.3,400.0,74.0
2,"ACEMAGIC 2024 Newest 17.3Inch Laptop,FHD Displ...",399.99,4.3,200.0,73.33
3,"ACEMAGIC Laptop, Newest Windows 11 Laptop Comp...",359.99,4.8,100.0,72.31
4,"ACEMAGIC Laptop, Newest Windows 11 Laptop Comp...",359.99,4.8,100.0,72.31


#### Adding a new column `discount` to the dataset

In [None]:
add_column_query = """
ALTER TABLE products
ADD COLUMN discount FLOAT
"""

insert_query = """
UPDATE products
SET discount = ROUND((list_price - price) / list_price * 100, 2)
"""

try:
    # context manager
    with conn:
        cursor.execute(add_column_query)
        cursor.execute(insert_query)

except Exception as e:
    print(f'Error: {e}')




#### 5. Getting top 10 brands with the highest average rating for prime products

In [24]:
query = """
WITH product_prime_counts AS (
SELECT brand, count(is_prime) AS prime_count, AVG(rating) AS avg_rating
FROM products
WHERE is_prime = 1 and brand IS NOT NULL
GROUP BY brand
ORDER BY avg_rating DESC
)
SELECT brand, avg_rating,
DENSE_RANK() OVER (ORDER BY prime_count DESC) AS prime_rank
FROM product_prime_counts
WHERE prime_count > 10
ORDER BY avg_rating DESC
LIMIT 10
"""
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head(10)




Unnamed: 0,brand,avg_rating,prime_rank
0,ASUS,4.515385,6
1,BenQ,4.508333,7
2,LG,4.5,5
3,Logitech,4.483333,1
4,acer,4.466667,4
5,Razer,4.428571,2
6,Corsair,4.396,3
7,SAMSUNG,4.353846,6
8,SteelSeries,4.153333,4


In [25]:
query = """
SELECT brand, 
COUNT(is_prime) over (PARTITION BY brand) AS prime_count
FROM products
WHERE is_prime = 1 and brand IS NOT NULL
ORDER BY prime_count DESC
"""
cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,brand,prime_count
0,Logitech,30
1,Logitech,30
2,Logitech,30
3,Logitech,30
4,Logitech,30


In [None]:
query = """
SELECT *, 1 AS is_prime
case when rating > 4 then 1 else 0 end AS is_high_rated
FROM products
WHERE is_prime = 1 and is_high_rated = 1

"""

cursor.execute(query)
result = cursor.fetchall()
transform_to_dataframe(result).head()

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date,discount,is_prime.1
0,B07QGHK6Q8,"Logitech G213 Prodigy Gaming Keyboard, LIGHTSY...",39.99,69.99,4.5,6142.0,2000.0,0,1,1,1,2.0,Overall Pick,,"Tue, Nov 19",,42.86,1
1,B06XW8QXVG,Perixx PERIBOARD-317 Wired Backlit USB Keyboar...,19.99,29.9,4.4,8242.0,400.0,0,1,0,0,3.0,,,"Tue, Nov 19",,33.14,1
2,B08Z6X4NK3,Logitech G413 SE Full-Size Mechanical Gaming K...,57.06,79.99,4.6,1668.0,1000.0,0,1,0,1,8.0,,,"Tue, Nov 19",,28.67,1
3,B09STTTKBK,Arteck Backlit USB Wired Full Size Keyboard wi...,35.99,,4.5,2473.0,500.0,0,1,0,0,2.0,,,"Tue, Nov 19",,,1
4,B0CZM6JKZ4,HP 400 Backlit Wired Keyboard (Ash Wired) - Wa...,39.99,49.99,4.4,34.0,50.0,0,1,0,0,,,,"Tue, Nov 19",,20.0,1


In [None]:
conn.commit()
conn.close()
