In [1]:
# One-time setup: uncomment the next line if these packages are not installed.
# %pip install ipython-sql pandas

In [2]:
# Load the `sql` IPython extension (provided by the ipython-sql package).
# NOTE(review): no cell shown here uses a %sql / %%sql magic -- every query is run
# through pd.read_sql_query -- so this extension may not be needed; confirm.
%load_ext sql

In [3]:
import sqlite3
import pandas as pd

# Open the SQLite database stored in the local file "Fetch.db" (sqlite3 creates the
# file if it does not exist). This is an on-disk database, not an in-memory one;
# an in-memory database would be opened with sqlite3.connect(":memory:").
conn = sqlite3.connect("Fetch.db")

# Read the three cleaned take-home CSV extracts into DataFrames. The paths are
# relative, so the files are looked up in the notebook's working directory.
df_transaction = pd.read_csv("cleaned_transaction_takehome.csv")
df_user = pd.read_csv("cleaned_user_takehome.csv")
df_product = pd.read_csv("cleaned_product_takehome.csv")


In [4]:
# Write each DataFrame to its SQLite table, replacing any table left over from a
# previous run and leaving the DataFrame index out of the stored columns.
frames_by_table = {
    "TRANSACTIONS": df_transaction,
    "USER": df_user,
    "PRODUCTS": df_product,
}
rows_written = [
    frame.to_sql(table_name, conn, if_exists="replace", index=False)
    for table_name, frame in frames_by_table.items()
]
# Display the value returned by the last write (the PRODUCTS table), as before.
rows_written[-1]

839813

In [22]:
# Exploratory query: all columns of every transaction joined to its user and product.
# The WHERE clause keeps only rows whose matched user has a non-null AGE, which also
# discards transactions with no matching USER row, so the first LEFT JOIN behaves
# like an inner join here. The PRODUCTS match stays optional.
# NOTE(review): no cell shown here executes this string -- confirm it is still needed.
query = '''
Select *
from TRANSACTIONS t
LEFT JOIN USER u 
ON t.USER_ID = u.ID
LEFT JOIN PRODUCTS p 
ON t.BARCODE = p.BARCODE
WHERE u.AGE is not null
'''

In [26]:
# Top 5 brands by number of receipts scanned by users aged 21 or over.
#
# * COUNT(DISTINCT t.RECEIPT_ID) counts each receipt once per brand. A plain COUNT
#   would count every joined line-item row, so a single receipt listing the same
#   brand several times would be over-counted.
# * The WHERE clause already requires a matching user (AGE >= 21) and a non-null
#   BRAND, so both joins are written as inner joins to state that intent directly.
# * p.BRAND is a secondary sort key so that ties are returned in a stable order.
query1 = '''
SELECT p.BRAND
FROM TRANSACTIONS t
JOIN USER u ON t.USER_ID = u.ID
JOIN PRODUCTS p ON t.BARCODE = p.BARCODE
WHERE u.AGE >= 21.0
    AND p.BRAND IS NOT NULL
GROUP BY p.BRAND
ORDER BY COUNT(DISTINCT t.RECEIPT_ID) DESC, p.BRAND ASC
LIMIT 5
'''

In [27]:
# Execute query1 on the open SQLite connection; the resulting DataFrame is the
# cell's displayed output.
df = pd.read_sql_query(sql=query1, con=conn)
df

Unnamed: 0,BRAND
0,NERDS CANDY
1,DOVE
2,TRIDENT
3,SOUR PATCH KIDS
4,MEIJER


In [40]:
# Top 5 brands by total FINAL_SALE among users whose account is at least six months old.
#
# * Only the first 10 characters of CREATED_DATE are compared with the cutoff date.
#   Comparing a full timestamp string against a date-only string sorts
#   'YYYY-MM-DD hh:mm:ss' after 'YYYY-MM-DD', which silently excluded users created
#   on the cutoff day itself.
#   NOTE(review): assumes CREATED_DATE text begins with YYYY-MM-DD (the original
#   string comparison relied on the same ordering) -- confirm against the CSV.
# * DATE('now', ...) is evaluated when the query runs, so results depend on the run date.
# * The WHERE clause requires a matching user and a non-null BRAND, so both joins are
#   inner joins; p.BRAND breaks ties in total_sales deterministically.
query2 = '''
SELECT p.BRAND, SUM(t.FINAL_SALE) AS total_sales
FROM TRANSACTIONS t
JOIN USER u ON t.USER_ID = u.ID
JOIN PRODUCTS p ON t.BARCODE = p.BARCODE
WHERE substr(u.CREATED_DATE, 1, 10) <= DATE('now', '-6 months')
    AND p.BRAND IS NOT NULL
GROUP BY p.BRAND
ORDER BY total_sales DESC, p.BRAND ASC
LIMIT 5;
'''

In [41]:
# Execute query2 on the open SQLite connection; the resulting DataFrame is the
# cell's displayed output.
df = pd.read_sql_query(sql=query2, con=conn)
df

Unnamed: 0,BRAND,total_sales
0,CVS,72.0
1,DOVE,30.91
2,TRIDENT,23.36
3,COORS LIGHT,17.48
4,TRESEMMÉ,14.58


In [60]:
# Rank users by activity and return those ranked 25 or better.
#
# user_activity aggregates, per user that exists in USER and over rows with
# FINAL_SALE > 0: distinct receipts, distinct receipts purchased in the last 90 days,
# total FINAL_SALE, and the number of distinct purchase months.
# ranked_users applies RANK() over those metrics (receipts first, then recency,
# spend and active months); the final SELECT filters on the rank.
#
# * purchases_last_90_days uses COUNT(DISTINCT ...) so it is measured in receipts,
#   like total_receipts. A plain COUNT counted line-item rows and could exceed
#   total_receipts.
# * The final SELECT has an explicit ORDER BY; without one the row order of the
#   result is not guaranteed. USER_ID orders users that share a rank.
# * NOTE(review): DATE('now', '-90 days') is evaluated at run time, so on a static
#   extract this metric can be 0 for every user (as in the saved output below) --
#   consider anchoring the window to the latest PURCHASE_DATE in the data.
query3 = '''
WITH user_activity AS (
    SELECT
        t.USER_ID,
        COUNT(DISTINCT t.RECEIPT_ID) AS total_receipts,  -- Total receipts scanned

        -- Distinct receipts purchased in the last 90 days
        COUNT(DISTINCT CASE
            WHEN t.PURCHASE_DATE >= DATE('now', '-90 days') THEN t.RECEIPT_ID
        END) AS purchases_last_90_days,

        SUM(t.FINAL_SALE) AS total_spent,  -- Total amount spent

        -- Active months in transactions (format YYYY-MM to group by month)
        COUNT(DISTINCT strftime('%Y-%m', t.PURCHASE_DATE)) AS active_months

    FROM "TRANSACTIONS" t
    JOIN "USER" u ON t.USER_ID = u.ID
    WHERE t.FINAL_SALE > 0  -- Exclude invalid transactions
    GROUP BY t.USER_ID
),
ranked_users AS (
    -- Apply ranking in this subquery
    SELECT
        USER_ID,
        total_receipts,
        total_spent,
        purchases_last_90_days,
        active_months,
        RANK() OVER (
            ORDER BY total_receipts DESC,
                     purchases_last_90_days DESC,
                     total_spent DESC,
                     active_months DESC
        ) AS user_rank
    FROM user_activity
)
-- Now apply the filter on the ranking result, in rank order
SELECT *
FROM ranked_users
WHERE user_rank <= 25
ORDER BY user_rank, USER_ID;
'''

In [61]:
# Execute query3 on the open SQLite connection; the resulting DataFrame is the
# cell's displayed output.
df = pd.read_sql_query(sql=query3, con=conn)
df

Unnamed: 0,USER_ID,total_receipts,total_spent,purchases_last_90_days,active_months,user_rank
0,62ffec490d9dbaff18c0a999,3,26.14,0,3,1
1,61a58ac49c135b462ccddd1c,3,14.99,0,2,2
2,5c366bf06d9819129dfa1118,3,12.2,0,3,3
3,62c09104baa38d1a1f6c260e,3,11.3,0,3,4
4,610a8541ca1fab5b417b5d33,3,10.97,0,2,5
5,6528a0a388a3a884364d94dc,3,6.25,0,1,6
6,643059f0838dd2651fb27f50,2,75.99,0,1,7
7,5fc12a8a16770448f92e56b8,2,14.67,0,2,8
8,632fc9dc0c625b72ae991f83,2,14.27,0,2,9
9,6351760a3a4a3534d9393ecd,2,13.87,0,2,10
